1/* 2 * Copyright © Microsoft Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 
 */

#include "d3d12_context.h"
#include "d3d12_format.h"
#include "d3d12_resource.h"
#include "d3d12_screen.h"
#include "d3d12_surface.h"
#include "d3d12_video_dec.h"
#include "d3d12_video_dec_h264.h"
#include "d3d12_video_buffer.h"
#include "d3d12_residency.h"

#include "vl/vl_video_buffer.h"
#include "util/format/u_format.h"
#include "util/u_inlines.h"
#include "util/u_memory.h"
#include "util/u_video.h"
#include "util/vl_vlc.h"

/**
 * Creates a d3d12_video_decoder and returns its embedded pipe_video_codec
 * vtable, or nullptr on failure. On any failure path the partially
 * initialized object is torn down through d3d12_video_decoder_destroy.
 */
struct pipe_video_codec *
d3d12_video_create_decoder(struct pipe_context *context, const struct pipe_video_codec *codec)
{
   ///
   /// Initialize d3d12_video_decoder
   ///

   // Not using new doesn't call ctor and the initializations in the class declaration are lost
   struct d3d12_video_decoder *pD3D12Dec = new d3d12_video_decoder;

   pD3D12Dec->base = *codec;
   pD3D12Dec->m_screen = context->screen;

   pD3D12Dec->base.context = context;
   pD3D12Dec->base.width = codec->width;
   pD3D12Dec->base.height = codec->height;
   // Only fill methods that are supported by the d3d12 decoder, leaving null the rest (ie. encode_* / decode_macroblock
   // / get_feedback for encode)
   pD3D12Dec->base.destroy = d3d12_video_decoder_destroy;
   pD3D12Dec->base.begin_frame = d3d12_video_decoder_begin_frame;
   pD3D12Dec->base.decode_bitstream = d3d12_video_decoder_decode_bitstream;
   pD3D12Dec->base.end_frame = d3d12_video_decoder_end_frame;
   pD3D12Dec->base.flush = d3d12_video_decoder_flush;

   // Derive DXGI format and D3D12 profile enums from the requested pipe profile.
   pD3D12Dec->m_decodeFormat = d3d12_convert_pipe_video_profile_to_dxgi_format(codec->profile);
   pD3D12Dec->m_d3d12DecProfileType = d3d12_video_decoder_convert_pipe_video_profile_to_profile_type(codec->profile);
   pD3D12Dec->m_d3d12DecProfile = d3d12_video_decoder_convert_pipe_video_profile_to_d3d12_profile(codec->profile);

   ///
   /// Try initializing D3D12 Video device and check for device caps
   ///

   struct d3d12_context *pD3D12Ctx = (struct d3d12_context *) context;
   pD3D12Dec->m_pD3D12Screen = d3d12_screen(pD3D12Ctx->base.screen);

   ///
   /// Create decode objects
   ///
   HRESULT hr = S_OK;
   // QueryInterface for ID3D12VideoDevice; failure means the adapter has no video support.
   if (FAILED(pD3D12Dec->m_pD3D12Screen->dev->QueryInterface(
          IID_PPV_ARGS(pD3D12Dec->m_spD3D12VideoDevice.GetAddressOf())))) {
      debug_printf("[d3d12_video_decoder] d3d12_video_create_decoder - D3D12 Device has no Video support\n");
      goto failed;
   }

   if (!d3d12_video_decoder_check_caps_and_create_decoder(pD3D12Dec->m_pD3D12Screen, pD3D12Dec)) {
      debug_printf("[d3d12_video_decoder] d3d12_video_create_decoder - Failure on "
                   "d3d12_video_decoder_check_caps_and_create_decoder\n");
      goto failed;
   }

   if (!d3d12_video_decoder_create_command_objects(pD3D12Dec->m_pD3D12Screen, pD3D12Dec)) {
      debug_printf(
         "[d3d12_video_decoder] d3d12_video_create_decoder - Failure on d3d12_video_decoder_create_command_objects\n");
      goto failed;
   }

   if (!d3d12_video_decoder_create_video_state_buffers(pD3D12Dec->m_pD3D12Screen, pD3D12Dec)) {
      debug_printf("[d3d12_video_decoder] d3d12_video_create_decoder - Failure on "
                   "d3d12_video_decoder_create_video_state_buffers\n");
      goto failed;
   }

   // Query per-format info (e.g. plane count, used later when transitioning
   // each plane subresource in end_frame).
   pD3D12Dec->m_decodeFormatInfo = { pD3D12Dec->m_decodeFormat };
   hr = pD3D12Dec->m_pD3D12Screen->dev->CheckFeatureSupport(D3D12_FEATURE_FORMAT_INFO,
                                                            &pD3D12Dec->m_decodeFormatInfo,
                                                            sizeof(pD3D12Dec->m_decodeFormatInfo));
   if(FAILED(hr)) {
      debug_printf("CheckFeatureSupport failed with HR %x\n", hr);
      goto failed;
   }

   return &pD3D12Dec->base;

failed:
   // Destroy tolerates a partially-initialized decoder (members are smart pointers).
   if (pD3D12Dec != nullptr) {
      d3d12_video_decoder_destroy((struct pipe_video_codec *) pD3D12Dec);
   }

   return nullptr;
}

/**
 * Destroys a d3d12_video_decoder
 * Call destroy_XX for applicable XX nested member types before deallocating
 * Destroy methods should check != nullptr on their input target argument as this method can be called as part of
 * cleanup from failure on the creation method
 */
void
d3d12_video_decoder_destroy(struct pipe_video_codec *codec)
{
   if (codec == nullptr) {
      return;
   }

   d3d12_video_decoder_flush(codec); // Flush pending work before destroying.

   struct d3d12_video_decoder *pD3D12Dec = (struct d3d12_video_decoder *) codec;

   //
   // Destroys a decoder
   // Call destroy_XX for applicable XX nested member types before deallocating
   // Destroy methods should check != nullptr on their input target argument as this method can be called as part of
   // cleanup from failure on the creation method
   //

   // No need for d3d12_destroy_video_objects
   //    All the objects created here are smart pointer members of d3d12_video_decoder
   // No need for d3d12_destroy_video_decoder_and_heap
   //    All the objects created here are smart pointer members of d3d12_video_decoder
   // No need for d3d12_destroy_video_dpbmanagers
   //    All the objects created here are smart pointer members of d3d12_video_decoder

   // No need for m_pD3D12Screen as it is not managed by d3d12_video_decoder

   // Call dtor to make ComPtr work
   delete pD3D12Dec;
}

/**
 * start decoding of a new frame
 */
void
d3d12_video_decoder_begin_frame(struct pipe_video_codec *codec,
                                struct pipe_video_buffer *target,
                                struct pipe_picture_desc *picture)
{
   // Do nothing here.
Initialize happens on decoder creation, re-config (if any) happens in 170 // d3d12_video_decoder_decode_bitstream 171 struct d3d12_video_decoder *pD3D12Dec = (struct d3d12_video_decoder *) codec; 172 assert(pD3D12Dec); 173 debug_printf("[d3d12_video_decoder] d3d12_video_decoder_begin_frame finalized for fenceValue: %d\n", 174 pD3D12Dec->m_fenceValue); 175} 176 177/** 178 * decode a bitstream 179 */ 180void 181d3d12_video_decoder_decode_bitstream(struct pipe_video_codec *codec, 182 struct pipe_video_buffer *target, 183 struct pipe_picture_desc *picture, 184 unsigned num_buffers, 185 const void *const *buffers, 186 const unsigned *sizes) 187{ 188 struct d3d12_video_decoder *pD3D12Dec = (struct d3d12_video_decoder *) codec; 189 assert(pD3D12Dec); 190 debug_printf("[d3d12_video_decoder] d3d12_video_decoder_decode_bitstream started for fenceValue: %d\n", 191 pD3D12Dec->m_fenceValue); 192 assert(pD3D12Dec->m_spD3D12VideoDevice); 193 assert(pD3D12Dec->m_spDecodeCommandQueue); 194 assert(pD3D12Dec->m_pD3D12Screen); 195 struct d3d12_video_buffer *pD3D12VideoBuffer = (struct d3d12_video_buffer *) target; 196 assert(pD3D12VideoBuffer); 197 198 /// 199 /// Compressed bitstream buffers 200 /// 201 202 /// Mesa VA frontend Video buffer passing semantics for H264, HEVC, MPEG4, VC1 and PIPE_VIDEO_PROFILE_VC1_ADVANCED 203 /// are: If num_buffers == 1 -> buf[0] has the compressed bitstream WITH the starting code If num_buffers == 2 -> 204 /// buf[0] has the NALU starting code and buf[1] has the compressed bitstream WITHOUT any starting code. If 205 /// num_buffers = 3 -> It's JPEG, not supported in D3D12. num_buffers is at most 3. 
206 /// Mesa VDPAU frontend passes the buffers as they get passed in VdpDecoderRender without fixing any start codes 207 /// except for PIPE_VIDEO_PROFILE_VC1_ADVANCED 208 // In https://http.download.nvidia.com/XFree86/vdpau/doxygen/html/index.html#video_mixer_usage it's mentioned that: 209 // It is recommended that applications pass solely the slice data to VDPAU; specifically that any header data 210 // structures be excluded from the portion of the bitstream passed to VDPAU. VDPAU implementations must operate 211 // correctly if non-slice data is included, at least for formats employing start codes to delimit slice data. For all 212 // codecs/profiles it's highly recommended (when the codec/profile has such codes...) that the start codes are passed 213 // to VDPAU, even when not included in the bitstream the VDPAU client is parsing. Let's assume we get all the start 214 // codes for VDPAU. The doc also says "VDPAU implementations must operate correctly if non-slice data is included, at 215 // least for formats employing start codes to delimit slice data" if we ever get an issue with VDPAU start codes we 216 // should consider adding the code that handles this in the VDPAU layer above the gallium driver like mesa VA does. 217 218 // To handle the multi-slice case end_frame already takes care of this by parsing the start codes from the 219 // combined bitstream of all decode_bitstream calls. 
220 221 // VAAPI seems to send one decode_bitstream command per slice, but we should also support the VDPAU case where the 222 // buffers have multiple buffer array entry per slice {startCode (optional), slice1, slice2, ..., startCode 223 // (optional) , sliceN} 224 225 if (num_buffers > 2) // Assume this means multiple slices at once in a decode_bitstream call 226 { 227 // Based on VA frontend codebase, this never happens for video (no JPEG) 228 // Based on VDPAU frontends codebase, this only happens when sending more than one slice at once in decode bitstream 229 230 // To handle the case where VDPAU send all the slices at once in a single decode_bitstream call, let's pretend it 231 // was a series of different calls 232 233 // group by start codes and buffers and perform calls for the number of slices 234 debug_printf("[d3d12_video_decoder] d3d12_video_decoder_decode_bitstream multiple slices on same call detected " 235 "for fenceValue: %d, breaking down the calls into one per slice\n", 236 pD3D12Dec->m_fenceValue); 237 238 size_t curBufferIdx = 0; 239 240 // Vars to be used for the delegation calls to decode_bitstream 241 unsigned call_num_buffers = 0; 242 const void *const *call_buffers = nullptr; 243 const unsigned *call_sizes = nullptr; 244 245 while (curBufferIdx < num_buffers) { 246 // Store the current buffer as the base array pointer for the delegated call, later decide if it'll be a 247 // startcode+slicedata or just slicedata call 248 call_buffers = &buffers[curBufferIdx]; 249 call_sizes = &sizes[curBufferIdx]; 250 251 // Usually start codes are less or equal than 4 bytes 252 // If the current buffer is a start code buffer, send it along with the next buffer. Otherwise, just send the 253 // current buffer. 254 call_num_buffers = (sizes[curBufferIdx] <= 4) ? 
2 : 1; 255 256 // Delegate call with one or two buffers only 257 d3d12_video_decoder_decode_bitstream(codec, target, picture, call_num_buffers, call_buffers, call_sizes); 258 259 curBufferIdx += call_num_buffers; // Consume from the loop the buffers sent in the last call 260 } 261 } else { 262 /// 263 /// Handle single slice buffer path, maybe with an extra start code buffer at buffers[0]. 264 /// 265 266 // Both the start codes being present at buffers[0] and the rest in buffers [1] or full buffer at [0] cases can be 267 // handled by flattening all the buffers into a single one and passing that to HW. 268 269 size_t totalReceivedBuffersSize = 0u; // Combined size of all sizes[] 270 for (size_t bufferIdx = 0; bufferIdx < num_buffers; bufferIdx++) { 271 totalReceivedBuffersSize += sizes[bufferIdx]; 272 } 273 274 // Bytes of data pre-staged before this decode_frame call 275 size_t preStagedDataSize = pD3D12Dec->m_stagingDecodeBitstream.size(); 276 277 // Extend the staging buffer size, as decode_frame can be called several times before end_frame 278 pD3D12Dec->m_stagingDecodeBitstream.resize(preStagedDataSize + totalReceivedBuffersSize); 279 280 // Point newSliceDataPositionDstBase to the end of the pre-staged data in m_stagingDecodeBitstream, where the new 281 // buffers will be appended 282 uint8_t *newSliceDataPositionDstBase = pD3D12Dec->m_stagingDecodeBitstream.data() + preStagedDataSize; 283 284 // Append new data at the end. 
285 size_t dstOffset = 0u; 286 for (size_t bufferIdx = 0; bufferIdx < num_buffers; bufferIdx++) { 287 memcpy(newSliceDataPositionDstBase + dstOffset, buffers[bufferIdx], sizes[bufferIdx]); 288 dstOffset += sizes[bufferIdx]; 289 } 290 291 debug_printf("[d3d12_video_decoder] d3d12_video_decoder_decode_bitstream finalized for fenceValue: %d\n", 292 pD3D12Dec->m_fenceValue); 293 } 294} 295 296void 297d3d12_video_decoder_store_upper_layer_references(struct d3d12_video_decoder *pD3D12Dec, 298 struct pipe_video_buffer *target, 299 struct pipe_picture_desc *picture) 300{ 301 switch (pD3D12Dec->m_d3d12DecProfileType) { 302 case d3d12_video_decode_profile_type_h264: 303 { 304 pipe_h264_picture_desc *pPicControlH264 = (pipe_h264_picture_desc *) picture; 305 pD3D12Dec->m_pCurrentDecodeTarget = target; 306 pD3D12Dec->m_pCurrentReferenceTargets = pPicControlH264->ref; 307 } break; 308 309 default: 310 { 311 unreachable("Unsupported d3d12_video_decode_profile_type"); 312 } break; 313 } 314} 315 316/** 317 * end decoding of the current frame 318 */ 319void 320d3d12_video_decoder_end_frame(struct pipe_video_codec *codec, 321 struct pipe_video_buffer *target, 322 struct pipe_picture_desc *picture) 323{ 324 struct d3d12_video_decoder *pD3D12Dec = (struct d3d12_video_decoder *) codec; 325 assert(pD3D12Dec); 326 struct d3d12_screen *pD3D12Screen = (struct d3d12_screen *) pD3D12Dec->m_pD3D12Screen; 327 assert(pD3D12Screen); 328 debug_printf("[d3d12_video_decoder] d3d12_video_decoder_end_frame started for fenceValue: %d\n", 329 pD3D12Dec->m_fenceValue); 330 assert(pD3D12Dec->m_spD3D12VideoDevice); 331 assert(pD3D12Dec->m_spDecodeCommandQueue); 332 struct d3d12_video_buffer *pD3D12VideoBuffer = (struct d3d12_video_buffer *) target; 333 assert(pD3D12VideoBuffer); 334 335 /// 336 /// Store current decode output target texture and reference textures from upper layer 337 /// 338 d3d12_video_decoder_store_upper_layer_references(pD3D12Dec, target, picture); 339 340 /// 341 /// Codec header 
picture parameters buffers 342 /// 343 344 d3d12_video_decoder_store_converted_dxva_picparams_from_pipe_input(pD3D12Dec, picture, pD3D12VideoBuffer); 345 assert(pD3D12Dec->m_picParamsBuffer.size() > 0); 346 347 /// 348 /// Prepare Slice control buffers before clearing staging buffer 349 /// 350 assert(pD3D12Dec->m_stagingDecodeBitstream.size() > 0); // Make sure the staging wasn't cleared yet in end_frame 351 d3d12_video_decoder_prepare_dxva_slices_control(pD3D12Dec, picture); 352 assert(pD3D12Dec->m_SliceControlBuffer.size() > 0); 353 354 /// 355 /// Upload m_stagingDecodeBitstream to GPU memory now that end_frame is called and clear staging buffer 356 /// 357 358 uint64_t sliceDataStagingBufferSize = pD3D12Dec->m_stagingDecodeBitstream.size(); 359 uint8_t *sliceDataStagingBufferPtr = pD3D12Dec->m_stagingDecodeBitstream.data(); 360 361 // Reallocate if necessary to accomodate the current frame bitstream buffer in GPU memory 362 if (pD3D12Dec->m_curFrameCompressedBitstreamBufferAllocatedSize < sliceDataStagingBufferSize) { 363 if (!d3d12_video_decoder_create_staging_bitstream_buffer(pD3D12Screen, pD3D12Dec, sliceDataStagingBufferSize)) { 364 debug_printf("[d3d12_video_decoder] d3d12_video_decoder_end_frame - Failure on " 365 "d3d12_video_decoder_create_staging_bitstream_buffer\n"); 366 debug_printf("[d3d12_video_encoder] d3d12_video_decoder_end_frame failed for fenceValue: %d\n", 367 pD3D12Dec->m_fenceValue); 368 assert(false); 369 return; 370 } 371 } 372 373 // Upload frame bitstream CPU data to ID3D12Resource buffer 374 pD3D12Dec->m_curFrameCompressedBitstreamBufferPayloadSize = 375 sliceDataStagingBufferSize; // This can be less than m_curFrameCompressedBitstreamBufferAllocatedSize. 376 assert(pD3D12Dec->m_curFrameCompressedBitstreamBufferPayloadSize <= 377 pD3D12Dec->m_curFrameCompressedBitstreamBufferAllocatedSize); 378 379 /* One-shot transfer operation with data supplied in a user 380 * pointer. 
381 */ 382 pipe_resource *pPipeCompressedBufferObj = 383 d3d12_resource_from_resource(&pD3D12Screen->base, pD3D12Dec->m_curFrameCompressedBitstreamBuffer.Get()); 384 assert(pPipeCompressedBufferObj); 385 pD3D12Dec->base.context->buffer_subdata(pD3D12Dec->base.context, // context 386 pPipeCompressedBufferObj, // dst buffer 387 PIPE_MAP_WRITE, // usage PIPE_MAP_x 388 0, // offset 389 sizeof(*sliceDataStagingBufferPtr) * sliceDataStagingBufferSize, // size 390 sliceDataStagingBufferPtr // data 391 ); 392 393 // Flush buffer_subdata batch and wait on this CPU thread for GPU work completion 394 // before deleting the source CPU buffer below 395 struct pipe_fence_handle *pUploadGPUCompletionFence = NULL; 396 pD3D12Dec->base.context->flush(pD3D12Dec->base.context, 397 &pUploadGPUCompletionFence, 398 PIPE_FLUSH_ASYNC | PIPE_FLUSH_HINT_FINISH); 399 assert(pUploadGPUCompletionFence); 400 debug_printf("[d3d12_video_decoder] d3d12_video_decoder_end_frame - Waiting on GPU completion fence for " 401 "buffer_subdata to upload compressed bitstream.\n"); 402 pD3D12Screen->base.fence_finish(&pD3D12Screen->base, NULL, pUploadGPUCompletionFence, PIPE_TIMEOUT_INFINITE); 403 pD3D12Screen->base.fence_reference(&pD3D12Screen->base, &pUploadGPUCompletionFence, NULL); 404 405 // [After buffer_subdata GPU work is finished] Clear CPU staging buffer now that end_frame is called and was uploaded 406 // to GPU for DecodeFrame call. 407 pD3D12Dec->m_stagingDecodeBitstream.resize(0); 408 409 /// 410 /// Proceed to record the GPU Decode commands 411 /// 412 413 // Requested conversions by caller upper layer (none for now) 414 d3d12_video_decode_output_conversion_arguments requestedConversionArguments = {}; 415 416 /// 417 /// Record DecodeFrame operation and resource state transitions. 
418 /// 419 420 // Translate input D3D12 structure 421 D3D12_VIDEO_DECODE_INPUT_STREAM_ARGUMENTS d3d12InputArguments = {}; 422 423 d3d12InputArguments.CompressedBitstream.pBuffer = pD3D12Dec->m_curFrameCompressedBitstreamBuffer.Get(); 424 d3d12InputArguments.CompressedBitstream.Offset = 0u; 425 constexpr uint64_t d3d12BitstreamOffsetAlignment = 426 128u; // specified in 427 // https://docs.microsoft.com/en-us/windows/win32/api/d3d12video/ne-d3d12video-d3d12_video_decode_tier 428 assert((d3d12InputArguments.CompressedBitstream.Offset == 0) || 429 ((d3d12InputArguments.CompressedBitstream.Offset % d3d12BitstreamOffsetAlignment) == 0)); 430 d3d12InputArguments.CompressedBitstream.Size = pD3D12Dec->m_curFrameCompressedBitstreamBufferPayloadSize; 431 432 D3D12_RESOURCE_BARRIER resourceBarrierCommonToDecode[1] = { 433 CD3DX12_RESOURCE_BARRIER::Transition(d3d12InputArguments.CompressedBitstream.pBuffer, 434 D3D12_RESOURCE_STATE_COMMON, 435 D3D12_RESOURCE_STATE_VIDEO_DECODE_READ), 436 }; 437 pD3D12Dec->m_spDecodeCommandList->ResourceBarrier(1u, resourceBarrierCommonToDecode); 438 439 // Schedule reverse (back to common) transitions before command list closes for current frame 440 pD3D12Dec->m_transitionsBeforeCloseCmdList.push_back( 441 CD3DX12_RESOURCE_BARRIER::Transition(d3d12InputArguments.CompressedBitstream.pBuffer, 442 D3D12_RESOURCE_STATE_VIDEO_DECODE_READ, 443 D3D12_RESOURCE_STATE_COMMON)); 444 445 /// 446 /// Clear texture (no reference only flags in resource allocation) to use as decode output to send downstream for 447 /// display/consumption 448 /// 449 ID3D12Resource *pOutputD3D12Texture; 450 uint outputD3D12Subresource = 0; 451 452 /// 453 /// Ref Only texture (with reference only flags in resource allocation) to use as reconstructed picture decode output 454 /// and to store as future reference in DPB 455 /// 456 ID3D12Resource *pRefOnlyOutputD3D12Texture; 457 uint refOnlyOutputD3D12Subresource = 0; 458 459 
if(!d3d12_video_decoder_prepare_for_decode_frame(pD3D12Dec, 460 target, 461 pD3D12VideoBuffer, 462 &pOutputD3D12Texture, // output 463 &outputD3D12Subresource, // output 464 &pRefOnlyOutputD3D12Texture, // output 465 &refOnlyOutputD3D12Subresource, // output 466 requestedConversionArguments)) { 467 debug_printf("[d3d12_video_decoder] d3d12_video_decoder_end_frame - Failure on " 468 "d3d12_video_decoder_prepare_for_decode_frame\n"); 469 debug_printf("[d3d12_video_encoder] d3d12_video_decoder_end_frame failed for fenceValue: %d\n", 470 pD3D12Dec->m_fenceValue); 471 assert(false); 472 return; 473 } 474 475 /// 476 /// Set codec picture parameters CPU buffer 477 /// 478 479 d3d12InputArguments.NumFrameArguments = 480 1u; // Only the codec data received from the above layer with picture params 481 d3d12InputArguments.FrameArguments[d3d12InputArguments.NumFrameArguments - 1] = { 482 D3D12_VIDEO_DECODE_ARGUMENT_TYPE_PICTURE_PARAMETERS, 483 static_cast<uint32_t>(pD3D12Dec->m_picParamsBuffer.size()), 484 pD3D12Dec->m_picParamsBuffer.data(), 485 }; 486 487 if (pD3D12Dec->m_SliceControlBuffer.size() > 0) { 488 d3d12InputArguments.NumFrameArguments++; 489 d3d12InputArguments.FrameArguments[d3d12InputArguments.NumFrameArguments - 1] = { 490 D3D12_VIDEO_DECODE_ARGUMENT_TYPE_SLICE_CONTROL, 491 static_cast<uint32_t>(pD3D12Dec->m_SliceControlBuffer.size()), 492 pD3D12Dec->m_SliceControlBuffer.data(), 493 }; 494 } 495 496 if (pD3D12Dec->m_InverseQuantMatrixBuffer.size() > 0) { 497 d3d12InputArguments.NumFrameArguments++; 498 d3d12InputArguments.FrameArguments[d3d12InputArguments.NumFrameArguments - 1] = { 499 D3D12_VIDEO_DECODE_ARGUMENT_TYPE_INVERSE_QUANTIZATION_MATRIX, 500 static_cast<uint32_t>(pD3D12Dec->m_InverseQuantMatrixBuffer.size()), 501 pD3D12Dec->m_InverseQuantMatrixBuffer.data(), 502 }; 503 } 504 505 d3d12InputArguments.ReferenceFrames = pD3D12Dec->m_spDPBManager->get_current_reference_frames(); 506 if (D3D12_DEBUG_VERBOSE & d3d12_debug) { 507 
pD3D12Dec->m_spDPBManager->print_dpb(); 508 } 509 510 d3d12InputArguments.pHeap = pD3D12Dec->m_spVideoDecoderHeap.Get(); 511 512 // translate output D3D12 structure 513 D3D12_VIDEO_DECODE_OUTPUT_STREAM_ARGUMENTS1 d3d12OutputArguments = {}; 514 d3d12OutputArguments.pOutputTexture2D = pOutputD3D12Texture; 515 d3d12OutputArguments.OutputSubresource = outputD3D12Subresource; 516 517 bool fReferenceOnly = (pD3D12Dec->m_ConfigDecoderSpecificFlags & 518 d3d12_video_decode_config_specific_flag_reference_only_textures_required) != 0; 519 if (fReferenceOnly) { 520 d3d12OutputArguments.ConversionArguments.Enable = TRUE; 521 522 assert(pRefOnlyOutputD3D12Texture); 523 d3d12OutputArguments.ConversionArguments.pReferenceTexture2D = pRefOnlyOutputD3D12Texture; 524 d3d12OutputArguments.ConversionArguments.ReferenceSubresource = refOnlyOutputD3D12Subresource; 525 526 const D3D12_RESOURCE_DESC &descReference = GetDesc(d3d12OutputArguments.ConversionArguments.pReferenceTexture2D); 527 d3d12OutputArguments.ConversionArguments.DecodeColorSpace = d3d12_convert_from_legacy_color_space( 528 !util_format_is_yuv(d3d12_get_pipe_format(descReference.Format)), 529 util_format_get_blocksize(d3d12_get_pipe_format(descReference.Format)) * 8 /*bytes to bits conversion*/, 530 /* StudioRGB= */ false, 531 /* P709= */ true, 532 /* StudioYUV= */ true); 533 534 const D3D12_RESOURCE_DESC &descOutput = GetDesc(d3d12OutputArguments.pOutputTexture2D); 535 d3d12OutputArguments.ConversionArguments.OutputColorSpace = d3d12_convert_from_legacy_color_space( 536 !util_format_is_yuv(d3d12_get_pipe_format(descOutput.Format)), 537 util_format_get_blocksize(d3d12_get_pipe_format(descOutput.Format)) * 8 /*bytes to bits conversion*/, 538 /* StudioRGB= */ false, 539 /* P709= */ true, 540 /* StudioYUV= */ true); 541 542 const D3D12_VIDEO_DECODER_HEAP_DESC &HeapDesc = GetDesc(pD3D12Dec->m_spVideoDecoderHeap.Get()); 543 d3d12OutputArguments.ConversionArguments.OutputWidth = HeapDesc.DecodeWidth; 544 
d3d12OutputArguments.ConversionArguments.OutputHeight = HeapDesc.DecodeHeight; 545 } else { 546 d3d12OutputArguments.ConversionArguments.Enable = FALSE; 547 } 548 549 CD3DX12_RESOURCE_DESC outputDesc(GetDesc(d3d12OutputArguments.pOutputTexture2D)); 550 uint32_t MipLevel, PlaneSlice, ArraySlice; 551 D3D12DecomposeSubresource(d3d12OutputArguments.OutputSubresource, 552 outputDesc.MipLevels, 553 outputDesc.ArraySize(), 554 MipLevel, 555 ArraySlice, 556 PlaneSlice); 557 558 for (PlaneSlice = 0; PlaneSlice < pD3D12Dec->m_decodeFormatInfo.PlaneCount; PlaneSlice++) { 559 uint planeOutputSubresource = outputDesc.CalcSubresource(MipLevel, ArraySlice, PlaneSlice); 560 561 D3D12_RESOURCE_BARRIER resourceBarrierCommonToDecode[1] = { 562 CD3DX12_RESOURCE_BARRIER::Transition(d3d12OutputArguments.pOutputTexture2D, 563 D3D12_RESOURCE_STATE_COMMON, 564 D3D12_RESOURCE_STATE_VIDEO_DECODE_WRITE, 565 planeOutputSubresource), 566 }; 567 pD3D12Dec->m_spDecodeCommandList->ResourceBarrier(1u, resourceBarrierCommonToDecode); 568 } 569 570 // Schedule reverse (back to common) transitions before command list closes for current frame 571 for (PlaneSlice = 0; PlaneSlice < pD3D12Dec->m_decodeFormatInfo.PlaneCount; PlaneSlice++) { 572 uint planeOutputSubresource = outputDesc.CalcSubresource(MipLevel, ArraySlice, PlaneSlice); 573 pD3D12Dec->m_transitionsBeforeCloseCmdList.push_back( 574 CD3DX12_RESOURCE_BARRIER::Transition(d3d12OutputArguments.pOutputTexture2D, 575 D3D12_RESOURCE_STATE_VIDEO_DECODE_WRITE, 576 D3D12_RESOURCE_STATE_COMMON, 577 planeOutputSubresource)); 578 } 579 580 // Record DecodeFrame 581 582 pD3D12Dec->m_spDecodeCommandList->DecodeFrame1(pD3D12Dec->m_spVideoDecoder.Get(), 583 &d3d12OutputArguments, 584 &d3d12InputArguments); 585 586 debug_printf("[d3d12_video_decoder] d3d12_video_decoder_end_frame finalized for fenceValue: %d\n", 587 pD3D12Dec->m_fenceValue); 588 589 /// 590 /// Flush work to the GPU and blocking wait until decode finishes 591 /// 592 pD3D12Dec->m_needsGPUFlush 
= true; 593 d3d12_video_decoder_flush(codec); 594 595 if (!pD3D12Dec->m_spDPBManager->is_pipe_buffer_underlying_output_decode_allocation()) { 596 /// 597 /// If !pD3D12Dec->m_spDPBManager->is_pipe_buffer_underlying_output_decode_allocation() 598 /// We cannot use the standalone video buffer allocation directly and we must use instead 599 /// either a ID3D12Resource with DECODE_REFERENCE only flag or a texture array within the same 600 /// allocation 601 /// Do GPU->GPU texture copy from decode output to pipe target decode texture sampler view planes 602 /// 603 604 // Get destination resource 605 struct pipe_sampler_view **pPipeDstViews = target->get_sampler_view_planes(target); 606 607 // Get source pipe_resource 608 pipe_resource *pPipeSrc = 609 d3d12_resource_from_resource(&pD3D12Screen->base, d3d12OutputArguments.pOutputTexture2D); 610 assert(pPipeSrc); 611 612 // Copy all format subresources/texture planes 613 614 for (PlaneSlice = 0; PlaneSlice < pD3D12Dec->m_decodeFormatInfo.PlaneCount; PlaneSlice++) { 615 assert(d3d12OutputArguments.OutputSubresource < INT16_MAX); 616 struct pipe_box box = { 0, 617 0, 618 // src array slice, taken as Z for TEXTURE_2D_ARRAY 619 static_cast<int16_t>(d3d12OutputArguments.OutputSubresource), 620 static_cast<int>(pPipeDstViews[PlaneSlice]->texture->width0), 621 static_cast<int16_t>(pPipeDstViews[PlaneSlice]->texture->height0), 622 1 }; 623 624 pD3D12Dec->base.context->resource_copy_region(pD3D12Dec->base.context, 625 pPipeDstViews[PlaneSlice]->texture, // dst 626 0, // dst level 627 0, // dstX 628 0, // dstY 629 0, // dstZ 630 (PlaneSlice == 0) ? 
pPipeSrc : pPipeSrc->next, // src
                                                       0,       // src level
                                                       &box);
      }
      // Flush resource_copy_region batch and wait on this CPU thread for GPU work completion
      struct pipe_fence_handle *completion_fence = NULL;
      pD3D12Dec->base.context->flush(pD3D12Dec->base.context,
                                     &completion_fence,
                                     PIPE_FLUSH_ASYNC | PIPE_FLUSH_HINT_FINISH);
      assert(completion_fence);
      debug_printf("[d3d12_video_decoder] d3d12_video_decoder_end_frame - Waiting on GPU completion fence for "
                   "resource_copy_region on decoded frame.\n");
      pD3D12Screen->base.fence_finish(&pD3D12Screen->base, NULL, completion_fence, PIPE_TIMEOUT_INFINITE);
      pD3D12Screen->base.fence_reference(&pD3D12Screen->base, &completion_fence, NULL);
   }
}

/**
 * flush any outstanding command buffers to the hardware
 * should be called before a video_buffer is accessed by the gallium frontend again
 *
 * Closes and executes the decode command list, CPU-blocks until the GPU
 * signals m_fenceValue, then resets the allocator/list for the next frame.
 * No-op when m_needsGPUFlush is false.
 */
void
d3d12_video_decoder_flush(struct pipe_video_codec *codec)
{
   struct d3d12_video_decoder *pD3D12Dec = (struct d3d12_video_decoder *) codec;
   assert(pD3D12Dec);
   assert(pD3D12Dec->m_spD3D12VideoDevice);
   assert(pD3D12Dec->m_spDecodeCommandQueue);
   debug_printf("[d3d12_video_decoder] d3d12_video_decoder_flush started. Will flush video queue work and CPU wait on "
                "fenceValue: %d\n",
                pD3D12Dec->m_fenceValue);

   if (!pD3D12Dec->m_needsGPUFlush) {
      debug_printf("[d3d12_video_decoder] d3d12_video_decoder_flush started. Nothing to flush, all up to date.\n");
   } else {
      // Detect device removal before submitting any work.
      HRESULT hr = pD3D12Dec->m_pD3D12Screen->dev->GetDeviceRemovedReason();
      if (hr != S_OK) {
         debug_printf("[d3d12_video_decoder] d3d12_video_decoder_flush"
                      " - D3D12Device was removed BEFORE commandlist "
                      "execution with HR %x.\n",
                      hr);
         goto flush_fail;
      }

      // Close and execute command list and wait for idle on CPU blocking
      // this method before resetting list and allocator for next submission.

      // Play back the deferred "back to COMMON" transitions recorded during end_frame.
      if (pD3D12Dec->m_transitionsBeforeCloseCmdList.size() > 0) {
         pD3D12Dec->m_spDecodeCommandList->ResourceBarrier(pD3D12Dec->m_transitionsBeforeCloseCmdList.size(),
                                                           pD3D12Dec->m_transitionsBeforeCloseCmdList.data());
         pD3D12Dec->m_transitionsBeforeCloseCmdList.clear();
      }

      hr = pD3D12Dec->m_spDecodeCommandList->Close();
      if (FAILED(hr)) {
         debug_printf("[d3d12_video_decoder] d3d12_video_decoder_flush - Can't close command list with HR %x\n", hr);
         goto flush_fail;
      }

      ID3D12CommandList *ppCommandLists[1] = { pD3D12Dec->m_spDecodeCommandList.Get() };
      pD3D12Dec->m_spDecodeCommandQueue->ExecuteCommandLists(1, ppCommandLists);
      pD3D12Dec->m_spDecodeCommandQueue->Signal(pD3D12Dec->m_spFence.Get(), pD3D12Dec->m_fenceValue);
      // Per D3D12 docs, SetEventOnCompletion with a null event handle blocks the
      // calling CPU thread until the fence reaches m_fenceValue.
      pD3D12Dec->m_spFence->SetEventOnCompletion(pD3D12Dec->m_fenceValue, nullptr);
      debug_printf("[d3d12_video_decoder] d3d12_video_decoder_flush - ExecuteCommandLists finished on signal with "
                   "fenceValue: %d\n",
                   pD3D12Dec->m_fenceValue);

      // Safe to reset the allocator now: the GPU work above has completed.
      hr = pD3D12Dec->m_spCommandAllocator->Reset();
      if (FAILED(hr)) {
         debug_printf(
            "[d3d12_video_decoder] d3d12_video_decoder_flush - resetting ID3D12CommandAllocator failed with HR %x\n",
            hr);
         goto flush_fail;
      }

      hr = pD3D12Dec->m_spDecodeCommandList->Reset(pD3D12Dec->m_spCommandAllocator.Get());
      if (FAILED(hr)) {
         debug_printf(
            "[d3d12_video_decoder] d3d12_video_decoder_flush - resetting ID3D12GraphicsCommandList failed with HR %x\n",
            hr);
         goto flush_fail;
      }

      // Validate device was not removed
      hr = pD3D12Dec->m_pD3D12Screen->dev->GetDeviceRemovedReason();
      if (hr != S_OK) {
         debug_printf("[d3d12_video_decoder] d3d12_video_decoder_flush"
                      " - D3D12Device was removed AFTER commandlist "
                      "execution with HR %x, but wasn't before.\n",
                      hr);
         goto flush_fail;
      }

      debug_printf(
         "[d3d12_video_decoder] d3d12_video_decoder_flush - GPU signaled execution finalized for fenceValue: %d\n",
         pD3D12Dec->m_fenceValue);

      pD3D12Dec->m_fenceValue++;
      pD3D12Dec->m_needsGPUFlush = false;
   }
   return;

flush_fail:
   debug_printf("[d3d12_video_decoder] d3d12_video_decoder_flush failed for fenceValue: %d\n", pD3D12Dec->m_fenceValue);
   assert(false);
}

/**
 * Creates the decode command queue, fence, command allocator and command list
 * (all D3D12_COMMAND_LIST_TYPE_VIDEO_DECODE). Returns false on any failure;
 * partially-created objects are ComPtr members released by the decoder dtor.
 */
bool
d3d12_video_decoder_create_command_objects(const struct d3d12_screen *pD3D12Screen,
                                           struct d3d12_video_decoder *pD3D12Dec)
{
   assert(pD3D12Dec->m_spD3D12VideoDevice);

   D3D12_COMMAND_QUEUE_DESC commandQueueDesc = { D3D12_COMMAND_LIST_TYPE_VIDEO_DECODE };
   HRESULT hr = pD3D12Screen->dev->CreateCommandQueue(&commandQueueDesc,
                                                      IID_PPV_ARGS(pD3D12Dec->m_spDecodeCommandQueue.GetAddressOf()));
   if (FAILED(hr)) {
      debug_printf("[d3d12_video_decoder] d3d12_video_decoder_create_command_objects - Call to CreateCommandQueue "
                   "failed with HR %x\n",
                   hr);
      return false;
   }

   hr = pD3D12Screen->dev->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&pD3D12Dec->m_spFence));
   if (FAILED(hr)) {
      debug_printf(
         "[d3d12_video_decoder] d3d12_video_decoder_create_command_objects - Call to CreateFence failed with HR %x\n",
         hr);
      return false;
   }

   hr = pD3D12Screen->dev->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_VIDEO_DECODE,
                                                  IID_PPV_ARGS(pD3D12Dec->m_spCommandAllocator.GetAddressOf()));
   if (FAILED(hr)) {
      debug_printf("[d3d12_video_decoder] d3d12_video_decoder_create_command_objects - Call to "
                   "CreateCommandAllocator failed with HR %x\n",
                   hr);
      return false;
   }

   // Created open (no initial PSO needed for video decode lists).
   hr = pD3D12Screen->dev->CreateCommandList(0,
                                             D3D12_COMMAND_LIST_TYPE_VIDEO_DECODE,
                                             pD3D12Dec->m_spCommandAllocator.Get(),
                                             nullptr,
                                             IID_PPV_ARGS(pD3D12Dec->m_spDecodeCommandList.GetAddressOf()));

   if (FAILED(hr)) {
      debug_printf("[d3d12_video_decoder] d3d12_video_decoder_create_command_objects - Call to CreateCommandList "
                   "failed with HR %x\n",
                   hr);
      return
false; 781 } 782 783 return true; 784} 785 786bool 787d3d12_video_decoder_check_caps_and_create_decoder(const struct d3d12_screen *pD3D12Screen, 788 struct d3d12_video_decoder *pD3D12Dec) 789{ 790 assert(pD3D12Dec->m_spD3D12VideoDevice); 791 792 pD3D12Dec->m_decoderDesc = {}; 793 794 D3D12_VIDEO_DECODE_CONFIGURATION decodeConfiguration = { pD3D12Dec->m_d3d12DecProfile, 795 D3D12_BITSTREAM_ENCRYPTION_TYPE_NONE, 796 D3D12_VIDEO_FRAME_CODED_INTERLACE_TYPE_NONE }; 797 798 D3D12_FEATURE_DATA_VIDEO_DECODE_SUPPORT decodeSupport = {}; 799 decodeSupport.NodeIndex = pD3D12Dec->m_NodeIndex; 800 decodeSupport.Configuration = decodeConfiguration; 801 decodeSupport.Width = pD3D12Dec->base.width; 802 decodeSupport.Height = pD3D12Dec->base.height; 803 decodeSupport.DecodeFormat = pD3D12Dec->m_decodeFormat; 804 // no info from above layer on framerate/bitrate 805 decodeSupport.FrameRate.Numerator = 0; 806 decodeSupport.FrameRate.Denominator = 0; 807 decodeSupport.BitRate = 0; 808 809 HRESULT hr = pD3D12Dec->m_spD3D12VideoDevice->CheckFeatureSupport(D3D12_FEATURE_VIDEO_DECODE_SUPPORT, 810 &decodeSupport, 811 sizeof(decodeSupport)); 812 if (FAILED(hr)) { 813 debug_printf("[d3d12_video_decoder] d3d12_video_decoder_check_caps_and_create_decoder - CheckFeatureSupport " 814 "failed with HR %x\n", 815 hr); 816 return false; 817 } 818 819 if (!(decodeSupport.SupportFlags & D3D12_VIDEO_DECODE_SUPPORT_FLAG_SUPPORTED)) { 820 debug_printf("[d3d12_video_decoder] d3d12_video_decoder_check_caps_and_create_decoder - " 821 "D3D12_VIDEO_DECODE_SUPPORT_FLAG_SUPPORTED was false when checking caps \n"); 822 return false; 823 } 824 825 pD3D12Dec->m_configurationFlags = decodeSupport.ConfigurationFlags; 826 pD3D12Dec->m_tier = decodeSupport.DecodeTier; 827 828 if (d3d12_video_decoder_supports_aot_dpb(decodeSupport, pD3D12Dec->m_d3d12DecProfileType)) { 829 pD3D12Dec->m_ConfigDecoderSpecificFlags |= d3d12_video_decode_config_specific_flag_array_of_textures; 830 } 831 832 if 
(decodeSupport.ConfigurationFlags & D3D12_VIDEO_DECODE_CONFIGURATION_FLAG_HEIGHT_ALIGNMENT_MULTIPLE_32_REQUIRED) { 833 pD3D12Dec->m_ConfigDecoderSpecificFlags |= d3d12_video_decode_config_specific_flag_alignment_height; 834 } 835 836 if (decodeSupport.ConfigurationFlags & D3D12_VIDEO_DECODE_CONFIGURATION_FLAG_REFERENCE_ONLY_ALLOCATIONS_REQUIRED) { 837 pD3D12Dec->m_ConfigDecoderSpecificFlags |= 838 d3d12_video_decode_config_specific_flag_reference_only_textures_required; 839 } 840 841 pD3D12Dec->m_decoderDesc.NodeMask = pD3D12Dec->m_NodeMask; 842 pD3D12Dec->m_decoderDesc.Configuration = decodeConfiguration; 843 844 hr = pD3D12Dec->m_spD3D12VideoDevice->CreateVideoDecoder(&pD3D12Dec->m_decoderDesc, 845 IID_PPV_ARGS(pD3D12Dec->m_spVideoDecoder.GetAddressOf())); 846 if (FAILED(hr)) { 847 debug_printf("[d3d12_video_decoder] d3d12_video_decoder_check_caps_and_create_decoder - CreateVideoDecoder " 848 "failed with HR %x\n", 849 hr); 850 return false; 851 } 852 853 return true; 854} 855 856bool 857d3d12_video_decoder_create_video_state_buffers(const struct d3d12_screen *pD3D12Screen, 858 struct d3d12_video_decoder *pD3D12Dec) 859{ 860 assert(pD3D12Dec->m_spD3D12VideoDevice); 861 if (!d3d12_video_decoder_create_staging_bitstream_buffer(pD3D12Screen, 862 pD3D12Dec, 863 pD3D12Dec->m_InitialCompBitstreamGPUBufferSize)) { 864 debug_printf("[d3d12_video_decoder] d3d12_video_decoder_create_video_state_buffers - Failure on " 865 "d3d12_video_decoder_create_staging_bitstream_buffer\n"); 866 return false; 867 } 868 869 return true; 870} 871 872bool 873d3d12_video_decoder_create_staging_bitstream_buffer(const struct d3d12_screen *pD3D12Screen, 874 struct d3d12_video_decoder *pD3D12Dec, 875 uint64_t bufSize) 876{ 877 assert(pD3D12Dec->m_spD3D12VideoDevice); 878 879 if (pD3D12Dec->m_curFrameCompressedBitstreamBuffer.Get() != nullptr) { 880 pD3D12Dec->m_curFrameCompressedBitstreamBuffer.Reset(); 881 } 882 883 auto descHeap = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT, 
pD3D12Dec->m_NodeMask, pD3D12Dec->m_NodeMask); 884 auto descResource = CD3DX12_RESOURCE_DESC::Buffer(bufSize); 885 HRESULT hr = pD3D12Screen->dev->CreateCommittedResource( 886 &descHeap, 887 D3D12_HEAP_FLAG_NONE, 888 &descResource, 889 D3D12_RESOURCE_STATE_COMMON, 890 nullptr, 891 IID_PPV_ARGS(pD3D12Dec->m_curFrameCompressedBitstreamBuffer.GetAddressOf())); 892 if (FAILED(hr)) { 893 debug_printf("[d3d12_video_decoder] d3d12_video_decoder_create_staging_bitstream_buffer - " 894 "CreateCommittedResource failed with HR %x\n", 895 hr); 896 return false; 897 } 898 899 pD3D12Dec->m_curFrameCompressedBitstreamBufferAllocatedSize = bufSize; 900 return true; 901} 902 903bool 904d3d12_video_decoder_prepare_for_decode_frame(struct d3d12_video_decoder *pD3D12Dec, 905 struct pipe_video_buffer *pCurrentDecodeTarget, 906 struct d3d12_video_buffer *pD3D12VideoBuffer, 907 ID3D12Resource **ppOutTexture2D, 908 uint32_t *pOutSubresourceIndex, 909 ID3D12Resource **ppRefOnlyOutTexture2D, 910 uint32_t *pRefOnlyOutSubresourceIndex, 911 const d3d12_video_decode_output_conversion_arguments &conversionArgs) 912{ 913 if(!d3d12_video_decoder_reconfigure_dpb(pD3D12Dec, pD3D12VideoBuffer, conversionArgs)) { 914 debug_printf("d3d12_video_decoder_reconfigure_dpb failed!\n"); 915 return false; 916 } 917 918 // Refresh DPB active references for current frame, release memory for unused references. 919 d3d12_video_decoder_refresh_dpb_active_references(pD3D12Dec); 920 921 // Get the output texture for the current frame to be decoded 922 pD3D12Dec->m_spDPBManager->get_current_frame_decode_output_texture(pCurrentDecodeTarget, 923 ppOutTexture2D, 924 pOutSubresourceIndex); 925 926 auto vidBuffer = (struct d3d12_video_buffer *)(pCurrentDecodeTarget); 927 // If is_pipe_buffer_underlying_output_decode_allocation is enabled, 928 // we can just use the underlying allocation in pCurrentDecodeTarget 929 // and avoid an extra copy after decoding the frame. 
930 // If this is the case, we need to handle the residency of this resource 931 // (if not we're actually creating the resources with CreateCommitedResource with 932 // residency by default) 933 if(pD3D12Dec->m_spDPBManager->is_pipe_buffer_underlying_output_decode_allocation()) { 934 assert(d3d12_resource_resource(vidBuffer->texture) == *ppOutTexture2D); 935 // Make it permanently resident for video use 936 d3d12_promote_to_permanent_residency(pD3D12Dec->m_pD3D12Screen, vidBuffer->texture); 937 } 938 939 // Get the reference only texture for the current frame to be decoded (if applicable) 940 bool fReferenceOnly = (pD3D12Dec->m_ConfigDecoderSpecificFlags & 941 d3d12_video_decode_config_specific_flag_reference_only_textures_required) != 0; 942 if (fReferenceOnly) { 943 bool needsTransitionToDecodeWrite = false; 944 pD3D12Dec->m_spDPBManager->get_reference_only_output(pCurrentDecodeTarget, 945 ppRefOnlyOutTexture2D, 946 pRefOnlyOutSubresourceIndex, 947 needsTransitionToDecodeWrite); 948 assert(needsTransitionToDecodeWrite); 949 950 CD3DX12_RESOURCE_DESC outputDesc(GetDesc(*ppRefOnlyOutTexture2D)); 951 uint32_t MipLevel, PlaneSlice, ArraySlice; 952 D3D12DecomposeSubresource(*pRefOnlyOutSubresourceIndex, 953 outputDesc.MipLevels, 954 outputDesc.ArraySize(), 955 MipLevel, 956 ArraySlice, 957 PlaneSlice); 958 959 for (PlaneSlice = 0; PlaneSlice < pD3D12Dec->m_decodeFormatInfo.PlaneCount; PlaneSlice++) { 960 uint planeOutputSubresource = outputDesc.CalcSubresource(MipLevel, ArraySlice, PlaneSlice); 961 962 D3D12_RESOURCE_BARRIER resourceBarrierCommonToDecode[1] = { 963 CD3DX12_RESOURCE_BARRIER::Transition(*ppRefOnlyOutTexture2D, 964 D3D12_RESOURCE_STATE_COMMON, 965 D3D12_RESOURCE_STATE_VIDEO_DECODE_WRITE, 966 planeOutputSubresource), 967 }; 968 pD3D12Dec->m_spDecodeCommandList->ResourceBarrier(1u, resourceBarrierCommonToDecode); 969 } 970 971 // Schedule reverse (back to common) transitions before command list closes for current frame 972 for (PlaneSlice = 0; PlaneSlice 
< pD3D12Dec->m_decodeFormatInfo.PlaneCount; PlaneSlice++) { 973 uint planeOutputSubresource = outputDesc.CalcSubresource(MipLevel, ArraySlice, PlaneSlice); 974 pD3D12Dec->m_transitionsBeforeCloseCmdList.push_back( 975 CD3DX12_RESOURCE_BARRIER::Transition(*ppRefOnlyOutTexture2D, 976 D3D12_RESOURCE_STATE_VIDEO_DECODE_WRITE, 977 D3D12_RESOURCE_STATE_COMMON, 978 planeOutputSubresource)); 979 } 980 } 981 982 // If decoded needs reference_only entries in the dpb, use the reference_only allocation for current frame 983 // otherwise, use the standard output resource 984 ID3D12Resource *pCurrentFrameDPBEntry = fReferenceOnly ? *ppRefOnlyOutTexture2D : *ppOutTexture2D; 985 uint32_t currentFrameDPBEntrySubresource = fReferenceOnly ? *pRefOnlyOutSubresourceIndex : *pOutSubresourceIndex; 986 987 switch (pD3D12Dec->m_d3d12DecProfileType) { 988 case d3d12_video_decode_profile_type_h264: 989 { 990 d3d12_video_decoder_prepare_current_frame_references_h264(pD3D12Dec, 991 pCurrentFrameDPBEntry, 992 currentFrameDPBEntrySubresource); 993 } break; 994 995 default: 996 { 997 unreachable("Unsupported d3d12_video_decode_profile_type"); 998 } break; 999 } 1000 1001 return true; 1002} 1003 1004bool 1005d3d12_video_decoder_reconfigure_dpb(struct d3d12_video_decoder *pD3D12Dec, 1006 struct d3d12_video_buffer *pD3D12VideoBuffer, 1007 const d3d12_video_decode_output_conversion_arguments &conversionArguments) 1008{ 1009 uint32_t width; 1010 uint32_t height; 1011 uint16_t maxDPB; 1012 bool isInterlaced; 1013 d3d12_video_decoder_get_frame_info(pD3D12Dec, &width, &height, &maxDPB, isInterlaced); 1014 1015 ID3D12Resource *pPipeD3D12DstResource = d3d12_resource_resource(pD3D12VideoBuffer->texture); 1016 D3D12_RESOURCE_DESC outputResourceDesc = GetDesc(pPipeD3D12DstResource); 1017 1018 pD3D12VideoBuffer->base.interlaced = isInterlaced; 1019 D3D12_VIDEO_FRAME_CODED_INTERLACE_TYPE interlaceTypeRequested = 1020 isInterlaced ? 
D3D12_VIDEO_FRAME_CODED_INTERLACE_TYPE_FIELD_BASED : D3D12_VIDEO_FRAME_CODED_INTERLACE_TYPE_NONE; 1021 if ((pD3D12Dec->m_decodeFormat != outputResourceDesc.Format) || 1022 (pD3D12Dec->m_decoderDesc.Configuration.InterlaceType != interlaceTypeRequested)) { 1023 // Copy current pD3D12Dec->m_decoderDesc, modify decodeprofile and re-create decoder. 1024 D3D12_VIDEO_DECODER_DESC decoderDesc = pD3D12Dec->m_decoderDesc; 1025 decoderDesc.Configuration.InterlaceType = interlaceTypeRequested; 1026 decoderDesc.Configuration.DecodeProfile = 1027 d3d12_video_decoder_resolve_profile(pD3D12Dec->m_d3d12DecProfileType); 1028 pD3D12Dec->m_spVideoDecoder.Reset(); 1029 HRESULT hr = 1030 pD3D12Dec->m_spD3D12VideoDevice->CreateVideoDecoder(&decoderDesc, 1031 IID_PPV_ARGS(pD3D12Dec->m_spVideoDecoder.GetAddressOf())); 1032 if (FAILED(hr)) { 1033 debug_printf( 1034 "[d3d12_video_decoder] d3d12_video_decoder_reconfigure_dpb - CreateVideoDecoder failed with HR %x\n", 1035 hr); 1036 return false; 1037 } 1038 // Update state after CreateVideoDecoder succeeds only. 1039 pD3D12Dec->m_decoderDesc = decoderDesc; 1040 } 1041 1042 if (!pD3D12Dec->m_spDPBManager || !pD3D12Dec->m_spVideoDecoderHeap || 1043 pD3D12Dec->m_decodeFormat != outputResourceDesc.Format || pD3D12Dec->m_decoderHeapDesc.DecodeWidth != width || 1044 pD3D12Dec->m_decoderHeapDesc.DecodeHeight != height || 1045 pD3D12Dec->m_decoderHeapDesc.MaxDecodePictureBufferCount < maxDPB) { 1046 // Detect the combination of AOT/ReferenceOnly to configure the DPB manager 1047 uint16_t referenceCount = (conversionArguments.Enable) ? (uint16_t) conversionArguments.ReferenceFrameCount + 1048 1 /*extra slot for current picture*/ : 1049 maxDPB; 1050 d3d12_video_decode_dpb_descriptor dpbDesc = {}; 1051 dpbDesc.Width = (conversionArguments.Enable) ? conversionArguments.ReferenceInfo.Width : width; 1052 dpbDesc.Height = (conversionArguments.Enable) ? 
conversionArguments.ReferenceInfo.Height : height; 1053 dpbDesc.Format = 1054 (conversionArguments.Enable) ? conversionArguments.ReferenceInfo.Format.Format : outputResourceDesc.Format; 1055 dpbDesc.fArrayOfTexture = 1056 ((pD3D12Dec->m_ConfigDecoderSpecificFlags & d3d12_video_decode_config_specific_flag_array_of_textures) != 0); 1057 dpbDesc.dpbSize = referenceCount; 1058 dpbDesc.m_NodeMask = pD3D12Dec->m_NodeMask; 1059 dpbDesc.fReferenceOnly = ((pD3D12Dec->m_ConfigDecoderSpecificFlags & 1060 d3d12_video_decode_config_specific_flag_reference_only_textures_required) != 0); 1061 1062 // Create DPB manager 1063 if (pD3D12Dec->m_spDPBManager == nullptr) { 1064 pD3D12Dec->m_spDPBManager.reset(new d3d12_video_decoder_references_manager(pD3D12Dec->m_pD3D12Screen, 1065 pD3D12Dec->m_NodeMask, 1066 pD3D12Dec->m_d3d12DecProfileType, 1067 dpbDesc)); 1068 } 1069 1070 // 1071 // (Re)-create decoder heap 1072 // 1073 D3D12_VIDEO_DECODER_HEAP_DESC decoderHeapDesc = {}; 1074 decoderHeapDesc.NodeMask = pD3D12Dec->m_NodeMask; 1075 decoderHeapDesc.Configuration = pD3D12Dec->m_decoderDesc.Configuration; 1076 decoderHeapDesc.DecodeWidth = dpbDesc.Width; 1077 decoderHeapDesc.DecodeHeight = dpbDesc.Height; 1078 decoderHeapDesc.Format = dpbDesc.Format; 1079 decoderHeapDesc.MaxDecodePictureBufferCount = maxDPB; 1080 pD3D12Dec->m_spVideoDecoderHeap.Reset(); 1081 HRESULT hr = pD3D12Dec->m_spD3D12VideoDevice->CreateVideoDecoderHeap( 1082 &decoderHeapDesc, 1083 IID_PPV_ARGS(pD3D12Dec->m_spVideoDecoderHeap.GetAddressOf())); 1084 if (FAILED(hr)) { 1085 debug_printf( 1086 "[d3d12_video_decoder] d3d12_video_decoder_reconfigure_dpb - CreateVideoDecoderHeap failed with HR %x\n", 1087 hr); 1088 return false; 1089 } 1090 // Update pD3D12Dec after CreateVideoDecoderHeap succeeds only. 
1091 pD3D12Dec->m_decoderHeapDesc = decoderHeapDesc; 1092 } 1093 1094 pD3D12Dec->m_decodeFormat = outputResourceDesc.Format; 1095 1096 return true; 1097} 1098 1099void 1100d3d12_video_decoder_refresh_dpb_active_references(struct d3d12_video_decoder *pD3D12Dec) 1101{ 1102 switch (pD3D12Dec->m_d3d12DecProfileType) { 1103 case d3d12_video_decode_profile_type_h264: 1104 { 1105 d3d12_video_decoder_refresh_dpb_active_references_h264(pD3D12Dec); 1106 } break; 1107 1108 default: 1109 { 1110 unreachable("Unsupported d3d12_video_decode_profile_type"); 1111 } break; 1112 } 1113} 1114 1115void 1116d3d12_video_decoder_get_frame_info( 1117 struct d3d12_video_decoder *pD3D12Dec, uint32_t *pWidth, uint32_t *pHeight, uint16_t *pMaxDPB, bool &isInterlaced) 1118{ 1119 *pWidth = 0; 1120 *pHeight = 0; 1121 *pMaxDPB = 0; 1122 isInterlaced = false; 1123 1124 switch (pD3D12Dec->m_d3d12DecProfileType) { 1125 case d3d12_video_decode_profile_type_h264: 1126 { 1127 d3d12_video_decoder_get_frame_info_h264(pD3D12Dec, pWidth, pHeight, pMaxDPB, isInterlaced); 1128 } break; 1129 1130 default: 1131 { 1132 unreachable("Unsupported d3d12_video_decode_profile_type"); 1133 } break; 1134 } 1135 1136 if (pD3D12Dec->m_ConfigDecoderSpecificFlags & d3d12_video_decode_config_specific_flag_alignment_height) { 1137 const uint32_t AlignmentMask = 31; 1138 *pHeight = (*pHeight + AlignmentMask) & ~AlignmentMask; 1139 } 1140} 1141 1142/// 1143/// Returns the number of bytes starting from [buf.data() + buffsetOffset] where the _targetCode_ is found 1144/// Returns -1 if start code not found 1145/// 1146int 1147d3d12_video_decoder_get_next_startcode_offset(std::vector<uint8_t> &buf, 1148 unsigned int bufferOffset, 1149 unsigned int targetCode, 1150 unsigned int targetCodeBitSize, 1151 unsigned int numBitsToSearchIntoBuffer) 1152{ 1153 struct vl_vlc vlc = { 0 }; 1154 1155 // Shorten the buffer to be [buffetOffset, endOfBuf) 1156 unsigned int bufSize = buf.size() - bufferOffset; 1157 uint8_t *bufPtr = buf.data(); 1158 
bufPtr += bufferOffset; 1159 1160 /* search the first numBitsToSearchIntoBuffer bytes for a startcode */ 1161 vl_vlc_init(&vlc, 1, (const void *const *) &bufPtr, &bufSize); 1162 for (uint i = 0; i < numBitsToSearchIntoBuffer && vl_vlc_bits_left(&vlc) >= targetCodeBitSize; ++i) { 1163 if (vl_vlc_peekbits(&vlc, targetCodeBitSize) == targetCode) 1164 return i; 1165 vl_vlc_eatbits(&vlc, 8); // Stride is 8 bits = 1 byte 1166 vl_vlc_fillbits(&vlc); 1167 } 1168 1169 return -1; 1170} 1171 1172void 1173d3d12_video_decoder_store_converted_dxva_picparams_from_pipe_input( 1174 struct d3d12_video_decoder *codec, // input argument, current decoder 1175 struct pipe_picture_desc 1176 *picture, // input argument, base structure of pipe_XXX_picture_desc where XXX is the codec name 1177 struct d3d12_video_buffer *pD3D12VideoBuffer // input argument, target video buffer 1178) 1179{ 1180 assert(picture); 1181 assert(codec); 1182 struct d3d12_video_decoder *pD3D12Dec = (struct d3d12_video_decoder *) codec; 1183 1184 d3d12_video_decode_profile_type profileType = 1185 d3d12_video_decoder_convert_pipe_video_profile_to_profile_type(codec->base.profile); 1186 switch (profileType) { 1187 case d3d12_video_decode_profile_type_h264: 1188 { 1189 size_t dxvaPicParamsBufferSize = sizeof(DXVA_PicParams_H264); 1190 pipe_h264_picture_desc *pPicControlH264 = (pipe_h264_picture_desc *) picture; 1191 ID3D12Resource *pPipeD3D12DstResource = d3d12_resource_resource(pD3D12VideoBuffer->texture); 1192 D3D12_RESOURCE_DESC outputResourceDesc = GetDesc(pPipeD3D12DstResource); 1193 DXVA_PicParams_H264 dxvaPicParamsH264 = 1194 d3d12_video_decoder_dxva_picparams_from_pipe_picparams_h264(pD3D12Dec->m_fenceValue, 1195 codec->base.profile, 1196 outputResourceDesc.Width, 1197 outputResourceDesc.Height, 1198 pPicControlH264); 1199 1200 d3d12_video_decoder_store_dxva_picparams_in_picparams_buffer(codec, 1201 &dxvaPicParamsH264, 1202 dxvaPicParamsBufferSize); 1203 1204 size_t dxvaQMatrixBufferSize = 
sizeof(DXVA_Qmatrix_H264); 1205 DXVA_Qmatrix_H264 dxvaQmatrixH264 = {}; 1206 d3d12_video_decoder_dxva_qmatrix_from_pipe_picparams_h264((pipe_h264_picture_desc *) picture, 1207 dxvaQmatrixH264); 1208 d3d12_video_decoder_store_dxva_qmatrix_in_qmatrix_buffer(codec, &dxvaQmatrixH264, dxvaQMatrixBufferSize); 1209 } break; 1210 default: 1211 { 1212 unreachable("Unsupported d3d12_video_decode_profile_type"); 1213 } break; 1214 } 1215} 1216 1217void 1218d3d12_video_decoder_prepare_dxva_slices_control( 1219 struct d3d12_video_decoder *pD3D12Dec, // input argument, current decoder 1220 struct pipe_picture_desc *picture 1221) 1222{ 1223 d3d12_video_decode_profile_type profileType = 1224 d3d12_video_decoder_convert_pipe_video_profile_to_profile_type(pD3D12Dec->base.profile); 1225 switch (profileType) { 1226 case d3d12_video_decode_profile_type_h264: 1227 { 1228 1229 std::vector<DXVA_Slice_H264_Short> pOutSliceControlBuffers; 1230 struct pipe_h264_picture_desc* picture_h264 = (struct pipe_h264_picture_desc*) picture; 1231 d3d12_video_decoder_prepare_dxva_slices_control_h264(pD3D12Dec, pOutSliceControlBuffers, picture_h264); 1232 1233 assert(sizeof(pOutSliceControlBuffers.data()[0]) == sizeof(DXVA_Slice_H264_Short)); 1234 uint64_t DXVAStructSize = pOutSliceControlBuffers.size() * sizeof((pOutSliceControlBuffers.data()[0])); 1235 assert((DXVAStructSize % sizeof(DXVA_Slice_H264_Short)) == 0); 1236 d3d12_video_decoder_store_dxva_slicecontrol_in_slicecontrol_buffer(pD3D12Dec, 1237 pOutSliceControlBuffers.data(), 1238 DXVAStructSize); 1239 assert(pD3D12Dec->m_SliceControlBuffer.size() == DXVAStructSize); 1240 } break; 1241 default: 1242 { 1243 unreachable("Unsupported d3d12_video_decode_profile_type"); 1244 } break; 1245 } 1246} 1247 1248void 1249d3d12_video_decoder_store_dxva_slicecontrol_in_slicecontrol_buffer(struct d3d12_video_decoder *pD3D12Dec, 1250 void *pDXVAStruct, 1251 uint64_t DXVAStructSize) 1252{ 1253 if (pD3D12Dec->m_SliceControlBuffer.capacity() < DXVAStructSize) { 
1254 pD3D12Dec->m_SliceControlBuffer.reserve(DXVAStructSize); 1255 } 1256 1257 pD3D12Dec->m_SliceControlBuffer.resize(DXVAStructSize); 1258 memcpy(pD3D12Dec->m_SliceControlBuffer.data(), pDXVAStruct, DXVAStructSize); 1259} 1260 1261void 1262d3d12_video_decoder_store_dxva_qmatrix_in_qmatrix_buffer(struct d3d12_video_decoder *pD3D12Dec, 1263 void *pDXVAStruct, 1264 uint64_t DXVAStructSize) 1265{ 1266 if (pD3D12Dec->m_InverseQuantMatrixBuffer.capacity() < DXVAStructSize) { 1267 pD3D12Dec->m_InverseQuantMatrixBuffer.reserve(DXVAStructSize); 1268 } 1269 1270 pD3D12Dec->m_InverseQuantMatrixBuffer.resize(DXVAStructSize); 1271 memcpy(pD3D12Dec->m_InverseQuantMatrixBuffer.data(), pDXVAStruct, DXVAStructSize); 1272} 1273 1274void 1275d3d12_video_decoder_store_dxva_picparams_in_picparams_buffer(struct d3d12_video_decoder *pD3D12Dec, 1276 void *pDXVAStruct, 1277 uint64_t DXVAStructSize) 1278{ 1279 if (pD3D12Dec->m_picParamsBuffer.capacity() < DXVAStructSize) { 1280 pD3D12Dec->m_picParamsBuffer.reserve(DXVAStructSize); 1281 } 1282 1283 pD3D12Dec->m_picParamsBuffer.resize(DXVAStructSize); 1284 memcpy(pD3D12Dec->m_picParamsBuffer.data(), pDXVAStruct, DXVAStructSize); 1285} 1286 1287bool 1288d3d12_video_decoder_supports_aot_dpb(D3D12_FEATURE_DATA_VIDEO_DECODE_SUPPORT decodeSupport, 1289 d3d12_video_decode_profile_type profileType) 1290{ 1291 bool supportedProfile = false; 1292 switch (profileType) { 1293 case d3d12_video_decode_profile_type_h264: 1294 supportedProfile = true; 1295 break; 1296 default: 1297 supportedProfile = false; 1298 break; 1299 } 1300 1301 return (decodeSupport.DecodeTier >= D3D12_VIDEO_DECODE_TIER_2) && supportedProfile; 1302} 1303 1304d3d12_video_decode_profile_type 1305d3d12_video_decoder_convert_pipe_video_profile_to_profile_type(enum pipe_video_profile profile) 1306{ 1307 switch (profile) { 1308 case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE: 1309 case PIPE_VIDEO_PROFILE_MPEG4_AVC_CONSTRAINED_BASELINE: 1310 case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN: 1311 case 
PIPE_VIDEO_PROFILE_MPEG4_AVC_EXTENDED: 1312 case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH: 1313 case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH10: 1314 return d3d12_video_decode_profile_type_h264; 1315 default: 1316 { 1317 unreachable("Unsupported pipe video profile"); 1318 } break; 1319 } 1320} 1321 1322GUID 1323d3d12_video_decoder_convert_pipe_video_profile_to_d3d12_profile(enum pipe_video_profile profile) 1324{ 1325 switch (profile) { 1326 case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE: 1327 case PIPE_VIDEO_PROFILE_MPEG4_AVC_CONSTRAINED_BASELINE: 1328 case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN: 1329 case PIPE_VIDEO_PROFILE_MPEG4_AVC_EXTENDED: 1330 case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH: 1331 case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH10: 1332 return D3D12_VIDEO_DECODE_PROFILE_H264; 1333 default: 1334 return {}; 1335 } 1336} 1337 1338GUID 1339d3d12_video_decoder_resolve_profile(d3d12_video_decode_profile_type profileType) 1340{ 1341 switch (profileType) { 1342 case d3d12_video_decode_profile_type_h264: 1343 return D3D12_VIDEO_DECODE_PROFILE_H264; 1344 break; 1345 default: 1346 { 1347 unreachable("Unsupported d3d12_video_decode_profile_type"); 1348 } break; 1349 } 1350} 1351