1/*
2 * Copyright © Microsoft Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#include "d3d12_context.h"
25#include "d3d12_format.h"
26#include "d3d12_resource.h"
27#include "d3d12_screen.h"
28#include "d3d12_surface.h"
29#include "d3d12_video_dec.h"
30#include "d3d12_video_dec_h264.h"
31#include "d3d12_video_buffer.h"
32#include "d3d12_residency.h"
33
34#include "vl/vl_video_buffer.h"
35#include "util/format/u_format.h"
36#include "util/u_inlines.h"
37#include "util/u_memory.h"
38#include "util/u_video.h"
39#include "util/vl_vlc.h"
40
41struct pipe_video_codec *
42d3d12_video_create_decoder(struct pipe_context *context, const struct pipe_video_codec *codec)
43{
44   ///
45   /// Initialize d3d12_video_decoder
46   ///
47
48
49   // Not using new doesn't call ctor and the initializations in the class declaration are lost
50   struct d3d12_video_decoder *pD3D12Dec = new d3d12_video_decoder;
51
52   pD3D12Dec->base = *codec;
53   pD3D12Dec->m_screen = context->screen;
54
55   pD3D12Dec->base.context = context;
56   pD3D12Dec->base.width = codec->width;
57   pD3D12Dec->base.height = codec->height;
58   // Only fill methods that are supported by the d3d12 decoder, leaving null the rest (ie. encode_* / decode_macroblock
59   // / get_feedback for encode)
60   pD3D12Dec->base.destroy = d3d12_video_decoder_destroy;
61   pD3D12Dec->base.begin_frame = d3d12_video_decoder_begin_frame;
62   pD3D12Dec->base.decode_bitstream = d3d12_video_decoder_decode_bitstream;
63   pD3D12Dec->base.end_frame = d3d12_video_decoder_end_frame;
64   pD3D12Dec->base.flush = d3d12_video_decoder_flush;
65
66   pD3D12Dec->m_decodeFormat = d3d12_convert_pipe_video_profile_to_dxgi_format(codec->profile);
67   pD3D12Dec->m_d3d12DecProfileType = d3d12_video_decoder_convert_pipe_video_profile_to_profile_type(codec->profile);
68   pD3D12Dec->m_d3d12DecProfile = d3d12_video_decoder_convert_pipe_video_profile_to_d3d12_profile(codec->profile);
69
70   ///
71   /// Try initializing D3D12 Video device and check for device caps
72   ///
73
74   struct d3d12_context *pD3D12Ctx = (struct d3d12_context *) context;
75   pD3D12Dec->m_pD3D12Screen = d3d12_screen(pD3D12Ctx->base.screen);
76
77   ///
78   /// Create decode objects
79   ///
80   HRESULT hr = S_OK;
81   if (FAILED(pD3D12Dec->m_pD3D12Screen->dev->QueryInterface(
82          IID_PPV_ARGS(pD3D12Dec->m_spD3D12VideoDevice.GetAddressOf())))) {
83      debug_printf("[d3d12_video_decoder] d3d12_video_create_decoder - D3D12 Device has no Video support\n");
84      goto failed;
85   }
86
87   if (!d3d12_video_decoder_check_caps_and_create_decoder(pD3D12Dec->m_pD3D12Screen, pD3D12Dec)) {
88      debug_printf("[d3d12_video_decoder] d3d12_video_create_decoder - Failure on "
89                      "d3d12_video_decoder_check_caps_and_create_decoder\n");
90      goto failed;
91   }
92
93   if (!d3d12_video_decoder_create_command_objects(pD3D12Dec->m_pD3D12Screen, pD3D12Dec)) {
94      debug_printf(
95         "[d3d12_video_decoder] d3d12_video_create_decoder - Failure on d3d12_video_decoder_create_command_objects\n");
96      goto failed;
97   }
98
99   if (!d3d12_video_decoder_create_video_state_buffers(pD3D12Dec->m_pD3D12Screen, pD3D12Dec)) {
100      debug_printf("[d3d12_video_decoder] d3d12_video_create_decoder - Failure on "
101                      "d3d12_video_decoder_create_video_state_buffers\n");
102      goto failed;
103   }
104
105   pD3D12Dec->m_decodeFormatInfo = { pD3D12Dec->m_decodeFormat };
106   hr = pD3D12Dec->m_pD3D12Screen->dev->CheckFeatureSupport(D3D12_FEATURE_FORMAT_INFO,
107                                                                        &pD3D12Dec->m_decodeFormatInfo,
108                                                                        sizeof(pD3D12Dec->m_decodeFormatInfo));
109   if(FAILED(hr)) {
110      debug_printf("CheckFeatureSupport failed with HR %x\n", hr);
111      goto failed;
112   }
113
114   return &pD3D12Dec->base;
115
116failed:
117   if (pD3D12Dec != nullptr) {
118      d3d12_video_decoder_destroy((struct pipe_video_codec *) pD3D12Dec);
119   }
120
121   return nullptr;
122}
123
124/**
125 * Destroys a d3d12_video_decoder
126 * Call destroy_XX for applicable XX nested member types before deallocating
127 * Destroy methods should check != nullptr on their input target argument as this method can be called as part of
128 * cleanup from failure on the creation method
129 */
130void
131d3d12_video_decoder_destroy(struct pipe_video_codec *codec)
132{
133   if (codec == nullptr) {
134      return;
135   }
136
137   d3d12_video_decoder_flush(codec);   // Flush pending work before destroying.
138
139   struct d3d12_video_decoder *pD3D12Dec = (struct d3d12_video_decoder *) codec;
140
141   //
142   // Destroys a decoder
143   // Call destroy_XX for applicable XX nested member types before deallocating
144   // Destroy methods should check != nullptr on their input target argument as this method can be called as part of
145   // cleanup from failure on the creation method
146   //
147
148   // No need for d3d12_destroy_video_objects
149   //    All the objects created here are smart pointer members of d3d12_video_decoder
150   // No need for d3d12_destroy_video_decoder_and_heap
151   //    All the objects created here are smart pointer members of d3d12_video_decoder
152   // No need for d3d12_destroy_video_dpbmanagers
153   //    All the objects created here are smart pointer members of d3d12_video_decoder
154
155   // No need for m_pD3D12Screen as it is not managed by d3d12_video_decoder
156
157   // Call dtor to make ComPtr work
158   delete pD3D12Dec;
159}
160
161/**
162 * start decoding of a new frame
163 */
164void
165d3d12_video_decoder_begin_frame(struct pipe_video_codec *codec,
166                                struct pipe_video_buffer *target,
167                                struct pipe_picture_desc *picture)
168{
169   // Do nothing here. Initialize happens on decoder creation, re-config (if any) happens in
170   // d3d12_video_decoder_decode_bitstream
171   struct d3d12_video_decoder *pD3D12Dec = (struct d3d12_video_decoder *) codec;
172   assert(pD3D12Dec);
173   debug_printf("[d3d12_video_decoder] d3d12_video_decoder_begin_frame finalized for fenceValue: %d\n",
174                 pD3D12Dec->m_fenceValue);
175}
176
177/**
178 * decode a bitstream
179 */
180void
181d3d12_video_decoder_decode_bitstream(struct pipe_video_codec *codec,
182                                     struct pipe_video_buffer *target,
183                                     struct pipe_picture_desc *picture,
184                                     unsigned num_buffers,
185                                     const void *const *buffers,
186                                     const unsigned *sizes)
187{
188   struct d3d12_video_decoder *pD3D12Dec = (struct d3d12_video_decoder *) codec;
189   assert(pD3D12Dec);
190   debug_printf("[d3d12_video_decoder] d3d12_video_decoder_decode_bitstream started for fenceValue: %d\n",
191                 pD3D12Dec->m_fenceValue);
192   assert(pD3D12Dec->m_spD3D12VideoDevice);
193   assert(pD3D12Dec->m_spDecodeCommandQueue);
194   assert(pD3D12Dec->m_pD3D12Screen);
195   struct d3d12_video_buffer *pD3D12VideoBuffer = (struct d3d12_video_buffer *) target;
196   assert(pD3D12VideoBuffer);
197
198   ///
199   /// Compressed bitstream buffers
200   ///
201
202   /// Mesa VA frontend Video buffer passing semantics for H264, HEVC, MPEG4, VC1 and PIPE_VIDEO_PROFILE_VC1_ADVANCED
203   /// are: If num_buffers == 1 -> buf[0] has the compressed bitstream WITH the starting code If num_buffers == 2 ->
204   /// buf[0] has the NALU starting code and buf[1] has the compressed bitstream WITHOUT any starting code. If
205   /// num_buffers = 3 -> It's JPEG, not supported in D3D12. num_buffers is at most 3.
206   /// Mesa VDPAU frontend passes the buffers as they get passed in VdpDecoderRender without fixing any start codes
207   /// except for PIPE_VIDEO_PROFILE_VC1_ADVANCED
208   // In https://http.download.nvidia.com/XFree86/vdpau/doxygen/html/index.html#video_mixer_usage it's mentioned that:
209   // It is recommended that applications pass solely the slice data to VDPAU; specifically that any header data
210   // structures be excluded from the portion of the bitstream passed to VDPAU. VDPAU implementations must operate
211   // correctly if non-slice data is included, at least for formats employing start codes to delimit slice data. For all
212   // codecs/profiles it's highly recommended (when the codec/profile has such codes...) that the start codes are passed
213   // to VDPAU, even when not included in the bitstream the VDPAU client is parsing. Let's assume we get all the start
214   // codes for VDPAU. The doc also says "VDPAU implementations must operate correctly if non-slice data is included, at
215   // least for formats employing start codes to delimit slice data" if we ever get an issue with VDPAU start codes we
216   // should consider adding the code that handles this in the VDPAU layer above the gallium driver like mesa VA does.
217
218   // To handle the multi-slice case end_frame already takes care of this by parsing the start codes from the
219   // combined bitstream of all decode_bitstream calls.
220
221   // VAAPI seems to send one decode_bitstream command per slice, but we should also support the VDPAU case where the
222   // buffers have multiple buffer array entry per slice {startCode (optional), slice1, slice2, ..., startCode
223   // (optional) , sliceN}
224
225   if (num_buffers > 2)   // Assume this means multiple slices at once in a decode_bitstream call
226   {
227      // Based on VA frontend codebase, this never happens for video (no JPEG)
228      // Based on VDPAU frontends codebase, this only happens when sending more than one slice at once in decode bitstream
229
230      // To handle the case where VDPAU send all the slices at once in a single decode_bitstream call, let's pretend it
231      // was a series of different calls
232
233      // group by start codes and buffers and perform calls for the number of slices
234      debug_printf("[d3d12_video_decoder] d3d12_video_decoder_decode_bitstream multiple slices on same call detected "
235                     "for fenceValue: %d, breaking down the calls into one per slice\n",
236                     pD3D12Dec->m_fenceValue);
237
238      size_t curBufferIdx = 0;
239
240      // Vars to be used for the delegation calls to decode_bitstream
241      unsigned call_num_buffers = 0;
242      const void *const *call_buffers = nullptr;
243      const unsigned *call_sizes = nullptr;
244
245      while (curBufferIdx < num_buffers) {
246         // Store the current buffer as the base array pointer for the delegated call, later decide if it'll be a
247         // startcode+slicedata or just slicedata call
248         call_buffers = &buffers[curBufferIdx];
249         call_sizes = &sizes[curBufferIdx];
250
251         // Usually start codes are less or equal than 4 bytes
252         // If the current buffer is a start code buffer, send it along with the next buffer. Otherwise, just send the
253         // current buffer.
254         call_num_buffers = (sizes[curBufferIdx] <= 4) ? 2 : 1;
255
256         // Delegate call with one or two buffers only
257         d3d12_video_decoder_decode_bitstream(codec, target, picture, call_num_buffers, call_buffers, call_sizes);
258
259         curBufferIdx += call_num_buffers;   // Consume from the loop the buffers sent in the last call
260      }
261   } else {
262      ///
263      /// Handle single slice buffer path, maybe with an extra start code buffer at buffers[0].
264      ///
265
266      // Both the start codes being present at buffers[0] and the rest in buffers [1] or full buffer at [0] cases can be
267      // handled by flattening all the buffers into a single one and passing that to HW.
268
269      size_t totalReceivedBuffersSize = 0u;   // Combined size of all sizes[]
270      for (size_t bufferIdx = 0; bufferIdx < num_buffers; bufferIdx++) {
271         totalReceivedBuffersSize += sizes[bufferIdx];
272      }
273
274      // Bytes of data pre-staged before this decode_frame call
275      size_t preStagedDataSize = pD3D12Dec->m_stagingDecodeBitstream.size();
276
277      // Extend the staging buffer size, as decode_frame can be called several times before end_frame
278      pD3D12Dec->m_stagingDecodeBitstream.resize(preStagedDataSize + totalReceivedBuffersSize);
279
280      // Point newSliceDataPositionDstBase to the end of the pre-staged data in m_stagingDecodeBitstream, where the new
281      // buffers will be appended
282      uint8_t *newSliceDataPositionDstBase = pD3D12Dec->m_stagingDecodeBitstream.data() + preStagedDataSize;
283
284      // Append new data at the end.
285      size_t dstOffset = 0u;
286      for (size_t bufferIdx = 0; bufferIdx < num_buffers; bufferIdx++) {
287         memcpy(newSliceDataPositionDstBase + dstOffset, buffers[bufferIdx], sizes[bufferIdx]);
288         dstOffset += sizes[bufferIdx];
289      }
290
291      debug_printf("[d3d12_video_decoder] d3d12_video_decoder_decode_bitstream finalized for fenceValue: %d\n",
292                    pD3D12Dec->m_fenceValue);
293   }
294}
295
296void
297d3d12_video_decoder_store_upper_layer_references(struct d3d12_video_decoder *pD3D12Dec,
298                                                 struct pipe_video_buffer *target,
299                                                 struct pipe_picture_desc *picture)
300{
301   switch (pD3D12Dec->m_d3d12DecProfileType) {
302      case d3d12_video_decode_profile_type_h264:
303      {
304         pipe_h264_picture_desc *pPicControlH264 = (pipe_h264_picture_desc *) picture;
305         pD3D12Dec->m_pCurrentDecodeTarget = target;
306         pD3D12Dec->m_pCurrentReferenceTargets = pPicControlH264->ref;
307      } break;
308
309      default:
310      {
311         unreachable("Unsupported d3d12_video_decode_profile_type");
312      } break;
313   }
314}
315
316/**
317 * end decoding of the current frame
318 */
319void
320d3d12_video_decoder_end_frame(struct pipe_video_codec *codec,
321                              struct pipe_video_buffer *target,
322                              struct pipe_picture_desc *picture)
323{
324   struct d3d12_video_decoder *pD3D12Dec = (struct d3d12_video_decoder *) codec;
325   assert(pD3D12Dec);
326   struct d3d12_screen *pD3D12Screen = (struct d3d12_screen *) pD3D12Dec->m_pD3D12Screen;
327   assert(pD3D12Screen);
328   debug_printf("[d3d12_video_decoder] d3d12_video_decoder_end_frame started for fenceValue: %d\n",
329                 pD3D12Dec->m_fenceValue);
330   assert(pD3D12Dec->m_spD3D12VideoDevice);
331   assert(pD3D12Dec->m_spDecodeCommandQueue);
332   struct d3d12_video_buffer *pD3D12VideoBuffer = (struct d3d12_video_buffer *) target;
333   assert(pD3D12VideoBuffer);
334
335   ///
336   /// Store current decode output target texture and reference textures from upper layer
337   ///
338   d3d12_video_decoder_store_upper_layer_references(pD3D12Dec, target, picture);
339
340   ///
341   /// Codec header picture parameters buffers
342   ///
343
344   d3d12_video_decoder_store_converted_dxva_picparams_from_pipe_input(pD3D12Dec, picture, pD3D12VideoBuffer);
345   assert(pD3D12Dec->m_picParamsBuffer.size() > 0);
346
347   ///
348   /// Prepare Slice control buffers before clearing staging buffer
349   ///
350   assert(pD3D12Dec->m_stagingDecodeBitstream.size() > 0);   // Make sure the staging wasn't cleared yet in end_frame
351   d3d12_video_decoder_prepare_dxva_slices_control(pD3D12Dec, picture);
352   assert(pD3D12Dec->m_SliceControlBuffer.size() > 0);
353
354   ///
355   /// Upload m_stagingDecodeBitstream to GPU memory now that end_frame is called and clear staging buffer
356   ///
357
358   uint64_t sliceDataStagingBufferSize = pD3D12Dec->m_stagingDecodeBitstream.size();
359   uint8_t *sliceDataStagingBufferPtr = pD3D12Dec->m_stagingDecodeBitstream.data();
360
361   // Reallocate if necessary to accomodate the current frame bitstream buffer in GPU memory
362   if (pD3D12Dec->m_curFrameCompressedBitstreamBufferAllocatedSize < sliceDataStagingBufferSize) {
363      if (!d3d12_video_decoder_create_staging_bitstream_buffer(pD3D12Screen, pD3D12Dec, sliceDataStagingBufferSize)) {
364         debug_printf("[d3d12_video_decoder] d3d12_video_decoder_end_frame - Failure on "
365                         "d3d12_video_decoder_create_staging_bitstream_buffer\n");
366         debug_printf("[d3d12_video_encoder] d3d12_video_decoder_end_frame failed for fenceValue: %d\n",
367                pD3D12Dec->m_fenceValue);
368         assert(false);
369         return;
370      }
371   }
372
373   // Upload frame bitstream CPU data to ID3D12Resource buffer
374   pD3D12Dec->m_curFrameCompressedBitstreamBufferPayloadSize =
375      sliceDataStagingBufferSize;   // This can be less than m_curFrameCompressedBitstreamBufferAllocatedSize.
376   assert(pD3D12Dec->m_curFrameCompressedBitstreamBufferPayloadSize <=
377          pD3D12Dec->m_curFrameCompressedBitstreamBufferAllocatedSize);
378
379   /* One-shot transfer operation with data supplied in a user
380    * pointer.
381    */
382   pipe_resource *pPipeCompressedBufferObj =
383      d3d12_resource_from_resource(&pD3D12Screen->base, pD3D12Dec->m_curFrameCompressedBitstreamBuffer.Get());
384   assert(pPipeCompressedBufferObj);
385   pD3D12Dec->base.context->buffer_subdata(pD3D12Dec->base.context,    // context
386                                           pPipeCompressedBufferObj,   // dst buffer
387                                           PIPE_MAP_WRITE,             // usage PIPE_MAP_x
388                                           0,                          // offset
389                                           sizeof(*sliceDataStagingBufferPtr) * sliceDataStagingBufferSize,   // size
390                                           sliceDataStagingBufferPtr                                          // data
391   );
392
393   // Flush buffer_subdata batch and wait on this CPU thread for GPU work completion
394   // before deleting the source CPU buffer below
395   struct pipe_fence_handle *pUploadGPUCompletionFence = NULL;
396   pD3D12Dec->base.context->flush(pD3D12Dec->base.context,
397                                  &pUploadGPUCompletionFence,
398                                  PIPE_FLUSH_ASYNC | PIPE_FLUSH_HINT_FINISH);
399   assert(pUploadGPUCompletionFence);
400   debug_printf("[d3d12_video_decoder] d3d12_video_decoder_end_frame - Waiting on GPU completion fence for "
401                  "buffer_subdata to upload compressed bitstream.\n");
402   pD3D12Screen->base.fence_finish(&pD3D12Screen->base, NULL, pUploadGPUCompletionFence, PIPE_TIMEOUT_INFINITE);
403   pD3D12Screen->base.fence_reference(&pD3D12Screen->base, &pUploadGPUCompletionFence, NULL);
404
405   // [After buffer_subdata GPU work is finished] Clear CPU staging buffer now that end_frame is called and was uploaded
406   // to GPU for DecodeFrame call.
407   pD3D12Dec->m_stagingDecodeBitstream.resize(0);
408
409   ///
410   /// Proceed to record the GPU Decode commands
411   ///
412
413   // Requested conversions by caller upper layer (none for now)
414   d3d12_video_decode_output_conversion_arguments requestedConversionArguments = {};
415
416   ///
417   /// Record DecodeFrame operation and resource state transitions.
418   ///
419
420   // Translate input D3D12 structure
421   D3D12_VIDEO_DECODE_INPUT_STREAM_ARGUMENTS d3d12InputArguments = {};
422
423   d3d12InputArguments.CompressedBitstream.pBuffer = pD3D12Dec->m_curFrameCompressedBitstreamBuffer.Get();
424   d3d12InputArguments.CompressedBitstream.Offset = 0u;
425   constexpr uint64_t d3d12BitstreamOffsetAlignment =
426      128u;   // specified in
427              // https://docs.microsoft.com/en-us/windows/win32/api/d3d12video/ne-d3d12video-d3d12_video_decode_tier
428   assert((d3d12InputArguments.CompressedBitstream.Offset == 0) ||
429         ((d3d12InputArguments.CompressedBitstream.Offset % d3d12BitstreamOffsetAlignment) == 0));
430   d3d12InputArguments.CompressedBitstream.Size = pD3D12Dec->m_curFrameCompressedBitstreamBufferPayloadSize;
431
432   D3D12_RESOURCE_BARRIER resourceBarrierCommonToDecode[1] = {
433      CD3DX12_RESOURCE_BARRIER::Transition(d3d12InputArguments.CompressedBitstream.pBuffer,
434                                           D3D12_RESOURCE_STATE_COMMON,
435                                           D3D12_RESOURCE_STATE_VIDEO_DECODE_READ),
436   };
437   pD3D12Dec->m_spDecodeCommandList->ResourceBarrier(1u, resourceBarrierCommonToDecode);
438
439   // Schedule reverse (back to common) transitions before command list closes for current frame
440   pD3D12Dec->m_transitionsBeforeCloseCmdList.push_back(
441      CD3DX12_RESOURCE_BARRIER::Transition(d3d12InputArguments.CompressedBitstream.pBuffer,
442                                           D3D12_RESOURCE_STATE_VIDEO_DECODE_READ,
443                                           D3D12_RESOURCE_STATE_COMMON));
444
445   ///
446   /// Clear texture (no reference only flags in resource allocation) to use as decode output to send downstream for
447   /// display/consumption
448   ///
449   ID3D12Resource *pOutputD3D12Texture;
450   uint outputD3D12Subresource = 0;
451
452   ///
453   /// Ref Only texture (with reference only flags in resource allocation) to use as reconstructed picture decode output
454   /// and to store as future reference in DPB
455   ///
456   ID3D12Resource *pRefOnlyOutputD3D12Texture;
457   uint refOnlyOutputD3D12Subresource = 0;
458
459   if(!d3d12_video_decoder_prepare_for_decode_frame(pD3D12Dec,
460                                                target,
461                                                pD3D12VideoBuffer,
462                                                &pOutputD3D12Texture,             // output
463                                                &outputD3D12Subresource,          // output
464                                                &pRefOnlyOutputD3D12Texture,      // output
465                                                &refOnlyOutputD3D12Subresource,   // output
466                                                requestedConversionArguments)) {
467      debug_printf("[d3d12_video_decoder] d3d12_video_decoder_end_frame - Failure on "
468                      "d3d12_video_decoder_prepare_for_decode_frame\n");
469      debug_printf("[d3d12_video_encoder] d3d12_video_decoder_end_frame failed for fenceValue: %d\n",
470                pD3D12Dec->m_fenceValue);
471      assert(false);
472      return;
473   }
474
475   ///
476   /// Set codec picture parameters CPU buffer
477   ///
478
479   d3d12InputArguments.NumFrameArguments =
480      1u;   // Only the codec data received from the above layer with picture params
481   d3d12InputArguments.FrameArguments[d3d12InputArguments.NumFrameArguments - 1] = {
482      D3D12_VIDEO_DECODE_ARGUMENT_TYPE_PICTURE_PARAMETERS,
483      static_cast<uint32_t>(pD3D12Dec->m_picParamsBuffer.size()),
484      pD3D12Dec->m_picParamsBuffer.data(),
485   };
486
487   if (pD3D12Dec->m_SliceControlBuffer.size() > 0) {
488      d3d12InputArguments.NumFrameArguments++;
489      d3d12InputArguments.FrameArguments[d3d12InputArguments.NumFrameArguments - 1] = {
490         D3D12_VIDEO_DECODE_ARGUMENT_TYPE_SLICE_CONTROL,
491         static_cast<uint32_t>(pD3D12Dec->m_SliceControlBuffer.size()),
492         pD3D12Dec->m_SliceControlBuffer.data(),
493      };
494   }
495
496   if (pD3D12Dec->m_InverseQuantMatrixBuffer.size() > 0) {
497      d3d12InputArguments.NumFrameArguments++;
498      d3d12InputArguments.FrameArguments[d3d12InputArguments.NumFrameArguments - 1] = {
499         D3D12_VIDEO_DECODE_ARGUMENT_TYPE_INVERSE_QUANTIZATION_MATRIX,
500         static_cast<uint32_t>(pD3D12Dec->m_InverseQuantMatrixBuffer.size()),
501         pD3D12Dec->m_InverseQuantMatrixBuffer.data(),
502      };
503   }
504
505   d3d12InputArguments.ReferenceFrames = pD3D12Dec->m_spDPBManager->get_current_reference_frames();
506   if (D3D12_DEBUG_VERBOSE & d3d12_debug) {
507      pD3D12Dec->m_spDPBManager->print_dpb();
508   }
509
510   d3d12InputArguments.pHeap = pD3D12Dec->m_spVideoDecoderHeap.Get();
511
512   // translate output D3D12 structure
513   D3D12_VIDEO_DECODE_OUTPUT_STREAM_ARGUMENTS1 d3d12OutputArguments = {};
514   d3d12OutputArguments.pOutputTexture2D = pOutputD3D12Texture;
515   d3d12OutputArguments.OutputSubresource = outputD3D12Subresource;
516
517   bool fReferenceOnly = (pD3D12Dec->m_ConfigDecoderSpecificFlags &
518                          d3d12_video_decode_config_specific_flag_reference_only_textures_required) != 0;
519   if (fReferenceOnly) {
520      d3d12OutputArguments.ConversionArguments.Enable = TRUE;
521
522      assert(pRefOnlyOutputD3D12Texture);
523      d3d12OutputArguments.ConversionArguments.pReferenceTexture2D = pRefOnlyOutputD3D12Texture;
524      d3d12OutputArguments.ConversionArguments.ReferenceSubresource = refOnlyOutputD3D12Subresource;
525
526      const D3D12_RESOURCE_DESC &descReference = GetDesc(d3d12OutputArguments.ConversionArguments.pReferenceTexture2D);
527      d3d12OutputArguments.ConversionArguments.DecodeColorSpace = d3d12_convert_from_legacy_color_space(
528         !util_format_is_yuv(d3d12_get_pipe_format(descReference.Format)),
529         util_format_get_blocksize(d3d12_get_pipe_format(descReference.Format)) * 8 /*bytes to bits conversion*/,
530         /* StudioRGB= */ false,
531         /* P709= */ true,
532         /* StudioYUV= */ true);
533
534      const D3D12_RESOURCE_DESC &descOutput = GetDesc(d3d12OutputArguments.pOutputTexture2D);
535      d3d12OutputArguments.ConversionArguments.OutputColorSpace = d3d12_convert_from_legacy_color_space(
536         !util_format_is_yuv(d3d12_get_pipe_format(descOutput.Format)),
537         util_format_get_blocksize(d3d12_get_pipe_format(descOutput.Format)) * 8 /*bytes to bits conversion*/,
538         /* StudioRGB= */ false,
539         /* P709= */ true,
540         /* StudioYUV= */ true);
541
542      const D3D12_VIDEO_DECODER_HEAP_DESC &HeapDesc = GetDesc(pD3D12Dec->m_spVideoDecoderHeap.Get());
543      d3d12OutputArguments.ConversionArguments.OutputWidth = HeapDesc.DecodeWidth;
544      d3d12OutputArguments.ConversionArguments.OutputHeight = HeapDesc.DecodeHeight;
545   } else {
546      d3d12OutputArguments.ConversionArguments.Enable = FALSE;
547   }
548
549   CD3DX12_RESOURCE_DESC outputDesc(GetDesc(d3d12OutputArguments.pOutputTexture2D));
550   uint32_t MipLevel, PlaneSlice, ArraySlice;
551   D3D12DecomposeSubresource(d3d12OutputArguments.OutputSubresource,
552                             outputDesc.MipLevels,
553                             outputDesc.ArraySize(),
554                             MipLevel,
555                             ArraySlice,
556                             PlaneSlice);
557
558   for (PlaneSlice = 0; PlaneSlice < pD3D12Dec->m_decodeFormatInfo.PlaneCount; PlaneSlice++) {
559      uint planeOutputSubresource = outputDesc.CalcSubresource(MipLevel, ArraySlice, PlaneSlice);
560
561      D3D12_RESOURCE_BARRIER resourceBarrierCommonToDecode[1] = {
562         CD3DX12_RESOURCE_BARRIER::Transition(d3d12OutputArguments.pOutputTexture2D,
563                                              D3D12_RESOURCE_STATE_COMMON,
564                                              D3D12_RESOURCE_STATE_VIDEO_DECODE_WRITE,
565                                              planeOutputSubresource),
566      };
567      pD3D12Dec->m_spDecodeCommandList->ResourceBarrier(1u, resourceBarrierCommonToDecode);
568   }
569
570   // Schedule reverse (back to common) transitions before command list closes for current frame
571   for (PlaneSlice = 0; PlaneSlice < pD3D12Dec->m_decodeFormatInfo.PlaneCount; PlaneSlice++) {
572      uint planeOutputSubresource = outputDesc.CalcSubresource(MipLevel, ArraySlice, PlaneSlice);
573      pD3D12Dec->m_transitionsBeforeCloseCmdList.push_back(
574         CD3DX12_RESOURCE_BARRIER::Transition(d3d12OutputArguments.pOutputTexture2D,
575                                              D3D12_RESOURCE_STATE_VIDEO_DECODE_WRITE,
576                                              D3D12_RESOURCE_STATE_COMMON,
577                                              planeOutputSubresource));
578   }
579
580   // Record DecodeFrame
581
582   pD3D12Dec->m_spDecodeCommandList->DecodeFrame1(pD3D12Dec->m_spVideoDecoder.Get(),
583                                                  &d3d12OutputArguments,
584                                                  &d3d12InputArguments);
585
586   debug_printf("[d3d12_video_decoder] d3d12_video_decoder_end_frame finalized for fenceValue: %d\n",
587                 pD3D12Dec->m_fenceValue);
588
589   ///
590   /// Flush work to the GPU and blocking wait until decode finishes
591   ///
592   pD3D12Dec->m_needsGPUFlush = true;
593   d3d12_video_decoder_flush(codec);
594
595   if (!pD3D12Dec->m_spDPBManager->is_pipe_buffer_underlying_output_decode_allocation()) {
596      ///
597      /// If !pD3D12Dec->m_spDPBManager->is_pipe_buffer_underlying_output_decode_allocation()
598      /// We cannot use the standalone video buffer allocation directly and we must use instead
599      /// either a ID3D12Resource with DECODE_REFERENCE only flag or a texture array within the same
600      /// allocation
601      /// Do GPU->GPU texture copy from decode output to pipe target decode texture sampler view planes
602      ///
603
604      // Get destination resource
605      struct pipe_sampler_view **pPipeDstViews = target->get_sampler_view_planes(target);
606
607      // Get source pipe_resource
608      pipe_resource *pPipeSrc =
609         d3d12_resource_from_resource(&pD3D12Screen->base, d3d12OutputArguments.pOutputTexture2D);
610      assert(pPipeSrc);
611
612      // Copy all format subresources/texture planes
613
614      for (PlaneSlice = 0; PlaneSlice < pD3D12Dec->m_decodeFormatInfo.PlaneCount; PlaneSlice++) {
615         assert(d3d12OutputArguments.OutputSubresource < INT16_MAX);
616         struct pipe_box box = { 0,
617                                 0,
618                                 // src array slice, taken as Z for TEXTURE_2D_ARRAY
619                                 static_cast<int16_t>(d3d12OutputArguments.OutputSubresource),
620                                 static_cast<int>(pPipeDstViews[PlaneSlice]->texture->width0),
621                                 static_cast<int16_t>(pPipeDstViews[PlaneSlice]->texture->height0),
622                                 1 };
623
624         pD3D12Dec->base.context->resource_copy_region(pD3D12Dec->base.context,
625                                                       pPipeDstViews[PlaneSlice]->texture,              // dst
626                                                       0,                                               // dst level
627                                                       0,                                               // dstX
628                                                       0,                                               // dstY
629                                                       0,                                               // dstZ
630                                                       (PlaneSlice == 0) ? pPipeSrc : pPipeSrc->next,   // src
631                                                       0,                                               // src level
632                                                       &box);
633      }
634      // Flush resource_copy_region batch and wait on this CPU thread for GPU work completion
635      struct pipe_fence_handle *completion_fence = NULL;
636      pD3D12Dec->base.context->flush(pD3D12Dec->base.context,
637                                     &completion_fence,
638                                     PIPE_FLUSH_ASYNC | PIPE_FLUSH_HINT_FINISH);
639      assert(completion_fence);
640      debug_printf("[d3d12_video_decoder] d3d12_video_decoder_end_frame - Waiting on GPU completion fence for "
641                     "resource_copy_region on decoded frame.\n");
642      pD3D12Screen->base.fence_finish(&pD3D12Screen->base, NULL, completion_fence, PIPE_TIMEOUT_INFINITE);
643      pD3D12Screen->base.fence_reference(&pD3D12Screen->base, &completion_fence, NULL);
644   }
645}
646
647/**
648 * flush any outstanding command buffers to the hardware
649 * should be called before a video_buffer is acessed by the gallium frontend again
650 */
651void
652d3d12_video_decoder_flush(struct pipe_video_codec *codec)
653{
654   struct d3d12_video_decoder *pD3D12Dec = (struct d3d12_video_decoder *) codec;
655   assert(pD3D12Dec);
656   assert(pD3D12Dec->m_spD3D12VideoDevice);
657   assert(pD3D12Dec->m_spDecodeCommandQueue);
658   debug_printf("[d3d12_video_decoder] d3d12_video_decoder_flush started. Will flush video queue work and CPU wait on "
659                 "fenceValue: %d\n",
660                 pD3D12Dec->m_fenceValue);
661
662   if (!pD3D12Dec->m_needsGPUFlush) {
663      debug_printf("[d3d12_video_decoder] d3d12_video_decoder_flush started. Nothing to flush, all up to date.\n");
664   } else {
665      HRESULT hr = pD3D12Dec->m_pD3D12Screen->dev->GetDeviceRemovedReason();
666      if (hr != S_OK) {
667         debug_printf("[d3d12_video_decoder] d3d12_video_decoder_flush"
668                         " - D3D12Device was removed BEFORE commandlist "
669                         "execution with HR %x.\n",
670                         hr);
671         goto flush_fail;
672      }
673
674      // Close and execute command list and wait for idle on CPU blocking
675      // this method before resetting list and allocator for next submission.
676
677      if (pD3D12Dec->m_transitionsBeforeCloseCmdList.size() > 0) {
678         pD3D12Dec->m_spDecodeCommandList->ResourceBarrier(pD3D12Dec->m_transitionsBeforeCloseCmdList.size(),
679                                                           pD3D12Dec->m_transitionsBeforeCloseCmdList.data());
680         pD3D12Dec->m_transitionsBeforeCloseCmdList.clear();
681      }
682
683      hr = pD3D12Dec->m_spDecodeCommandList->Close();
684      if (FAILED(hr)) {
685         debug_printf("[d3d12_video_decoder] d3d12_video_decoder_flush - Can't close command list with HR %x\n", hr);
686         goto flush_fail;
687      }
688
689      ID3D12CommandList *ppCommandLists[1] = { pD3D12Dec->m_spDecodeCommandList.Get() };
690      pD3D12Dec->m_spDecodeCommandQueue->ExecuteCommandLists(1, ppCommandLists);
691      pD3D12Dec->m_spDecodeCommandQueue->Signal(pD3D12Dec->m_spFence.Get(), pD3D12Dec->m_fenceValue);
692      pD3D12Dec->m_spFence->SetEventOnCompletion(pD3D12Dec->m_fenceValue, nullptr);
693      debug_printf("[d3d12_video_decoder] d3d12_video_decoder_flush - ExecuteCommandLists finished on signal with "
694                    "fenceValue: %d\n",
695                    pD3D12Dec->m_fenceValue);
696
697      hr = pD3D12Dec->m_spCommandAllocator->Reset();
698      if (FAILED(hr)) {
699         debug_printf(
700            "[d3d12_video_decoder] d3d12_video_decoder_flush - resetting ID3D12CommandAllocator failed with HR %x\n",
701            hr);
702         goto flush_fail;
703      }
704
705      hr = pD3D12Dec->m_spDecodeCommandList->Reset(pD3D12Dec->m_spCommandAllocator.Get());
706      if (FAILED(hr)) {
707         debug_printf(
708            "[d3d12_video_decoder] d3d12_video_decoder_flush - resetting ID3D12GraphicsCommandList failed with HR %x\n",
709            hr);
710         goto flush_fail;
711      }
712
713      // Validate device was not removed
714      hr = pD3D12Dec->m_pD3D12Screen->dev->GetDeviceRemovedReason();
715      if (hr != S_OK) {
716         debug_printf("[d3d12_video_decoder] d3d12_video_decoder_flush"
717                         " - D3D12Device was removed AFTER commandlist "
718                         "execution with HR %x, but wasn't before.\n",
719                         hr);
720         goto flush_fail;
721      }
722
723      debug_printf(
724         "[d3d12_video_decoder] d3d12_video_decoder_flush - GPU signaled execution finalized for fenceValue: %d\n",
725         pD3D12Dec->m_fenceValue);
726
727      pD3D12Dec->m_fenceValue++;
728      pD3D12Dec->m_needsGPUFlush = false;
729   }
730   return;
731
732flush_fail:
733   debug_printf("[d3d12_video_decoder] d3d12_video_decoder_flush failed for fenceValue: %d\n", pD3D12Dec->m_fenceValue);
734   assert(false);
735}
736
737bool
738d3d12_video_decoder_create_command_objects(const struct d3d12_screen *pD3D12Screen,
739                                           struct d3d12_video_decoder *pD3D12Dec)
740{
741   assert(pD3D12Dec->m_spD3D12VideoDevice);
742
743   D3D12_COMMAND_QUEUE_DESC commandQueueDesc = { D3D12_COMMAND_LIST_TYPE_VIDEO_DECODE };
744   HRESULT hr = pD3D12Screen->dev->CreateCommandQueue(&commandQueueDesc,
745                                                      IID_PPV_ARGS(pD3D12Dec->m_spDecodeCommandQueue.GetAddressOf()));
746   if (FAILED(hr)) {
747      debug_printf("[d3d12_video_decoder] d3d12_video_decoder_create_command_objects - Call to CreateCommandQueue "
748                      "failed with HR %x\n",
749                      hr);
750      return false;
751   }
752
753   hr = pD3D12Screen->dev->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&pD3D12Dec->m_spFence));
754   if (FAILED(hr)) {
755      debug_printf(
756         "[d3d12_video_decoder] d3d12_video_decoder_create_command_objects - Call to CreateFence failed with HR %x\n",
757         hr);
758      return false;
759   }
760
761   hr = pD3D12Screen->dev->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_VIDEO_DECODE,
762                                                  IID_PPV_ARGS(pD3D12Dec->m_spCommandAllocator.GetAddressOf()));
763   if (FAILED(hr)) {
764      debug_printf("[d3d12_video_decoder] d3d12_video_decoder_create_command_objects - Call to "
765                      "CreateCommandAllocator failed with HR %x\n",
766                      hr);
767      return false;
768   }
769
770   hr = pD3D12Screen->dev->CreateCommandList(0,
771                                             D3D12_COMMAND_LIST_TYPE_VIDEO_DECODE,
772                                             pD3D12Dec->m_spCommandAllocator.Get(),
773                                             nullptr,
774                                             IID_PPV_ARGS(pD3D12Dec->m_spDecodeCommandList.GetAddressOf()));
775
776   if (FAILED(hr)) {
777      debug_printf("[d3d12_video_decoder] d3d12_video_decoder_create_command_objects - Call to CreateCommandList "
778                      "failed with HR %x\n",
779                      hr);
780      return false;
781   }
782
783   return true;
784}
785
786bool
787d3d12_video_decoder_check_caps_and_create_decoder(const struct d3d12_screen *pD3D12Screen,
788                                                  struct d3d12_video_decoder *pD3D12Dec)
789{
790   assert(pD3D12Dec->m_spD3D12VideoDevice);
791
792   pD3D12Dec->m_decoderDesc = {};
793
794   D3D12_VIDEO_DECODE_CONFIGURATION decodeConfiguration = { pD3D12Dec->m_d3d12DecProfile,
795                                                            D3D12_BITSTREAM_ENCRYPTION_TYPE_NONE,
796                                                            D3D12_VIDEO_FRAME_CODED_INTERLACE_TYPE_NONE };
797
798   D3D12_FEATURE_DATA_VIDEO_DECODE_SUPPORT decodeSupport = {};
799   decodeSupport.NodeIndex = pD3D12Dec->m_NodeIndex;
800   decodeSupport.Configuration = decodeConfiguration;
801   decodeSupport.Width = pD3D12Dec->base.width;
802   decodeSupport.Height = pD3D12Dec->base.height;
803   decodeSupport.DecodeFormat = pD3D12Dec->m_decodeFormat;
804   // no info from above layer on framerate/bitrate
805   decodeSupport.FrameRate.Numerator = 0;
806   decodeSupport.FrameRate.Denominator = 0;
807   decodeSupport.BitRate = 0;
808
809   HRESULT hr = pD3D12Dec->m_spD3D12VideoDevice->CheckFeatureSupport(D3D12_FEATURE_VIDEO_DECODE_SUPPORT,
810                                                                     &decodeSupport,
811                                                                     sizeof(decodeSupport));
812   if (FAILED(hr)) {
813      debug_printf("[d3d12_video_decoder] d3d12_video_decoder_check_caps_and_create_decoder - CheckFeatureSupport "
814                      "failed with HR %x\n",
815                      hr);
816      return false;
817   }
818
819   if (!(decodeSupport.SupportFlags & D3D12_VIDEO_DECODE_SUPPORT_FLAG_SUPPORTED)) {
820      debug_printf("[d3d12_video_decoder] d3d12_video_decoder_check_caps_and_create_decoder - "
821                      "D3D12_VIDEO_DECODE_SUPPORT_FLAG_SUPPORTED was false when checking caps \n");
822      return false;
823   }
824
825   pD3D12Dec->m_configurationFlags = decodeSupport.ConfigurationFlags;
826   pD3D12Dec->m_tier = decodeSupport.DecodeTier;
827
828   if (d3d12_video_decoder_supports_aot_dpb(decodeSupport, pD3D12Dec->m_d3d12DecProfileType)) {
829      pD3D12Dec->m_ConfigDecoderSpecificFlags |= d3d12_video_decode_config_specific_flag_array_of_textures;
830   }
831
832   if (decodeSupport.ConfigurationFlags & D3D12_VIDEO_DECODE_CONFIGURATION_FLAG_HEIGHT_ALIGNMENT_MULTIPLE_32_REQUIRED) {
833      pD3D12Dec->m_ConfigDecoderSpecificFlags |= d3d12_video_decode_config_specific_flag_alignment_height;
834   }
835
836   if (decodeSupport.ConfigurationFlags & D3D12_VIDEO_DECODE_CONFIGURATION_FLAG_REFERENCE_ONLY_ALLOCATIONS_REQUIRED) {
837      pD3D12Dec->m_ConfigDecoderSpecificFlags |=
838         d3d12_video_decode_config_specific_flag_reference_only_textures_required;
839   }
840
841   pD3D12Dec->m_decoderDesc.NodeMask = pD3D12Dec->m_NodeMask;
842   pD3D12Dec->m_decoderDesc.Configuration = decodeConfiguration;
843
844   hr = pD3D12Dec->m_spD3D12VideoDevice->CreateVideoDecoder(&pD3D12Dec->m_decoderDesc,
845                                                            IID_PPV_ARGS(pD3D12Dec->m_spVideoDecoder.GetAddressOf()));
846   if (FAILED(hr)) {
847      debug_printf("[d3d12_video_decoder] d3d12_video_decoder_check_caps_and_create_decoder - CreateVideoDecoder "
848                      "failed with HR %x\n",
849                      hr);
850      return false;
851   }
852
853   return true;
854}
855
856bool
857d3d12_video_decoder_create_video_state_buffers(const struct d3d12_screen *pD3D12Screen,
858                                               struct d3d12_video_decoder *pD3D12Dec)
859{
860   assert(pD3D12Dec->m_spD3D12VideoDevice);
861   if (!d3d12_video_decoder_create_staging_bitstream_buffer(pD3D12Screen,
862                                                            pD3D12Dec,
863                                                            pD3D12Dec->m_InitialCompBitstreamGPUBufferSize)) {
864      debug_printf("[d3d12_video_decoder] d3d12_video_decoder_create_video_state_buffers - Failure on "
865                      "d3d12_video_decoder_create_staging_bitstream_buffer\n");
866      return false;
867   }
868
869   return true;
870}
871
872bool
873d3d12_video_decoder_create_staging_bitstream_buffer(const struct d3d12_screen *pD3D12Screen,
874                                                    struct d3d12_video_decoder *pD3D12Dec,
875                                                    uint64_t bufSize)
876{
877   assert(pD3D12Dec->m_spD3D12VideoDevice);
878
879   if (pD3D12Dec->m_curFrameCompressedBitstreamBuffer.Get() != nullptr) {
880      pD3D12Dec->m_curFrameCompressedBitstreamBuffer.Reset();
881   }
882
883   auto descHeap = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT, pD3D12Dec->m_NodeMask, pD3D12Dec->m_NodeMask);
884   auto descResource = CD3DX12_RESOURCE_DESC::Buffer(bufSize);
885   HRESULT hr = pD3D12Screen->dev->CreateCommittedResource(
886      &descHeap,
887      D3D12_HEAP_FLAG_NONE,
888      &descResource,
889      D3D12_RESOURCE_STATE_COMMON,
890      nullptr,
891      IID_PPV_ARGS(pD3D12Dec->m_curFrameCompressedBitstreamBuffer.GetAddressOf()));
892   if (FAILED(hr)) {
893      debug_printf("[d3d12_video_decoder] d3d12_video_decoder_create_staging_bitstream_buffer - "
894                      "CreateCommittedResource failed with HR %x\n",
895                      hr);
896      return false;
897   }
898
899   pD3D12Dec->m_curFrameCompressedBitstreamBufferAllocatedSize = bufSize;
900   return true;
901}
902
/**
 * Prepares decoder state for decoding one frame:
 *  - reconfigures the DPB / decoder / decoder heap if stream parameters changed,
 *  - refreshes the active reference list,
 *  - selects the decode output texture (and, when the driver requires it, a
 *    reference-only texture) and records the needed resource state transitions,
 *  - registers the current frame's DPB entry with the codec-specific manager.
 * Returns false only if the DPB reconfiguration failed.
 */
bool
d3d12_video_decoder_prepare_for_decode_frame(struct d3d12_video_decoder *pD3D12Dec,
                                             struct pipe_video_buffer *pCurrentDecodeTarget,
                                             struct d3d12_video_buffer *pD3D12VideoBuffer,
                                             ID3D12Resource **ppOutTexture2D,
                                             uint32_t *pOutSubresourceIndex,
                                             ID3D12Resource **ppRefOnlyOutTexture2D,
                                             uint32_t *pRefOnlyOutSubresourceIndex,
                                             const d3d12_video_decode_output_conversion_arguments &conversionArgs)
{
   if(!d3d12_video_decoder_reconfigure_dpb(pD3D12Dec, pD3D12VideoBuffer, conversionArgs)) {
      debug_printf("d3d12_video_decoder_reconfigure_dpb failed!\n");
      return false;
   }

   // Refresh DPB active references for current frame, release memory for unused references.
   d3d12_video_decoder_refresh_dpb_active_references(pD3D12Dec);

   // Get the output texture for the current frame to be decoded
   pD3D12Dec->m_spDPBManager->get_current_frame_decode_output_texture(pCurrentDecodeTarget,
                                                                      ppOutTexture2D,
                                                                      pOutSubresourceIndex);

   auto vidBuffer = (struct d3d12_video_buffer *)(pCurrentDecodeTarget);
   // If is_pipe_buffer_underlying_output_decode_allocation is enabled,
   // we can just use the underlying allocation in pCurrentDecodeTarget
   // and avoid an extra copy after decoding the frame.
   // If this is the case, we need to handle the residency of this resource
   // (if not we're actually creating the resources with CreateCommitedResource with
   // residency by default)
   if(pD3D12Dec->m_spDPBManager->is_pipe_buffer_underlying_output_decode_allocation()) {
      assert(d3d12_resource_resource(vidBuffer->texture) == *ppOutTexture2D);
      // Make it permanently resident for video use
      d3d12_promote_to_permanent_residency(pD3D12Dec->m_pD3D12Screen, vidBuffer->texture);
   }

   // Get the reference only texture for the current frame to be decoded (if applicable)
   bool fReferenceOnly = (pD3D12Dec->m_ConfigDecoderSpecificFlags &
                          d3d12_video_decode_config_specific_flag_reference_only_textures_required) != 0;
   if (fReferenceOnly) {
      bool needsTransitionToDecodeWrite = false;
      pD3D12Dec->m_spDPBManager->get_reference_only_output(pCurrentDecodeTarget,
                                                           ppRefOnlyOutTexture2D,
                                                           pRefOnlyOutSubresourceIndex,
                                                           needsTransitionToDecodeWrite);
      assert(needsTransitionToDecodeWrite);

      CD3DX12_RESOURCE_DESC outputDesc(GetDesc(*ppRefOnlyOutTexture2D));
      // Decompose the flat subresource index into mip/array/plane coordinates so
      // each format plane's subresource can be transitioned individually below.
      uint32_t MipLevel, PlaneSlice, ArraySlice;
      D3D12DecomposeSubresource(*pRefOnlyOutSubresourceIndex,
                                outputDesc.MipLevels,
                                outputDesc.ArraySize(),
                                MipLevel,
                                ArraySlice,
                                PlaneSlice);

      // NOTE: the PlaneSlice produced by the decomposition above is discarded;
      // the loops below reuse it as the per-plane iteration variable.
      // Transition every plane subresource COMMON -> VIDEO_DECODE_WRITE for decoding.
      for (PlaneSlice = 0; PlaneSlice < pD3D12Dec->m_decodeFormatInfo.PlaneCount; PlaneSlice++) {
         uint planeOutputSubresource = outputDesc.CalcSubresource(MipLevel, ArraySlice, PlaneSlice);

         D3D12_RESOURCE_BARRIER resourceBarrierCommonToDecode[1] = {
            CD3DX12_RESOURCE_BARRIER::Transition(*ppRefOnlyOutTexture2D,
                                                 D3D12_RESOURCE_STATE_COMMON,
                                                 D3D12_RESOURCE_STATE_VIDEO_DECODE_WRITE,
                                                 planeOutputSubresource),
         };
         pD3D12Dec->m_spDecodeCommandList->ResourceBarrier(1u, resourceBarrierCommonToDecode);
      }

      // Schedule reverse (back to common) transitions before command list closes for current frame
      // (they are flushed in d3d12_video_decoder_flush via m_transitionsBeforeCloseCmdList).
      for (PlaneSlice = 0; PlaneSlice < pD3D12Dec->m_decodeFormatInfo.PlaneCount; PlaneSlice++) {
         uint planeOutputSubresource = outputDesc.CalcSubresource(MipLevel, ArraySlice, PlaneSlice);
         pD3D12Dec->m_transitionsBeforeCloseCmdList.push_back(
            CD3DX12_RESOURCE_BARRIER::Transition(*ppRefOnlyOutTexture2D,
                                                 D3D12_RESOURCE_STATE_VIDEO_DECODE_WRITE,
                                                 D3D12_RESOURCE_STATE_COMMON,
                                                 planeOutputSubresource));
      }
   }

   // If decoded needs reference_only entries in the dpb, use the reference_only allocation for current frame
   // otherwise, use the standard output resource
   ID3D12Resource *pCurrentFrameDPBEntry = fReferenceOnly ? *ppRefOnlyOutTexture2D : *ppOutTexture2D;
   uint32_t currentFrameDPBEntrySubresource = fReferenceOnly ? *pRefOnlyOutSubresourceIndex : *pOutSubresourceIndex;

   // Register the chosen DPB entry with the codec-specific references manager.
   switch (pD3D12Dec->m_d3d12DecProfileType) {
      case d3d12_video_decode_profile_type_h264:
      {
         d3d12_video_decoder_prepare_current_frame_references_h264(pD3D12Dec,
                                                                   pCurrentFrameDPBEntry,
                                                                   currentFrameDPBEntrySubresource);
      } break;

      default:
      {
         unreachable("Unsupported d3d12_video_decode_profile_type");
      } break;
   }

   return true;
}
1003
/**
 * (Re)creates the D3D12 video decoder, decoder heap and DPB manager whenever
 * the negotiated output format, interlace type, resolution or DPB size of the
 * current stream no longer match the previously created objects.
 * Returns false (after logging) if any D3D12 object creation fails; member
 * state is only updated after the corresponding creation call succeeds.
 */
bool
d3d12_video_decoder_reconfigure_dpb(struct d3d12_video_decoder *pD3D12Dec,
                                    struct d3d12_video_buffer *pD3D12VideoBuffer,
                                    const d3d12_video_decode_output_conversion_arguments &conversionArguments)
{
   // Current frame dimensions / DPB requirements from the codec-specific state.
   uint32_t width;
   uint32_t height;
   uint16_t maxDPB;
   bool isInterlaced;
   d3d12_video_decoder_get_frame_info(pD3D12Dec, &width, &height, &maxDPB, isInterlaced);

   ID3D12Resource *pPipeD3D12DstResource = d3d12_resource_resource(pD3D12VideoBuffer->texture);
   D3D12_RESOURCE_DESC outputResourceDesc = GetDesc(pPipeD3D12DstResource);

   pD3D12VideoBuffer->base.interlaced = isInterlaced;
   D3D12_VIDEO_FRAME_CODED_INTERLACE_TYPE interlaceTypeRequested =
      isInterlaced ? D3D12_VIDEO_FRAME_CODED_INTERLACE_TYPE_FIELD_BASED : D3D12_VIDEO_FRAME_CODED_INTERLACE_TYPE_NONE;
   // Re-create the decoder only when the output format or interlace type changed.
   if ((pD3D12Dec->m_decodeFormat != outputResourceDesc.Format) ||
       (pD3D12Dec->m_decoderDesc.Configuration.InterlaceType != interlaceTypeRequested)) {
      // Copy current pD3D12Dec->m_decoderDesc, modify decodeprofile and re-create decoder.
      D3D12_VIDEO_DECODER_DESC decoderDesc = pD3D12Dec->m_decoderDesc;
      decoderDesc.Configuration.InterlaceType = interlaceTypeRequested;
      decoderDesc.Configuration.DecodeProfile =
         d3d12_video_decoder_resolve_profile(pD3D12Dec->m_d3d12DecProfileType);
      pD3D12Dec->m_spVideoDecoder.Reset();
      HRESULT hr =
         pD3D12Dec->m_spD3D12VideoDevice->CreateVideoDecoder(&decoderDesc,
                                                             IID_PPV_ARGS(pD3D12Dec->m_spVideoDecoder.GetAddressOf()));
      if (FAILED(hr)) {
         debug_printf(
            "[d3d12_video_decoder] d3d12_video_decoder_reconfigure_dpb - CreateVideoDecoder failed with HR %x\n",
            hr);
         return false;
      }
      // Update state after CreateVideoDecoder succeeds only.
      pD3D12Dec->m_decoderDesc = decoderDesc;
   }

   // (Re)create the decoder heap and DPB manager when any dimension/format/DPB
   // capacity requirement changed, or on first use.
   if (!pD3D12Dec->m_spDPBManager || !pD3D12Dec->m_spVideoDecoderHeap ||
       pD3D12Dec->m_decodeFormat != outputResourceDesc.Format || pD3D12Dec->m_decoderHeapDesc.DecodeWidth != width ||
       pD3D12Dec->m_decoderHeapDesc.DecodeHeight != height ||
       pD3D12Dec->m_decoderHeapDesc.MaxDecodePictureBufferCount < maxDPB) {
      // Detect the combination of AOT/ReferenceOnly to configure the DPB manager
      uint16_t referenceCount = (conversionArguments.Enable) ? (uint16_t) conversionArguments.ReferenceFrameCount +
                                                                  1 /*extra slot for current picture*/ :
                                                               maxDPB;
      d3d12_video_decode_dpb_descriptor dpbDesc = {};
      dpbDesc.Width = (conversionArguments.Enable) ? conversionArguments.ReferenceInfo.Width : width;
      dpbDesc.Height = (conversionArguments.Enable) ? conversionArguments.ReferenceInfo.Height : height;
      dpbDesc.Format =
         (conversionArguments.Enable) ? conversionArguments.ReferenceInfo.Format.Format : outputResourceDesc.Format;
      dpbDesc.fArrayOfTexture =
         ((pD3D12Dec->m_ConfigDecoderSpecificFlags & d3d12_video_decode_config_specific_flag_array_of_textures) != 0);
      dpbDesc.dpbSize = referenceCount;
      dpbDesc.m_NodeMask = pD3D12Dec->m_NodeMask;
      dpbDesc.fReferenceOnly = ((pD3D12Dec->m_ConfigDecoderSpecificFlags &
                                 d3d12_video_decode_config_specific_flag_reference_only_textures_required) != 0);

      // Create DPB manager
      // (only created once; subsequent reconfigurations reuse the existing manager)
      if (pD3D12Dec->m_spDPBManager == nullptr) {
         pD3D12Dec->m_spDPBManager.reset(new d3d12_video_decoder_references_manager(pD3D12Dec->m_pD3D12Screen,
                                                                                    pD3D12Dec->m_NodeMask,
                                                                                    pD3D12Dec->m_d3d12DecProfileType,
                                                                                    dpbDesc));
      }

      //
      // (Re)-create decoder heap
      //
      D3D12_VIDEO_DECODER_HEAP_DESC decoderHeapDesc = {};
      decoderHeapDesc.NodeMask = pD3D12Dec->m_NodeMask;
      decoderHeapDesc.Configuration = pD3D12Dec->m_decoderDesc.Configuration;
      decoderHeapDesc.DecodeWidth = dpbDesc.Width;
      decoderHeapDesc.DecodeHeight = dpbDesc.Height;
      decoderHeapDesc.Format = dpbDesc.Format;
      // NOTE(review): the heap is sized with maxDPB while dpbDesc.dpbSize uses
      // referenceCount when conversion is enabled — confirm this asymmetry is intentional.
      decoderHeapDesc.MaxDecodePictureBufferCount = maxDPB;
      pD3D12Dec->m_spVideoDecoderHeap.Reset();
      HRESULT hr = pD3D12Dec->m_spD3D12VideoDevice->CreateVideoDecoderHeap(
         &decoderHeapDesc,
         IID_PPV_ARGS(pD3D12Dec->m_spVideoDecoderHeap.GetAddressOf()));
      if (FAILED(hr)) {
         debug_printf(
            "[d3d12_video_decoder] d3d12_video_decoder_reconfigure_dpb - CreateVideoDecoderHeap failed with HR %x\n",
            hr);
         return false;
      }
      // Update pD3D12Dec after CreateVideoDecoderHeap succeeds only.
      pD3D12Dec->m_decoderHeapDesc = decoderHeapDesc;
   }

   // Remember the format we configured for, so the change-detection above works next frame.
   pD3D12Dec->m_decodeFormat = outputResourceDesc.Format;

   return true;
}
1098
1099void
1100d3d12_video_decoder_refresh_dpb_active_references(struct d3d12_video_decoder *pD3D12Dec)
1101{
1102   switch (pD3D12Dec->m_d3d12DecProfileType) {
1103      case d3d12_video_decode_profile_type_h264:
1104      {
1105         d3d12_video_decoder_refresh_dpb_active_references_h264(pD3D12Dec);
1106      } break;
1107
1108      default:
1109      {
1110         unreachable("Unsupported d3d12_video_decode_profile_type");
1111      } break;
1112   }
1113}
1114
1115void
1116d3d12_video_decoder_get_frame_info(
1117   struct d3d12_video_decoder *pD3D12Dec, uint32_t *pWidth, uint32_t *pHeight, uint16_t *pMaxDPB, bool &isInterlaced)
1118{
1119   *pWidth = 0;
1120   *pHeight = 0;
1121   *pMaxDPB = 0;
1122   isInterlaced = false;
1123
1124   switch (pD3D12Dec->m_d3d12DecProfileType) {
1125      case d3d12_video_decode_profile_type_h264:
1126      {
1127         d3d12_video_decoder_get_frame_info_h264(pD3D12Dec, pWidth, pHeight, pMaxDPB, isInterlaced);
1128      } break;
1129
1130      default:
1131      {
1132         unreachable("Unsupported d3d12_video_decode_profile_type");
1133      } break;
1134   }
1135
1136   if (pD3D12Dec->m_ConfigDecoderSpecificFlags & d3d12_video_decode_config_specific_flag_alignment_height) {
1137      const uint32_t AlignmentMask = 31;
1138      *pHeight = (*pHeight + AlignmentMask) & ~AlignmentMask;
1139   }
1140}
1141
1142///
1143/// Returns the number of bytes starting from [buf.data() + buffsetOffset] where the _targetCode_ is found
1144/// Returns -1 if start code not found
1145///
1146int
1147d3d12_video_decoder_get_next_startcode_offset(std::vector<uint8_t> &buf,
1148                                              unsigned int bufferOffset,
1149                                              unsigned int targetCode,
1150                                              unsigned int targetCodeBitSize,
1151                                              unsigned int numBitsToSearchIntoBuffer)
1152{
1153   struct vl_vlc vlc = { 0 };
1154
1155   // Shorten the buffer to be [buffetOffset, endOfBuf)
1156   unsigned int bufSize = buf.size() - bufferOffset;
1157   uint8_t *bufPtr = buf.data();
1158   bufPtr += bufferOffset;
1159
1160   /* search the first numBitsToSearchIntoBuffer bytes for a startcode */
1161   vl_vlc_init(&vlc, 1, (const void *const *) &bufPtr, &bufSize);
1162   for (uint i = 0; i < numBitsToSearchIntoBuffer && vl_vlc_bits_left(&vlc) >= targetCodeBitSize; ++i) {
1163      if (vl_vlc_peekbits(&vlc, targetCodeBitSize) == targetCode)
1164         return i;
1165      vl_vlc_eatbits(&vlc, 8);   // Stride is 8 bits = 1 byte
1166      vl_vlc_fillbits(&vlc);
1167   }
1168
1169   return -1;
1170}
1171
1172void
1173d3d12_video_decoder_store_converted_dxva_picparams_from_pipe_input(
1174   struct d3d12_video_decoder *codec,   // input argument, current decoder
1175   struct pipe_picture_desc
1176      *picture,   // input argument, base structure of pipe_XXX_picture_desc where XXX is the codec name
1177   struct d3d12_video_buffer *pD3D12VideoBuffer   // input argument, target video buffer
1178)
1179{
1180   assert(picture);
1181   assert(codec);
1182   struct d3d12_video_decoder *pD3D12Dec = (struct d3d12_video_decoder *) codec;
1183
1184   d3d12_video_decode_profile_type profileType =
1185      d3d12_video_decoder_convert_pipe_video_profile_to_profile_type(codec->base.profile);
1186   switch (profileType) {
1187      case d3d12_video_decode_profile_type_h264:
1188      {
1189         size_t dxvaPicParamsBufferSize = sizeof(DXVA_PicParams_H264);
1190         pipe_h264_picture_desc *pPicControlH264 = (pipe_h264_picture_desc *) picture;
1191         ID3D12Resource *pPipeD3D12DstResource = d3d12_resource_resource(pD3D12VideoBuffer->texture);
1192         D3D12_RESOURCE_DESC outputResourceDesc = GetDesc(pPipeD3D12DstResource);
1193         DXVA_PicParams_H264 dxvaPicParamsH264 =
1194            d3d12_video_decoder_dxva_picparams_from_pipe_picparams_h264(pD3D12Dec->m_fenceValue,
1195                                                                        codec->base.profile,
1196                                                                        outputResourceDesc.Width,
1197                                                                        outputResourceDesc.Height,
1198                                                                        pPicControlH264);
1199
1200         d3d12_video_decoder_store_dxva_picparams_in_picparams_buffer(codec,
1201                                                                      &dxvaPicParamsH264,
1202                                                                      dxvaPicParamsBufferSize);
1203
1204         size_t dxvaQMatrixBufferSize = sizeof(DXVA_Qmatrix_H264);
1205         DXVA_Qmatrix_H264 dxvaQmatrixH264 = {};
1206         d3d12_video_decoder_dxva_qmatrix_from_pipe_picparams_h264((pipe_h264_picture_desc *) picture,
1207                                                                   dxvaQmatrixH264);
1208         d3d12_video_decoder_store_dxva_qmatrix_in_qmatrix_buffer(codec, &dxvaQmatrixH264, dxvaQMatrixBufferSize);
1209      } break;
1210      default:
1211      {
1212         unreachable("Unsupported d3d12_video_decode_profile_type");
1213      } break;
1214   }
1215}
1216
1217void
1218d3d12_video_decoder_prepare_dxva_slices_control(
1219   struct d3d12_video_decoder *pD3D12Dec,   // input argument, current decoder
1220   struct pipe_picture_desc *picture
1221)
1222{
1223   d3d12_video_decode_profile_type profileType =
1224      d3d12_video_decoder_convert_pipe_video_profile_to_profile_type(pD3D12Dec->base.profile);
1225   switch (profileType) {
1226      case d3d12_video_decode_profile_type_h264:
1227      {
1228
1229         std::vector<DXVA_Slice_H264_Short> pOutSliceControlBuffers;
1230         struct pipe_h264_picture_desc* picture_h264 = (struct pipe_h264_picture_desc*) picture;
1231         d3d12_video_decoder_prepare_dxva_slices_control_h264(pD3D12Dec, pOutSliceControlBuffers, picture_h264);
1232
1233         assert(sizeof(pOutSliceControlBuffers.data()[0]) == sizeof(DXVA_Slice_H264_Short));
1234         uint64_t DXVAStructSize = pOutSliceControlBuffers.size() * sizeof((pOutSliceControlBuffers.data()[0]));
1235         assert((DXVAStructSize % sizeof(DXVA_Slice_H264_Short)) == 0);
1236         d3d12_video_decoder_store_dxva_slicecontrol_in_slicecontrol_buffer(pD3D12Dec,
1237                                                                            pOutSliceControlBuffers.data(),
1238                                                                            DXVAStructSize);
1239         assert(pD3D12Dec->m_SliceControlBuffer.size() == DXVAStructSize);
1240      } break;
1241      default:
1242      {
1243         unreachable("Unsupported d3d12_video_decode_profile_type");
1244      } break;
1245   }
1246}
1247
1248void
1249d3d12_video_decoder_store_dxva_slicecontrol_in_slicecontrol_buffer(struct d3d12_video_decoder *pD3D12Dec,
1250                                                                   void *pDXVAStruct,
1251                                                                   uint64_t DXVAStructSize)
1252{
1253   if (pD3D12Dec->m_SliceControlBuffer.capacity() < DXVAStructSize) {
1254      pD3D12Dec->m_SliceControlBuffer.reserve(DXVAStructSize);
1255   }
1256
1257   pD3D12Dec->m_SliceControlBuffer.resize(DXVAStructSize);
1258   memcpy(pD3D12Dec->m_SliceControlBuffer.data(), pDXVAStruct, DXVAStructSize);
1259}
1260
1261void
1262d3d12_video_decoder_store_dxva_qmatrix_in_qmatrix_buffer(struct d3d12_video_decoder *pD3D12Dec,
1263                                                         void *pDXVAStruct,
1264                                                         uint64_t DXVAStructSize)
1265{
1266   if (pD3D12Dec->m_InverseQuantMatrixBuffer.capacity() < DXVAStructSize) {
1267      pD3D12Dec->m_InverseQuantMatrixBuffer.reserve(DXVAStructSize);
1268   }
1269
1270   pD3D12Dec->m_InverseQuantMatrixBuffer.resize(DXVAStructSize);
1271   memcpy(pD3D12Dec->m_InverseQuantMatrixBuffer.data(), pDXVAStruct, DXVAStructSize);
1272}
1273
1274void
1275d3d12_video_decoder_store_dxva_picparams_in_picparams_buffer(struct d3d12_video_decoder *pD3D12Dec,
1276                                                             void *pDXVAStruct,
1277                                                             uint64_t DXVAStructSize)
1278{
1279   if (pD3D12Dec->m_picParamsBuffer.capacity() < DXVAStructSize) {
1280      pD3D12Dec->m_picParamsBuffer.reserve(DXVAStructSize);
1281   }
1282
1283   pD3D12Dec->m_picParamsBuffer.resize(DXVAStructSize);
1284   memcpy(pD3D12Dec->m_picParamsBuffer.data(), pDXVAStruct, DXVAStructSize);
1285}
1286
1287bool
1288d3d12_video_decoder_supports_aot_dpb(D3D12_FEATURE_DATA_VIDEO_DECODE_SUPPORT decodeSupport,
1289                                     d3d12_video_decode_profile_type profileType)
1290{
1291   bool supportedProfile = false;
1292   switch (profileType) {
1293      case d3d12_video_decode_profile_type_h264:
1294         supportedProfile = true;
1295         break;
1296      default:
1297         supportedProfile = false;
1298         break;
1299   }
1300
1301   return (decodeSupport.DecodeTier >= D3D12_VIDEO_DECODE_TIER_2) && supportedProfile;
1302}
1303
1304d3d12_video_decode_profile_type
1305d3d12_video_decoder_convert_pipe_video_profile_to_profile_type(enum pipe_video_profile profile)
1306{
1307   switch (profile) {
1308      case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE:
1309      case PIPE_VIDEO_PROFILE_MPEG4_AVC_CONSTRAINED_BASELINE:
1310      case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN:
1311      case PIPE_VIDEO_PROFILE_MPEG4_AVC_EXTENDED:
1312      case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH:
1313      case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH10:
1314         return d3d12_video_decode_profile_type_h264;
1315      default:
1316      {
1317         unreachable("Unsupported pipe video profile");
1318      } break;
1319   }
1320}
1321
1322GUID
1323d3d12_video_decoder_convert_pipe_video_profile_to_d3d12_profile(enum pipe_video_profile profile)
1324{
1325   switch (profile) {
1326      case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE:
1327      case PIPE_VIDEO_PROFILE_MPEG4_AVC_CONSTRAINED_BASELINE:
1328      case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN:
1329      case PIPE_VIDEO_PROFILE_MPEG4_AVC_EXTENDED:
1330      case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH:
1331      case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH10:
1332         return D3D12_VIDEO_DECODE_PROFILE_H264;
1333      default:
1334         return {};
1335   }
1336}
1337
1338GUID
1339d3d12_video_decoder_resolve_profile(d3d12_video_decode_profile_type profileType)
1340{
1341   switch (profileType) {
1342      case d3d12_video_decode_profile_type_h264:
1343         return D3D12_VIDEO_DECODE_PROFILE_H264;
1344         break;
1345      default:
1346      {
1347         unreachable("Unsupported d3d12_video_decode_profile_type");
1348      } break;
1349   }
1350}
1351