1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2021 The Khronos Group Inc.
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 *
19 *//*!
20 * \file
 * \brief Video Decoding Base Class Functionality
22 *//*--------------------------------------------------------------------*/
23 /*
24 * Copyright 2020 NVIDIA Corporation.
25 *
26 * Licensed under the Apache License, Version 2.0 (the "License");
27 * you may not use this file except in compliance with the License.
28 * You may obtain a copy of the License at
29 *
30 * http://www.apache.org/licenses/LICENSE-2.0
31 *
32 * Unless required by applicable law or agreed to in writing, software
33 * distributed under the License is distributed on an "AS IS" BASIS,
34 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
35 * See the License for the specific language governing permissions and
36 * limitations under the License.
37 */
38
39 #include "vktVideoBaseDecodeUtils.hpp"
40 #include "tcuPlatform.hpp"
41 #include "vkDefs.hpp"
42 #include "vkRef.hpp"
43 #include "vkQueryUtil.hpp"
44 #include "vkMemUtil.hpp"
45 #include "vkCmdUtil.hpp"
46 #include "vkObjUtil.hpp"
47 #include "vkTypeUtil.hpp"
48 #include "vkStrUtil.hpp"
49 #include "deSTLUtil.hpp"
50 #include "deRandom.hpp"
51
52 #include <iostream>
53
54 namespace vkt
55 {
56 namespace video
57 {
58 using namespace vk;
59 using namespace std;
60 using de::MovePtr;
61 using de::SharedPtr;
62
// Bit positions and masks telling which field(s) of a picture are used for reference.
static const uint32_t	topFieldShift			= 0;
static const uint32_t	topFieldMask			= (1u << topFieldShift);
static const uint32_t	bottomFieldShift		= 1;
static const uint32_t	bottomFieldMask			= (1u << bottomFieldShift);
static const uint32_t	fieldIsReferenceMask	= (topFieldMask | bottomFieldMask);

// DPB sizing: 16 slots plus EXTRA_DPB_SLOTS additional one(s).
static const uint32_t	EXTRA_DPB_SLOTS			= 1;
static const uint32_t	MAX_DPB_SLOTS_PLUS_1	= 16 + EXTRA_DPB_SLOTS;

// Per-codec upper bounds checked by the assertions in dpbEntry.
#define HEVC_MAX_DPB_SLOTS	16
#define AVC_MAX_DPB_SLOTS	17

// Frame rate word: the bits above bit 13 are extracted as the numerator, the low 14 bits as the denominator.
#define NVIDIA_FRAME_RATE_NUM(rate)	((rate) >> 14)
#define NVIDIA_FRAME_RATE_DEN(rate)	((rate)&0x3fff)
76
77 template<typename T>
dataOrNullPtr(const std::vector<T>& v)78 inline const T* dataOrNullPtr (const std::vector<T>& v)
79 {
80 return (v.empty() ? DE_NULL : &v[0]);
81 }
82
83 template<typename T>
dataOrNullPtr(std::vector<T>& v)84 inline T* dataOrNullPtr (std::vector<T>& v)
85 {
86 return (v.empty() ? DE_NULL : &v[0]);
87 }
88
//! Grows \p v by exactly one element and returns a reference to the new last element.
//! The vector must already have spare capacity; exceeding it is caught by the assertion.
template<typename T>
inline T& incSizeSafe (std::vector<T>& v)
{
	const typename std::vector<T>::size_type	oldSize	= v.size();

	DE_ASSERT(oldSize < v.capacity()); // Disable grow

	v.resize(oldSize + 1);

	return v[oldSize];
}
98
99
100
101 /******************************************************/
102 //! \struct nvVideoH264PicParameters
103 //! H.264 picture parameters
104 /******************************************************/
105 struct nvVideoH264PicParameters
106 {
107 enum { MAX_REF_PICTURES_LIST_ENTRIES = 16 };
108
109 StdVideoDecodeH264PictureInfo stdPictureInfo;
110 VkVideoDecodeH264PictureInfoKHR pictureInfo;
111 VkVideoDecodeH264SessionParametersAddInfoKHR pictureParameters;
112 VkVideoDecodeH264DpbSlotInfoKHR mvcInfo;
113 NvidiaVideoDecodeH264DpbSlotInfo currentDpbSlotInfo;
114 NvidiaVideoDecodeH264DpbSlotInfo dpbRefList[MAX_REF_PICTURES_LIST_ENTRIES];
115 };
116
117 /*******************************************************/
118 //! \struct nvVideoH265PicParameters
119 //! HEVC picture parameters
120 /*******************************************************/
121 struct nvVideoH265PicParameters
122 {
123 enum { MAX_REF_PICTURES_LIST_ENTRIES = 16 };
124
125 StdVideoDecodeH265PictureInfo stdPictureInfo;
126 VkVideoDecodeH265PictureInfoKHR pictureInfo;
127 VkVideoDecodeH265SessionParametersAddInfoKHR pictureParameters;
128 NvidiaVideoDecodeH265DpbSlotInfo dpbRefList[MAX_REF_PICTURES_LIST_ENTRIES];
129 };
130
131
GetPic(INvidiaVulkanPicture* pPicBuf)132 inline NvidiaVulkanPictureBase* GetPic (INvidiaVulkanPicture* pPicBuf)
133 {
134 return (NvidiaVulkanPictureBase*)pPicBuf;
135 }
136
ConvertStdH264ChromaFormatToVulkan(StdVideoH264ChromaFormatIdc stdFormat)137 inline VkVideoChromaSubsamplingFlagBitsKHR ConvertStdH264ChromaFormatToVulkan (StdVideoH264ChromaFormatIdc stdFormat)
138 {
139 switch (stdFormat)
140 {
141 case STD_VIDEO_H264_CHROMA_FORMAT_IDC_420: return VK_VIDEO_CHROMA_SUBSAMPLING_420_BIT_KHR;
142 case STD_VIDEO_H264_CHROMA_FORMAT_IDC_422: return VK_VIDEO_CHROMA_SUBSAMPLING_422_BIT_KHR;
143 case STD_VIDEO_H264_CHROMA_FORMAT_IDC_444: return VK_VIDEO_CHROMA_SUBSAMPLING_444_BIT_KHR;
144 default: TCU_THROW(InternalError, "Invalid chroma sub-sampling format");
145 }
146 }
147
codecGetVkFormat(VkVideoChromaSubsamplingFlagBitsKHR chromaFormatIdc, int bitDepthLuma, bool isSemiPlanar)148 VkFormat codecGetVkFormat (VkVideoChromaSubsamplingFlagBitsKHR chromaFormatIdc,
149 int bitDepthLuma,
150 bool isSemiPlanar)
151 {
152 switch (chromaFormatIdc)
153 {
154 case VK_VIDEO_CHROMA_SUBSAMPLING_MONOCHROME_BIT_KHR:
155 {
156 switch (bitDepthLuma)
157 {
158 case VK_VIDEO_COMPONENT_BIT_DEPTH_8_BIT_KHR: return VK_FORMAT_R8_UNORM;
159 case VK_VIDEO_COMPONENT_BIT_DEPTH_10_BIT_KHR: return VK_FORMAT_R10X6_UNORM_PACK16;
160 case VK_VIDEO_COMPONENT_BIT_DEPTH_12_BIT_KHR: return VK_FORMAT_R12X4_UNORM_PACK16;
161 default: TCU_THROW(InternalError, "Cannot map monochrome format to VkFormat");
162 }
163 }
164 case VK_VIDEO_CHROMA_SUBSAMPLING_420_BIT_KHR:
165 {
166 switch (bitDepthLuma)
167 {
168 case VK_VIDEO_COMPONENT_BIT_DEPTH_8_BIT_KHR: return (isSemiPlanar ? VK_FORMAT_G8_B8R8_2PLANE_420_UNORM : VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM);
169 case VK_VIDEO_COMPONENT_BIT_DEPTH_10_BIT_KHR: return (isSemiPlanar ? VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16 : VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16);
170 case VK_VIDEO_COMPONENT_BIT_DEPTH_12_BIT_KHR: return (isSemiPlanar ? VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16 : VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16);
171 default: TCU_THROW(InternalError, "Cannot map 420 format to VkFormat");
172 }
173 }
174 case VK_VIDEO_CHROMA_SUBSAMPLING_422_BIT_KHR:
175 {
176 switch (bitDepthLuma)
177 {
178 case VK_VIDEO_COMPONENT_BIT_DEPTH_8_BIT_KHR: return (isSemiPlanar ? VK_FORMAT_G8_B8R8_2PLANE_422_UNORM : VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM);
179 case VK_VIDEO_COMPONENT_BIT_DEPTH_10_BIT_KHR: return (isSemiPlanar ? VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16 : VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16);
180 case VK_VIDEO_COMPONENT_BIT_DEPTH_12_BIT_KHR: return (isSemiPlanar ? VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16 : VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16);
181 default: TCU_THROW(InternalError, "Cannot map 422 format to VkFormat");
182 }
183 }
184 case VK_VIDEO_CHROMA_SUBSAMPLING_444_BIT_KHR:
185 {
186 switch (bitDepthLuma)
187 {
188 case VK_VIDEO_COMPONENT_BIT_DEPTH_8_BIT_KHR: return (isSemiPlanar ? VK_FORMAT_G8_B8R8_2PLANE_444_UNORM_EXT : VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM);
189 case VK_VIDEO_COMPONENT_BIT_DEPTH_10_BIT_KHR: return (isSemiPlanar ? VK_FORMAT_G10X6_B10X6R10X6_2PLANE_444_UNORM_3PACK16_EXT : VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_444_UNORM_3PACK16);
190 case VK_VIDEO_COMPONENT_BIT_DEPTH_12_BIT_KHR: return (isSemiPlanar ? VK_FORMAT_G12X4_B12X4R12X4_2PLANE_444_UNORM_3PACK16_EXT : VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_444_UNORM_3PACK16);
191 default: TCU_THROW(InternalError, "Cannot map 444 format to VkFormat");
192 }
193 }
194 default: TCU_THROW(InternalError, "Unknown input idc format");
195 }
196 }
197
getLumaBitDepth(deUint8 lumaBitDepthMinus8)198 VkVideoComponentBitDepthFlagsKHR getLumaBitDepth (deUint8 lumaBitDepthMinus8)
199 {
200 switch (lumaBitDepthMinus8)
201 {
202 case 0: return VK_VIDEO_COMPONENT_BIT_DEPTH_8_BIT_KHR;
203 case 2: return VK_VIDEO_COMPONENT_BIT_DEPTH_10_BIT_KHR;
204 case 4: return VK_VIDEO_COMPONENT_BIT_DEPTH_12_BIT_KHR;
205 default:TCU_THROW(InternalError, "Unhandler lumaBitDepthMinus8");
206 }
207 }
208
getChromaBitDepth(deUint8 chromaBitDepthMinus8)209 VkVideoComponentBitDepthFlagsKHR getChromaBitDepth (deUint8 chromaBitDepthMinus8)
210 {
211 switch (chromaBitDepthMinus8)
212 {
213 case 0: return VK_VIDEO_COMPONENT_BIT_DEPTH_8_BIT_KHR;
214 case 2: return VK_VIDEO_COMPONENT_BIT_DEPTH_10_BIT_KHR;
215 case 4: return VK_VIDEO_COMPONENT_BIT_DEPTH_12_BIT_KHR;
216 default:TCU_THROW(InternalError, "Unhandler chromaBitDepthMinus8");
217 }
218 }
219
setImageLayout(const DeviceInterface& vkd, VkCommandBuffer cmdBuffer, VkImage image, VkImageLayout oldImageLayout, VkImageLayout newImageLayout, VkPipelineStageFlags2KHR srcStages, VkPipelineStageFlags2KHR dstStages, VkImageAspectFlags aspectMask = VK_IMAGE_ASPECT_COLOR_BIT)220 void setImageLayout (const DeviceInterface& vkd,
221 VkCommandBuffer cmdBuffer,
222 VkImage image,
223 VkImageLayout oldImageLayout,
224 VkImageLayout newImageLayout,
225 VkPipelineStageFlags2KHR srcStages,
226 VkPipelineStageFlags2KHR dstStages,
227 VkImageAspectFlags aspectMask = VK_IMAGE_ASPECT_COLOR_BIT)
228 {
229 VkAccessFlags2KHR srcAccessMask = 0;
230 VkAccessFlags2KHR dstAccessMask = 0;
231
232 switch (static_cast<VkImageLayout>(oldImageLayout))
233 {
234 case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL: srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; break;
235 case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL: srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; break;
236 case VK_IMAGE_LAYOUT_PREINITIALIZED: srcAccessMask = VK_ACCESS_HOST_WRITE_BIT; break;
237 case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: srcAccessMask = VK_ACCESS_SHADER_READ_BIT; break;
238 case VK_IMAGE_LAYOUT_VIDEO_DECODE_DST_KHR: srcAccessMask = VK_ACCESS_2_VIDEO_DECODE_WRITE_BIT_KHR; break;
239 default: srcAccessMask = 0; break;
240 }
241
242 switch (static_cast<VkImageLayout>(newImageLayout))
243 {
244 case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL: dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; break;
245 case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL: dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; break;
246 case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: dstAccessMask = VK_ACCESS_SHADER_READ_BIT; break;
247 case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL: dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; break;
248 case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL: dstAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; break;
249 case VK_IMAGE_LAYOUT_VIDEO_DECODE_DST_KHR: dstAccessMask = VK_ACCESS_2_VIDEO_DECODE_WRITE_BIT_KHR; break;
250 case VK_IMAGE_LAYOUT_VIDEO_DECODE_DPB_KHR: dstAccessMask = VK_ACCESS_2_VIDEO_DECODE_READ_BIT_KHR; break;
251 case VK_IMAGE_LAYOUT_VIDEO_ENCODE_SRC_KHR: dstAccessMask = VK_ACCESS_2_VIDEO_ENCODE_READ_BIT_KHR; break;
252 case VK_IMAGE_LAYOUT_VIDEO_ENCODE_DPB_KHR: dstAccessMask = VK_ACCESS_2_VIDEO_ENCODE_WRITE_BIT_KHR | VK_ACCESS_2_VIDEO_ENCODE_READ_BIT_KHR; break;
253 case VK_IMAGE_LAYOUT_GENERAL: dstAccessMask = VK_ACCESS_HOST_WRITE_BIT; break;
254 default: dstAccessMask = 0; break;
255 }
256
257 const VkImageMemoryBarrier2KHR imageMemoryBarrier =
258 {
259 VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2_KHR, // VkStructureType sType;
260 DE_NULL, // const void* pNext;
261 srcStages, // VkPipelineStageFlags2KHR srcStageMask;
262 srcAccessMask, // VkAccessFlags2KHR srcAccessMask;
263 dstStages, // VkPipelineStageFlags2KHR dstStageMask;
264 dstAccessMask, // VkAccessFlags2KHR dstAccessMask;
265 oldImageLayout, // VkImageLayout oldLayout;
266 newImageLayout, // VkImageLayout newLayout;
267 VK_QUEUE_FAMILY_IGNORED, // deUint32 srcQueueFamilyIndex;
268 VK_QUEUE_FAMILY_IGNORED, // deUint32 dstQueueFamilyIndex;
269 image, // VkImage image;
270 { aspectMask, 0, 1, 0, 1 }, // VkImageSubresourceRange subresourceRange;
271 };
272
273 const VkDependencyInfoKHR dependencyInfo =
274 {
275 VK_STRUCTURE_TYPE_DEPENDENCY_INFO_KHR, // VkStructureType sType;
276 DE_NULL, // const void* pNext;
277 VK_DEPENDENCY_BY_REGION_BIT, // VkDependencyFlags dependencyFlags;
278 0, // deUint32 memoryBarrierCount;
279 DE_NULL, // const VkMemoryBarrier2KHR* pMemoryBarriers;
280 0, // deUint32 bufferMemoryBarrierCount;
281 DE_NULL, // const VkBufferMemoryBarrier2KHR* pBufferMemoryBarriers;
282 1, // deUint32 imageMemoryBarrierCount;
283 &imageMemoryBarrier, // const VkImageMemoryBarrier2KHR* pImageMemoryBarriers;
284 };
285
286 vkd.cmdPipelineBarrier2(cmdBuffer, &dependencyInfo);
287 }
288
NvidiaVideoDecodeH264DpbSlotInfo()289 NvidiaVideoDecodeH264DpbSlotInfo::NvidiaVideoDecodeH264DpbSlotInfo ()
290 : dpbSlotInfo()
291 , stdReferenceInfo()
292 {
293 }
294
Init(int32_t slotIndex)295 const VkVideoDecodeH264DpbSlotInfoKHR* NvidiaVideoDecodeH264DpbSlotInfo::Init (int32_t slotIndex)
296 {
297 DE_UNREF(slotIndex);
298
299 dpbSlotInfo.sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_DPB_SLOT_INFO_KHR;
300 dpbSlotInfo.pNext = DE_NULL;
301 dpbSlotInfo.pStdReferenceInfo = &stdReferenceInfo;
302
303 return &dpbSlotInfo;
304 }
305
IsReference() const306 bool NvidiaVideoDecodeH264DpbSlotInfo::IsReference () const
307 {
308 return (dpbSlotInfo.pStdReferenceInfo == &stdReferenceInfo);
309 }
310
operator bool() const311 NvidiaVideoDecodeH264DpbSlotInfo::operator bool() const
312 {
313 return IsReference();
314 }
315
Invalidate()316 void NvidiaVideoDecodeH264DpbSlotInfo::Invalidate ()
317 {
318 deMemset(this, 0x00, sizeof(*this));
319 }
320
NvidiaVideoDecodeH265DpbSlotInfo()321 NvidiaVideoDecodeH265DpbSlotInfo::NvidiaVideoDecodeH265DpbSlotInfo ()
322 : dpbSlotInfo()
323 , stdReferenceInfo()
324 {
325 }
326
Init(int32_t slotIndex)327 const VkVideoDecodeH265DpbSlotInfoKHR* NvidiaVideoDecodeH265DpbSlotInfo::Init (int32_t slotIndex)
328 {
329 DE_UNREF(slotIndex);
330
331 dpbSlotInfo.sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_DPB_SLOT_INFO_KHR;
332 dpbSlotInfo.pNext = DE_NULL;
333 dpbSlotInfo.pStdReferenceInfo = &stdReferenceInfo;
334
335 return &dpbSlotInfo;
336 }
337
IsReference() const338 bool NvidiaVideoDecodeH265DpbSlotInfo::IsReference() const
339 {
340 return (dpbSlotInfo.pStdReferenceInfo == &stdReferenceInfo);
341 }
342
operator bool() const343 NvidiaVideoDecodeH265DpbSlotInfo::operator bool() const
344 {
345 return IsReference();
346 }
347
Invalidate()348 void NvidiaVideoDecodeH265DpbSlotInfo::Invalidate()
349 {
350 deMemset(this, 0x00, sizeof(*this));
351 }
352
353 // Keeps track of data associated with active internal reference frames
isInUse(void)354 bool DpbSlot::isInUse (void)
355 {
356 return (m_reserved || m_inUse);
357 }
358
isAvailable(void)359 bool DpbSlot::isAvailable (void)
360 {
361 return !isInUse();
362 }
363
Invalidate(void)364 bool DpbSlot::Invalidate (void)
365 {
366 bool wasInUse = isInUse();
367
368 if (m_picBuf)
369 {
370 m_picBuf->Release();
371 m_picBuf = DE_NULL;
372 }
373
374 m_reserved = m_inUse = false;
375
376 return wasInUse;
377 }
378
getPictureResource(void)379 NvidiaVulkanPictureBase* DpbSlot::getPictureResource (void)
380 {
381 return m_picBuf;
382 }
383
setPictureResource(NvidiaVulkanPictureBase* picBuf)384 NvidiaVulkanPictureBase* DpbSlot::setPictureResource (NvidiaVulkanPictureBase* picBuf)
385 {
386 NvidiaVulkanPictureBase* oldPic = m_picBuf;
387
388 if (picBuf)
389 {
390 picBuf->AddRef();
391 }
392
393 m_picBuf = picBuf;
394
395 if (oldPic)
396 {
397 oldPic->Release();
398 }
399
400 return oldPic;
401 }
402
Reserve(void)403 void DpbSlot::Reserve (void)
404 {
405 m_reserved = true;
406 }
407
MarkInUse()408 void DpbSlot::MarkInUse ()
409 {
410 m_inUse = true;
411 }
412
DpbSlots(uint32_t dpbMaxSize)413 DpbSlots::DpbSlots (uint32_t dpbMaxSize)
414 : m_dpbMaxSize (0)
415 , m_slotInUseMask (0)
416 , m_dpb (m_dpbMaxSize)
417 , m_dpbSlotsAvailable ()
418 {
419 Init(dpbMaxSize, false);
420 }
421
Init(uint32_t newDpbMaxSize, bool reconfigure)422 int32_t DpbSlots::Init (uint32_t newDpbMaxSize, bool reconfigure)
423 {
424 DE_ASSERT(newDpbMaxSize <= MAX_DPB_SLOTS_PLUS_1);
425
426 if (!reconfigure)
427 {
428 Deinit();
429 }
430
431 if (reconfigure && newDpbMaxSize < m_dpbMaxSize)
432 {
433 return m_dpbMaxSize;
434 }
435
436 uint32_t oldDpbMaxSize = reconfigure ? m_dpbMaxSize : 0;
437 m_dpbMaxSize = newDpbMaxSize;
438
439 m_dpb.resize(m_dpbMaxSize);
440
441 for (uint32_t ndx = oldDpbMaxSize; ndx < m_dpbMaxSize; ndx++)
442 {
443 m_dpb[ndx].Invalidate();
444 }
445
446 for (uint32_t dpbIndx = oldDpbMaxSize; dpbIndx < m_dpbMaxSize; dpbIndx++)
447 {
448 m_dpbSlotsAvailable.push((uint8_t)dpbIndx);
449 }
450
451 return m_dpbMaxSize;
452 }
453
Deinit(void)454 void DpbSlots::Deinit (void)
455 {
456 for (uint32_t ndx = 0; ndx < m_dpbMaxSize; ndx++)
457 m_dpb[ndx].Invalidate();
458
459 while (!m_dpbSlotsAvailable.empty())
460 m_dpbSlotsAvailable.pop();
461
462 m_dpbMaxSize = 0;
463 m_slotInUseMask = 0;
464 }
465
~DpbSlots()466 DpbSlots::~DpbSlots ()
467 {
468 Deinit();
469 }
470
AllocateSlot(void)471 int8_t DpbSlots::AllocateSlot (void)
472 {
473 DE_ASSERT(!m_dpbSlotsAvailable.empty());
474
475 int8_t slot = (int8_t)m_dpbSlotsAvailable.front();
476
477 DE_ASSERT((slot >= 0) && ((uint8_t)slot < m_dpbMaxSize));
478
479 m_slotInUseMask |= (1 << slot);
480 m_dpbSlotsAvailable.pop();
481 m_dpb[slot].Reserve();
482
483 return slot;
484 }
485
FreeSlot(int8_t slot)486 void DpbSlots::FreeSlot (int8_t slot)
487 {
488 DE_ASSERT((uint8_t)slot < m_dpbMaxSize);
489 DE_ASSERT(m_dpb[slot].isInUse());
490 DE_ASSERT(m_slotInUseMask & (1 << slot));
491
492 m_dpb[slot].Invalidate();
493 m_dpbSlotsAvailable.push(slot);
494 m_slotInUseMask &= ~(1 << slot);
495 }
496
operator [](uint32_t slot)497 DpbSlot& DpbSlots::operator[] (uint32_t slot)
498 {
499 DE_ASSERT(slot < m_dpbMaxSize);
500
501 return m_dpb[slot];
502 }
503
504 void DpbSlots::MapPictureResource (NvidiaVulkanPictureBase* pPic, int32_t dpbSlot)
505 {
506 for (uint32_t slot = 0; slot < m_dpbMaxSize; slot++)
507 {
508 if ((uint8_t)slot == dpbSlot)
509 {
510 m_dpb[slot].setPictureResource(pPic);
511 }
512 else if (pPic)
513 {
514 if (m_dpb[slot].getPictureResource() == pPic)
515 {
516 FreeSlot((uint8_t)slot);
517 }
518 }
519 }
520 }
521
522 uint32_t DpbSlots::getSlotInUseMask ()
523 {
524 return m_slotInUseMask;
525 }
526
527 uint32_t DpbSlots::getMaxSize ()
528 {
529 return m_dpbMaxSize;
530 }
531
532 typedef struct dpbEntry
533 {
534 int8_t dpbSlot;
535 // bit0(used_for_reference)=1: top field used for reference,
536 // bit1(used_for_reference)=1: bottom field used for reference
537 uint32_t used_for_reference : 2;
538 uint32_t is_long_term : 1; // 0 = short-term, 1 = long-term
539 uint32_t is_non_existing : 1; // 1 = marked as non-existing
540 uint32_t is_field_ref : 1; // set if unpaired field or complementary field pair
541
542 union
543 {
544 int16_t FieldOrderCnt[2]; // h.264 : 2*32 [top/bottom].
545 int32_t PicOrderCnt; // HEVC PicOrderCnt
546 };
547
548 union
549 {
550 int16_t FrameIdx; // : 16 short-term: FrameNum (16 bits), long-term: LongTermFrameIdx (4 bits)
551 int8_t originalDpbIndex; // Original Dpb source Index.
552 };
553
554 NvidiaVulkanPictureBase* m_picBuff; // internal picture reference
555
556 void setReferenceAndTopBoottomField (bool isReference,
557 bool nonExisting,
558 bool isLongTerm,
559 bool isFieldRef,
560 bool topFieldIsReference,
561 bool bottomFieldIsReference,
562 int16_t frameIdx,
563 const int16_t fieldOrderCntList[2],
564 NvidiaVulkanPictureBase* picBuff)
565 {
566 is_non_existing = nonExisting;
567 is_long_term = isLongTerm;
568 is_field_ref = isFieldRef;
569
570 if (isReference && isFieldRef)
571 {
572 used_for_reference = (unsigned char)(3 & ((bottomFieldIsReference << bottomFieldShift) | (topFieldIsReference << topFieldShift)));
573 }
574 else
575 {
576 used_for_reference = isReference ? 3 : 0;
577 }
578
579 FrameIdx = frameIdx;
580 FieldOrderCnt[0] = fieldOrderCntList[used_for_reference == 2]; // 0: for progressive and top reference; 1: for bottom reference only.
581 FieldOrderCnt[1] = fieldOrderCntList[used_for_reference != 1]; // 0: for top reference only; 1: for bottom reference and progressive.
582 dpbSlot = -1;
583 m_picBuff = picBuff;
584 }
585
586 void setReference (bool isLongTerm,
587 int32_t picOrderCnt,
588 NvidiaVulkanPictureBase* picBuff)
589 {
590 is_non_existing = (picBuff == DE_NULL);
591 is_long_term = isLongTerm;
592 is_field_ref = false;
593 used_for_reference = (picBuff != DE_NULL) ? 3 : 0;
594
595 PicOrderCnt = picOrderCnt;
596
597 dpbSlot = -1;
598 m_picBuff = picBuff;
599 originalDpbIndex = -1;
600 }
601
602 bool isRef ()
603 {
604 return (used_for_reference != 0);
605 }
606
607 StdVideoDecodeH264ReferenceInfoFlags getPictureFlag ()
608 {
609 StdVideoDecodeH264ReferenceInfoFlags picFlags = StdVideoDecodeH264ReferenceInfoFlags();
610
611 if (used_for_reference)
612 {
613 // picFlags.is_reference = true;
614 }
615
616 if (is_long_term)
617 {
618 picFlags.used_for_long_term_reference = true;
619 }
620
621 if (is_non_existing)
622 {
623 picFlags.is_non_existing = true;
624 }
625
626 if (is_field_ref)
627 {
628 // picFlags.field_pic_flag = true;
629 }
630
631 if (used_for_reference & topFieldMask)
632 {
633 picFlags.top_field_flag = true;
634 }
635
636 if (used_for_reference & bottomFieldMask)
637 {
638 picFlags.bottom_field_flag = true;
639 }
640
641 return picFlags;
642 }
643
644 void setH264PictureData (NvidiaVideoDecodeH264DpbSlotInfo* pDpbRefList,
645 VkVideoReferenceSlotInfoKHR* pReferenceSlots,
646 uint32_t dpbEntryIdx,
647 uint32_t dpbSlotIndex)
648 {
649 DE_ASSERT(dpbEntryIdx < AVC_MAX_DPB_SLOTS);
650 DE_ASSERT(dpbSlotIndex < AVC_MAX_DPB_SLOTS);
651
652 DE_ASSERT((dpbSlotIndex == (uint32_t)dpbSlot) || is_non_existing);
653 pReferenceSlots[dpbEntryIdx].sType = VK_STRUCTURE_TYPE_VIDEO_REFERENCE_SLOT_INFO_KHR;
654 pReferenceSlots[dpbEntryIdx].slotIndex = dpbSlotIndex;
655 pReferenceSlots[dpbEntryIdx].pNext = pDpbRefList[dpbEntryIdx].Init(dpbSlotIndex);
656
657 StdVideoDecodeH264ReferenceInfo* pRefPicInfo = &pDpbRefList[dpbEntryIdx].stdReferenceInfo;
658
659 pRefPicInfo->FrameNum = FrameIdx;
660 pRefPicInfo->flags = getPictureFlag();
661 pRefPicInfo->PicOrderCnt[0] = FieldOrderCnt[0];
662 pRefPicInfo->PicOrderCnt[1] = FieldOrderCnt[1];
663 }
664
665 void setH265PictureData (NvidiaVideoDecodeH265DpbSlotInfo* pDpbSlotInfo,
666 VkVideoReferenceSlotInfoKHR* pReferenceSlots,
667 uint32_t dpbEntryIdx,
668 uint32_t dpbSlotIndex)
669 {
670 DE_ASSERT(dpbEntryIdx < HEVC_MAX_DPB_SLOTS);
671 DE_ASSERT(dpbSlotIndex < HEVC_MAX_DPB_SLOTS);
672 DE_ASSERT(isRef());
673
674 DE_ASSERT((dpbSlotIndex == (uint32_t)dpbSlot) || is_non_existing);
675 pReferenceSlots[dpbEntryIdx].sType = VK_STRUCTURE_TYPE_VIDEO_REFERENCE_SLOT_INFO_KHR;
676 pReferenceSlots[dpbEntryIdx].slotIndex = dpbSlotIndex;
677 pReferenceSlots[dpbEntryIdx].pNext = pDpbSlotInfo[dpbEntryIdx].Init(dpbSlotIndex);
678
679 StdVideoDecodeH265ReferenceInfo* pRefPicInfo = &pDpbSlotInfo[dpbEntryIdx].stdReferenceInfo;
680 pRefPicInfo->PicOrderCntVal = PicOrderCnt;
681 pRefPicInfo->flags.used_for_long_term_reference = is_long_term;
682 pRefPicInfo->flags.unused_for_reference = is_non_existing;
683
684 }
685
686 } dpbEntry;
687
688 int8_t VideoBaseDecoder::GetPicIdx (NvidiaVulkanPictureBase* pPicBuf)
689 {
690 if (pPicBuf)
691 {
692 int32_t picIndex = pPicBuf->m_picIdx;
693
694 if ((picIndex >= 0) && ((uint32_t)picIndex < m_maxNumDecodeSurfaces))
695 {
696 return (int8_t)picIndex;
697 }
698 }
699
700 return -1;
701 }
702
703 int8_t VideoBaseDecoder::GetPicIdx (INvidiaVulkanPicture* pPicBuf)
704 {
705 return GetPicIdx(GetPic(pPicBuf));
706 }
707
708 int8_t VideoBaseDecoder::GetPicDpbSlot (int8_t picIndex)
709 {
710 return m_pictureToDpbSlotMap[picIndex];
711 }
712
713 int8_t VideoBaseDecoder::GetPicDpbSlot (NvidiaVulkanPictureBase* pPicBuf)
714 {
715 int8_t picIndex = GetPicIdx(pPicBuf);
716 DE_ASSERT((picIndex >= 0) && ((uint32_t)picIndex < m_maxNumDecodeSurfaces));
717 return GetPicDpbSlot(picIndex);
718 }
719
720 bool VideoBaseDecoder::GetFieldPicFlag (int8_t picIndex)
721 {
722 DE_ASSERT((picIndex >= 0) && ((uint32_t)picIndex < m_maxNumDecodeSurfaces));
723
724 return !!(m_fieldPicFlagMask & (1 << (uint32_t)picIndex));
725 }
726
727 bool VideoBaseDecoder::SetFieldPicFlag (int8_t picIndex, bool fieldPicFlag)
728 {
729 DE_ASSERT((picIndex >= 0) && ((uint32_t)picIndex < m_maxNumDecodeSurfaces));
730
731 bool oldFieldPicFlag = GetFieldPicFlag(picIndex);
732
733 if (fieldPicFlag)
734 {
735 m_fieldPicFlagMask |= (1 << (uint32_t)picIndex);
736 }
737 else
738 {
739 m_fieldPicFlagMask &= ~(1 << (uint32_t)picIndex);
740 }
741
742 return oldFieldPicFlag;
743 }
744
745 int8_t VideoBaseDecoder::SetPicDpbSlot (int8_t picIndex, int8_t dpbSlot)
746 {
747 int8_t oldDpbSlot = m_pictureToDpbSlotMap[picIndex];
748
749 m_pictureToDpbSlotMap[picIndex] = dpbSlot;
750
751 if (dpbSlot >= 0)
752 {
753 m_dpbSlotsMask |= (1 << picIndex);
754 }
755 else
756 {
757 m_dpbSlotsMask &= ~(1 << picIndex);
758
759 if (oldDpbSlot >= 0)
760 {
761 m_dpb.FreeSlot(oldDpbSlot);
762 }
763 }
764
765 return oldDpbSlot;
766 }
767
768 int8_t VideoBaseDecoder::SetPicDpbSlot (NvidiaVulkanPictureBase* pPicBuf, int8_t dpbSlot)
769 {
770 int8_t picIndex = GetPicIdx(pPicBuf);
771
772 DE_ASSERT((picIndex >= 0) && ((uint32_t)picIndex < m_maxNumDecodeSurfaces));
773
774 return SetPicDpbSlot(picIndex, dpbSlot);
775 }
776
777 uint32_t VideoBaseDecoder::ResetPicDpbSlots (uint32_t picIndexSlotValidMask)
778 {
779 uint32_t resetSlotsMask = ~(picIndexSlotValidMask | ~m_dpbSlotsMask);
780
781 for (uint32_t picIdx = 0; (picIdx < m_maxNumDecodeSurfaces) && resetSlotsMask; picIdx++)
782 {
783 if (resetSlotsMask & (1 << picIdx))
784 {
785 resetSlotsMask &= ~(1 << picIdx);
786
787 SetPicDpbSlot((int8_t)picIdx, -1);
788 }
789 }
790
791 return m_dpbSlotsMask;
792 }
793
794 VideoBaseDecoder::VideoBaseDecoder (Context& context)
795 : m_context (context)
796 , m_nvFuncs (createIfcNvFunctions(context.getTestContext().getPlatform().getVulkanPlatform()))
797 , m_ffmpegFuncs (createIfcFfmpegFunctions())
798 , m_videoCodecOperation (VK_VIDEO_CODEC_OPERATION_NONE_KHR)
799 , m_vkd (DE_NULL)
800 , m_device (DE_NULL)
801 , m_queueFamilyIndexTransfer (VK_QUEUE_FAMILY_IGNORED)
802 , m_queueFamilyIndexDecode (VK_QUEUE_FAMILY_IGNORED)
803 , m_queueTransfer (DE_NULL)
804 , m_queueDecode (DE_NULL)
805 , m_allocator (DE_NULL)
806 , m_nCurrentPictureID (0)
807 , m_dpbSlotsMask (0)
808 , m_fieldPicFlagMask (0)
809 , m_dpb (3)
810 , m_pictureToDpbSlotMap ()
811 , m_maxNumDecodeSurfaces (1)
812 , m_maxNumDpbSurfaces (1)
813 , m_clockRate (0)
814 , m_minBitstreamBufferSizeAlignment (0)
815 , m_minBitstreamBufferOffsetAlignment (0)
816 , m_videoDecodeSession ()
817 , m_videoDecodeSessionAllocs ()
818 , m_numDecodeSurfaces ()
819 , m_videoCommandPool ()
820 , m_videoFrameBuffer (new VideoFrameBuffer())
821 , m_decodeFramesData (DE_NULL)
822 , m_maxDecodeFramesCount (0)
823 , m_maxDecodeFramesAllocated (0)
824 , m_width (0)
825 , m_height (0)
826 , m_codedWidth (0)
827 , m_codedHeight (0)
828 , m_chromaFormat ()
829 , m_bitLumaDepthMinus8 (0)
830 , m_bitChromaDepthMinus8 (0)
831 , m_decodePicCount (0)
832 , m_videoFormat ()
833 , m_lastSpsIdInQueue (-1)
834 , m_pictureParametersQueue ()
835 , m_lastSpsPictureParametersQueue ()
836 , m_lastPpsPictureParametersQueue ()
837 , m_currentPictureParameters ()
838 , m_randomOrSwapped (false)
839 , m_queryResultWithStatus (false)
840 , m_frameCountTrigger (0)
841 , m_submitAfter (false)
842 , m_gopSize (0)
843 , m_dpbCount (0)
844 , m_heaps ()
845 , m_pPerFrameDecodeParameters ()
846 , m_pVulkanParserDecodePictureInfo ()
847 , m_pFrameDatas ()
848 , m_bitstreamBufferMemoryBarriers ()
849 , m_imageBarriersVec ()
850 , m_frameSynchronizationInfos ()
851 , m_commandBufferSubmitInfos ()
852 , m_decodeBeginInfos ()
853 , m_pictureResourcesInfos ()
854 , m_dependencyInfos ()
855 , m_decodeEndInfos ()
856 , m_submitInfos ()
857 , m_frameCompleteFences ()
858 , m_frameConsumerDoneFences ()
859 , m_frameCompleteSemaphoreSubmitInfos ()
860 , m_frameConsumerDoneSemaphoreSubmitInfos ()
861 , m_distinctDstDpbImages (false)
862 {
863 deMemset(&m_nvidiaVulkanParserSequenceInfo, 0, sizeof(m_nvidiaVulkanParserSequenceInfo));
864
865 for (uint32_t picNdx = 0; picNdx < DE_LENGTH_OF_ARRAY(m_pictureToDpbSlotMap); picNdx++)
866 m_pictureToDpbSlotMap[picNdx] = -1;
867
868 ReinitCaches();
869 }
870
871 VideoBaseDecoder::~VideoBaseDecoder (void)
872 {
873 Deinitialize();
874 }
875
876 void VideoBaseDecoder::initialize (const VkVideoCodecOperationFlagBitsKHR videoCodecOperation,
877 const DeviceInterface& vkd,
878 const VkDevice device,
879 const deUint32 queueFamilyIndexTransfer,
880 const deUint32 queueFamilyIndexDecode,
881 Allocator& allocator)
882 {
883 DE_ASSERT(m_videoCodecOperation == VK_VIDEO_CODEC_OPERATION_NONE_KHR);
884 DE_ASSERT(m_vkd == DE_NULL);
885 DE_ASSERT(m_device == DE_NULL);
886 DE_ASSERT(queueFamilyIndexTransfer != VK_QUEUE_FAMILY_IGNORED);
887 DE_ASSERT(queueFamilyIndexDecode != VK_QUEUE_FAMILY_IGNORED);
888 DE_ASSERT(m_allocator == DE_NULL);
889
890 m_videoCodecOperation = videoCodecOperation;
891 m_vkd = &vkd;
892 m_device = device;
893 m_queueFamilyIndexTransfer = queueFamilyIndexTransfer;
894 m_queueFamilyIndexDecode = queueFamilyIndexDecode;
895 m_allocator = &allocator;
896 m_queueTransfer = getDeviceQueue(vkd, device, m_queueFamilyIndexTransfer, 0u);
897 m_queueDecode = getDeviceQueue(vkd, device, m_queueFamilyIndexDecode, 0u);
898 }
899
900 VkDevice VideoBaseDecoder::getDevice (void)
901 {
902 DE_ASSERT(m_device != DE_NULL);
903
904 return m_device;
905 }
906
907 const DeviceInterface& VideoBaseDecoder::getDeviceDriver (void)
908 {
909 DE_ASSERT(m_vkd != DE_NULL);
910
911 return *m_vkd;
912 }
913
914 deUint32 VideoBaseDecoder::getQueueFamilyIndexTransfer (void)
915 {
916 DE_ASSERT(m_queueFamilyIndexTransfer != VK_QUEUE_FAMILY_IGNORED);
917
918 return m_queueFamilyIndexTransfer;
919 }
920
921 VkQueue VideoBaseDecoder::getQueueTransfer (void)
922 {
923 DE_ASSERT(m_queueTransfer != DE_NULL);
924
925 return m_queueTransfer;
926 }
927
928 deUint32 VideoBaseDecoder::getQueueFamilyIndexDecode (void)
929 {
930 DE_ASSERT(m_queueFamilyIndexDecode != VK_QUEUE_FAMILY_IGNORED);
931
932 return m_queueFamilyIndexDecode;
933 }
934
935 VkQueue VideoBaseDecoder::getQueueDecode (void)
936 {
937 DE_ASSERT(m_queueDecode != DE_NULL);
938
939 return m_queueDecode;
940 }
941
942 Allocator& VideoBaseDecoder::getAllocator (void)
943 {
944 DE_ASSERT(m_allocator != DE_NULL);
945
946 return *m_allocator;
947 }
948
949
950 void VideoBaseDecoder::setDecodeParameters (bool randomOrSwapped,
951 bool queryResultWithStatus,
952 uint32_t frameCountTrigger,
953 bool submitAfter,
954 uint32_t gopSize,
955 uint32_t dpbCount)
956
957 {
958 m_randomOrSwapped = randomOrSwapped;
959 m_queryResultWithStatus = queryResultWithStatus;
960 m_frameCountTrigger = frameCountTrigger;
961 m_submitAfter = submitAfter;
962 m_gopSize = gopSize ? gopSize : frameCountTrigger;
963 m_dpbCount = dpbCount ? dpbCount : 1;
964
965 DEBUGLOG(std::cout << m_randomOrSwapped << " " << m_queryResultWithStatus << " " << m_frameCountTrigger << " " << m_submitAfter << " " << m_gopSize << " " << m_dpbCount << std::endl);
966
967 ReinitCaches();
968 }
969
970 int32_t VideoBaseDecoder::BeginSequence (const NvidiaVulkanParserSequenceInfo* pnvsi)
971 {
972 DEBUGLOG(std::cout << "VideoBaseDecoder::BeginSequence " << std::dec << pnvsi->nCodedWidth << "x" << pnvsi->nCodedHeight << std::endl);
973
974 const int32_t maxDbpSlots = MAX_DPB_SLOTS_PLUS_1 - ((pnvsi->eCodec == VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR) ? 0 : EXTRA_DPB_SLOTS);
975 const int32_t configDpbSlotsPre = (pnvsi->nMinNumDecodeSurfaces > 0)
976 ? (pnvsi->nMinNumDecodeSurfaces - (pnvsi->isSVC ? 3 : 1))
977 : 0;
978 const int32_t configDpbSlots = std::min(maxDbpSlots, configDpbSlotsPre);
979 const int32_t configDpbSlotsPlus1 = std::min(configDpbSlots + 1, (int32_t)MAX_DPB_SLOTS_PLUS_1);
980 const bool sequenceUpdate = (m_nvidiaVulkanParserSequenceInfo.nMaxWidth != 0) && (m_nvidiaVulkanParserSequenceInfo.nMaxHeight != 0);
981 const bool formatChange = (pnvsi->eCodec != m_nvidiaVulkanParserSequenceInfo.eCodec)
982 || (pnvsi->codecProfile != m_nvidiaVulkanParserSequenceInfo.codecProfile)
983 || (pnvsi->nChromaFormat != m_nvidiaVulkanParserSequenceInfo.nChromaFormat)
984 || (pnvsi->uBitDepthLumaMinus8 != m_nvidiaVulkanParserSequenceInfo.uBitDepthLumaMinus8)
985 || (pnvsi->uBitDepthChromaMinus8 != m_nvidiaVulkanParserSequenceInfo.uBitDepthChromaMinus8)
986 || (pnvsi->bProgSeq != m_nvidiaVulkanParserSequenceInfo.bProgSeq);
987 const bool extentChange = (pnvsi->nCodedWidth != m_nvidiaVulkanParserSequenceInfo.nCodedWidth)
988 || (pnvsi->nCodedHeight != m_nvidiaVulkanParserSequenceInfo.nCodedHeight);
989 const bool sequenceReconfigireFormat = sequenceUpdate && formatChange;
990 const bool sequenceReconfigireCodedExtent = sequenceUpdate && extentChange;
991 const VkVideoChromaSubsamplingFlagBitsKHR chromaSubsampling = ConvertStdH264ChromaFormatToVulkan((StdVideoH264ChromaFormatIdc)pnvsi->nChromaFormat);
992 const VulkanParserDetectedVideoFormat detectedFormat =
993 {
994 pnvsi->eCodec, // vk::VkVideoCodecOperationFlagBitsKHR codec;
995 pnvsi->codecProfile, // uint32_t codecProfile;
996 getLumaBitDepth(pnvsi->uBitDepthLumaMinus8), // VkVideoComponentBitDepthFlagsKHR lumaBitDepth;
997 getChromaBitDepth(pnvsi->uBitDepthChromaMinus8), // VkVideoComponentBitDepthFlagsKHR chromaBitDepth;
998 chromaSubsampling, // VkVideoChromaSubsamplingFlagBitsKHR chromaSubsampling;
999 NVIDIA_FRAME_RATE_NUM(pnvsi->frameRate), // uint32_t frame_rate_numerator;
1000 NVIDIA_FRAME_RATE_DEN(pnvsi->frameRate), // uint32_t frame_rate_denominator;
1001 (uint8_t)(sequenceUpdate != 0 ? 1 : 0), // uint8_t sequenceUpdate : 1;
1002 (uint8_t)(sequenceReconfigireFormat != 0 ? 1 : 0), // uint8_t sequenceReconfigireFormat : 1;
1003 (uint8_t)(sequenceReconfigireCodedExtent != 0 ? 1 : 0), // uint8_t sequenceReconfigireCodedExtent : 1;
1004 (uint8_t)(pnvsi->bProgSeq != 0 ? 1 : 0), // uint8_t progressive_sequence : 1;
1005 pnvsi->uBitDepthLumaMinus8, // uint8_t bit_depth_luma_minus8;
1006 pnvsi->uBitDepthChromaMinus8, // uint8_t bit_depth_chroma_minus8;
1007 0u, // uint8_t reserved1;
1008 (uint32_t)pnvsi->nCodedWidth, // uint32_t coded_width;
1009 (uint32_t)pnvsi->nCodedHeight, // uint32_t coded_height;
1010
1011 {
1012 0u, // int32_t left;
1013 0u, // int32_t top;
1014 pnvsi->nDisplayWidth, // int32_t right;
1015 pnvsi->nDisplayHeight, // int32_t bottom;
1016 },
1017
1018 (uint32_t)pnvsi->lBitrate, // uint32_t bitrate;
1019 (int32_t)pnvsi->lDARWidth, // int32_t display_aspect_ratio_x;
1020 (int32_t)pnvsi->lDARHeight, // int32_t display_aspect_ratio_y;
1021 (uint32_t)pnvsi->nMinNumDecodeSurfaces, // uint32_t minNumDecodeSurfaces;
1022 (uint32_t)configDpbSlotsPlus1, // uint32_t maxNumDpbSlots;
1023
1024 {
1025 (uint8_t)(7 & pnvsi->lVideoFormat), // uint8_t video_format : 3;
1026 (uint8_t)(pnvsi->uVideoFullRange != 0 ? 1 : 0), // uint8_t video_full_range_flag : 1;
1027 0u, // uint8_t reserved_zero_bits : 4;
1028 (uint8_t)pnvsi->lColorPrimaries, // uint8_t color_primaries;
1029 (uint8_t)pnvsi->lTransferCharacteristics, // uint8_t transfer_characteristics;
1030 (uint8_t)pnvsi->lMatrixCoefficients, // uint8_t matrix_coefficients;
1031 },
1032
1033 0u, // uint32_t seqhdr_data_length;
1034 };
1035
1036 m_nvidiaVulkanParserSequenceInfo = *pnvsi;
1037 m_nvidiaVulkanParserSequenceInfo.nMaxWidth = pnvsi->nCodedWidth;
1038 m_nvidiaVulkanParserSequenceInfo.nMaxHeight = pnvsi->nCodedHeight;
1039
1040 int maxDecodeRTs = StartVideoSequence(&detectedFormat);
1041
1042 // nDecodeRTs = 0 means SequenceCallback failed
1043 // nDecodeRTs = 1 means SequenceCallback succeeded
1044 // nDecodeRTs > 1 means we need to overwrite the MaxNumDecodeSurfaces
1045 if (!maxDecodeRTs)
1046 {
1047 return 0;
1048 }
1049 // MaxNumDecodeSurface may not be correctly calculated by the client while
1050 // parser creation so overwrite it with NumDecodeSurface. (only if nDecodeRT
1051 // > 1)
1052 if (maxDecodeRTs > 1)
1053 {
1054 m_maxNumDecodeSurfaces = maxDecodeRTs;
1055 }
1056
1057 // The number of minNumDecodeSurfaces can be overwritten.
1058 // Add one for the current Dpb setup slot.
1059 m_maxNumDpbSurfaces = configDpbSlotsPlus1;
1060
1061 m_dpb.Init(m_maxNumDpbSurfaces, sequenceUpdate);
1062
1063 // NOTE: Important Tegra parser requires the maxDpbSlotsPlus1 and not dpbSlots.
1064 return configDpbSlotsPlus1;
1065 }
1066
1067 bool VideoBaseDecoder::AllocPictureBuffer (INvidiaVulkanPicture** ppNvidiaVulkanPicture)
1068 {
1069 DEBUGLOG(std::cout << "VideoBaseDecoder::AllocPictureBuffer" << std::endl);
1070 bool result = false;
1071
1072 *ppNvidiaVulkanPicture = m_videoFrameBuffer->ReservePictureBuffer();
1073
1074 if (*ppNvidiaVulkanPicture)
1075 {
1076 result = true;
1077
1078 DEBUGLOG(std::cout << "\tVideoBaseDecoder::AllocPictureBuffer " << (void*)*ppNvidiaVulkanPicture << std::endl);
1079 }
1080
1081 if (!result)
1082 {
1083 *ppNvidiaVulkanPicture = (INvidiaVulkanPicture*)DE_NULL;
1084 }
1085
1086 return result;
1087 }
1088
1089 bool VideoBaseDecoder::DecodePicture (NvidiaVulkanParserPictureData* pNvidiaVulkanParserPictureData)
1090 {
1091 DEBUGLOG(std::cout << "VideoBaseDecoder::DecodePicture" << std::endl);
1092
1093 VulkanParserDecodePictureInfo decodePictureInfo = VulkanParserDecodePictureInfo();
1094 bool result = false;
1095
1096 if (!pNvidiaVulkanParserPictureData->pCurrPic)
1097 {
1098 return result;
1099 }
1100
1101 NvidiaVulkanPictureBase* pVkPicBuff = GetPic(pNvidiaVulkanParserPictureData->pCurrPic);
1102 const int32_t picIdx = pVkPicBuff ? pVkPicBuff->m_picIdx : -1;
1103
1104 DEBUGLOG(std::cout << "\tVideoBaseDecoder::DecodePicture " << (void*)pVkPicBuff << std::endl);
1105
1106 DE_ASSERT(picIdx < MAX_FRM_CNT);
1107
1108 decodePictureInfo.pictureIndex = picIdx;
1109 decodePictureInfo.flags.progressiveFrame = pNvidiaVulkanParserPictureData->progressive_frame ? 1 : 0;
1110 decodePictureInfo.flags.fieldPic = pNvidiaVulkanParserPictureData->field_pic_flag ? 1 : 0; // 0 = frame picture, 1 = field picture
1111 decodePictureInfo.flags.repeatFirstField = 3 & (uint32_t)pNvidiaVulkanParserPictureData->repeat_first_field; // For 3:2 pulldown (number of additional fields, 2 = frame doubling, 4 = frame tripling)
1112 decodePictureInfo.flags.refPic = pNvidiaVulkanParserPictureData->ref_pic_flag ? 1 : 0; // Frame is a reference frame
1113
1114 // Mark the first field as unpaired Detect unpaired fields
1115 if (pNvidiaVulkanParserPictureData->field_pic_flag)
1116 {
1117 decodePictureInfo.flags.bottomField = pNvidiaVulkanParserPictureData->bottom_field_flag ? 1 : 0; // 0 = top field, 1 = bottom field (ignored if field_pic_flag=0)
1118 decodePictureInfo.flags.secondField = pNvidiaVulkanParserPictureData->second_field ? 1 : 0; // Second field of a complementary field pair
1119 decodePictureInfo.flags.topFieldFirst = pNvidiaVulkanParserPictureData->top_field_first ? 1 : 0; // Frame pictures only
1120
1121 if (!pNvidiaVulkanParserPictureData->second_field)
1122 {
1123 decodePictureInfo.flags.unpairedField = true; // Incomplete (half) frame.
1124 }
1125 else
1126 {
1127 if (decodePictureInfo.flags.unpairedField)
1128 {
1129 decodePictureInfo.flags.syncToFirstField = true;
1130 decodePictureInfo.flags.unpairedField = false;
1131 }
1132 }
1133 }
1134
1135 decodePictureInfo.frameSyncinfo.unpairedField = decodePictureInfo.flags.unpairedField;
1136 decodePictureInfo.frameSyncinfo.syncToFirstField = decodePictureInfo.flags.syncToFirstField;
1137
1138 return DecodePicture(pNvidiaVulkanParserPictureData, &decodePictureInfo);
1139 }
1140
1141 bool VideoBaseDecoder::UpdatePictureParameters (NvidiaVulkanPictureParameters* pNvidiaVulkanPictureParameters,
1142 NvidiaSharedBaseObj<NvidiaParserVideoRefCountBase>& pictureParametersObject,
1143 uint64_t updateSequenceCount)
1144 {
1145 DEBUGLOG(std::cout << "VideoBaseDecoder::UpdatePictureParameters " << (void*)pNvidiaVulkanPictureParameters << " " << updateSequenceCount << std::endl);
1146
1147 if (pNvidiaVulkanPictureParameters == DE_NULL)
1148 return DE_NULL;
1149
1150 return UpdatePictureParametersHandler(pNvidiaVulkanPictureParameters, pictureParametersObject, updateSequenceCount);
1151 }
1152
1153 bool VideoBaseDecoder::DisplayPicture (INvidiaVulkanPicture* pNvidiaVulkanPicture,
1154 int64_t llPTS)
1155 {
1156 DEBUGLOG(std::cout << "VideoBaseDecoder::DisplayPicture" << std::endl);
1157
1158 bool result = false;
1159
1160 NvidiaVulkanPictureBase* pVkPicBuff = GetPic(pNvidiaVulkanPicture);
1161
1162 DE_ASSERT(pVkPicBuff != DE_NULL);
1163
1164 int32_t picIdx = pVkPicBuff ? pVkPicBuff->m_picIdx : -1;
1165
1166 DE_ASSERT(picIdx != -1);
1167
1168 if (m_videoFrameBuffer != DE_NULL && picIdx != -1)
1169 {
1170 DisplayPictureInfo dispInfo = DisplayPictureInfo();
1171
1172 dispInfo.timestamp = (int64_t)llPTS;
1173
1174 const int32_t retVal = m_videoFrameBuffer->QueueDecodedPictureForDisplay((int8_t)picIdx, &dispInfo);
1175
1176 DE_ASSERT(picIdx == retVal);
1177 DE_UNREF(retVal);
1178
1179 result = true;
1180 }
1181
1182 return result;
1183 }
1184
1185 void VideoBaseDecoder::UnhandledNALU (const uint8_t* pbData,
1186 int32_t cbData)
1187 {
1188 const vector<uint8_t> data (pbData, pbData + cbData);
1189 ostringstream css;
1190
1191 css << "UnhandledNALU=";
1192
1193 for (const auto& i: data)
1194 css << std::hex << std::setw(2) << std::setfill('0') << (deUint32)i << ' ';
1195
1196 TCU_THROW(InternalError, css.str());
1197 }
1198
1199 bool VideoBaseDecoder::DecodePicture (NvidiaVulkanParserPictureData* pNvidiaVulkanParserPictureData,
1200 VulkanParserDecodePictureInfo* pDecodePictureInfo)
1201 {
1202 DEBUGLOG(std::cout << "\tDecodePicture sps_sid:" << (uint32_t)pNvidiaVulkanParserPictureData->CodecSpecific.h264.pStdSps->seq_parameter_set_id << " pps_sid:" << (uint32_t)pNvidiaVulkanParserPictureData->CodecSpecific.h264.pStdPps->seq_parameter_set_id << " pps_pid:" << (uint32_t)pNvidiaVulkanParserPictureData->CodecSpecific.h264.pStdPps->pic_parameter_set_id << std::endl);
1203 bool bRet = false;
1204
1205 if (!pNvidiaVulkanParserPictureData->pCurrPic)
1206 {
1207 return false;
1208 }
1209
1210 const uint32_t PicIdx = GetPicIdx(pNvidiaVulkanParserPictureData->pCurrPic);
1211
1212 if (PicIdx >= MAX_FRM_CNT)
1213 {
1214 DE_ASSERT(0);
1215 return false;
1216 }
1217
1218 HeapType heap;
1219
1220 PerFrameDecodeParameters* pPictureParams = ALLOC_HEAP_OBJECT(heap, PerFrameDecodeParameters);
1221 VkVideoReferenceSlotInfoKHR* pReferenceSlots = ALLOC_HEAP_OBJECT_ARRAY(heap, VkVideoReferenceSlotInfoKHR, PerFrameDecodeParameters::MAX_DPB_REF_SLOTS);
1222 VkVideoReferenceSlotInfoKHR* pSetupReferenceSlot = ALLOC_HEAP_OBJECT(heap, VkVideoReferenceSlotInfoKHR);
1223
1224 *pSetupReferenceSlot =
1225 {
1226 VK_STRUCTURE_TYPE_VIDEO_REFERENCE_SLOT_INFO_KHR, // VkStructureType sType;
1227 DE_NULL, // const void* pNext;
1228 -1, // deInt8 slotIndex;
1229 DE_NULL // const VkVideoPictureResourceInfoKHR* pPictureResource;
1230 };
1231
1232 PerFrameDecodeParameters* pPerFrameDecodeParameters = pPictureParams;
1233
1234 pPerFrameDecodeParameters->currPicIdx = PicIdx;
1235 pPerFrameDecodeParameters->bitstreamDataLen = pNvidiaVulkanParserPictureData->nBitstreamDataLen;
1236 pPerFrameDecodeParameters->pBitstreamData = pNvidiaVulkanParserPictureData->pBitstreamData;
1237
1238 pPerFrameDecodeParameters->decodeFrameInfo.sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_INFO_KHR;
1239 pPerFrameDecodeParameters->decodeFrameInfo.dstPictureResource.sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR;
1240
1241 if (m_videoCodecOperation == VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR)
1242 {
1243 const NvidiaVulkanParserH264PictureData*const pin = &pNvidiaVulkanParserPictureData->CodecSpecific.h264;
1244 nvVideoH264PicParameters* pH264 = ALLOC_HEAP_OBJECT(heap, nvVideoH264PicParameters);
1245 VkVideoDecodeH264PictureInfoKHR* pPictureInfo = &pH264->pictureInfo;
1246 NvidiaVideoDecodeH264DpbSlotInfo* pDpbRefList = pH264->dpbRefList;
1247 StdVideoDecodeH264PictureInfo* pStdPictureInfo = &pH264->stdPictureInfo;
1248
1249 *pH264 = nvVideoH264PicParameters();
1250
1251 pPerFrameDecodeParameters->pCurrentPictureParameters = StdVideoPictureParametersSet::StdVideoPictureParametersSetFromBase(pin->pPpsClientObject);
1252 DEBUGLOG(std::cout << "\tDecodePicture SPS:" << (void*)pin->pSpsClientObject << " PPS:" << (void*)pin->pPpsClientObject << std::endl);
1253
1254 pDecodePictureInfo->videoFrameType = 0; // pd->CodecSpecific.h264.slice_type;
1255 // FIXME: If mvcext is enabled.
1256 pDecodePictureInfo->viewId = (uint16_t)pNvidiaVulkanParserPictureData->CodecSpecific.h264.mvcext.view_id;
1257
1258 pPictureInfo->sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_PICTURE_INFO_KHR;
1259 pPictureInfo->pNext = DE_NULL;
1260 pPictureInfo->pStdPictureInfo = &pH264->stdPictureInfo;
1261 pPictureInfo->sliceCount = pNvidiaVulkanParserPictureData->nNumSlices;
1262 pPictureInfo->pSliceOffsets = pNvidiaVulkanParserPictureData->pSliceDataOffsets;
1263
1264 pPerFrameDecodeParameters->decodeFrameInfo.pNext = &pH264->pictureInfo;
1265
1266 pStdPictureInfo->pic_parameter_set_id = pin->pic_parameter_set_id; // PPS ID
1267 pStdPictureInfo->seq_parameter_set_id = pin->seq_parameter_set_id; // SPS ID;
1268 pStdPictureInfo->frame_num = (uint16_t)pin->frame_num;
1269
1270 pH264->mvcInfo.sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_DPB_SLOT_INFO_KHR;
1271 pH264->mvcInfo.pNext = DE_NULL; // No more extension structures.
1272
1273 StdVideoDecodeH264ReferenceInfo referenceInfo = StdVideoDecodeH264ReferenceInfo();
1274 pH264->mvcInfo.pStdReferenceInfo = &referenceInfo;
1275 pSetupReferenceSlot->pNext = &pH264->mvcInfo;
1276
1277 StdVideoDecodeH264PictureInfoFlags currPicFlags = StdVideoDecodeH264PictureInfoFlags();
1278
1279 currPicFlags.is_intra = (pNvidiaVulkanParserPictureData->intra_pic_flag != 0);
1280
1281 // 0 = frame picture, 1 = field picture
1282 if (pNvidiaVulkanParserPictureData->field_pic_flag)
1283 {
1284 // 0 = top field, 1 = bottom field (ignored if field_pic_flag = 0)
1285 currPicFlags.field_pic_flag = true;
1286 if (pNvidiaVulkanParserPictureData->bottom_field_flag)
1287 {
1288 currPicFlags.bottom_field_flag = true;
1289 }
1290 }
1291 // Second field of a complementary field pair
1292 if (pNvidiaVulkanParserPictureData->second_field)
1293 {
1294 currPicFlags.complementary_field_pair = true;
1295 }
1296
1297 // Frame is a reference frame
1298 if (pNvidiaVulkanParserPictureData->ref_pic_flag)
1299 {
1300 currPicFlags.is_reference = true;
1301 }
1302
1303 pStdPictureInfo->flags = currPicFlags;
1304
1305 if (!pNvidiaVulkanParserPictureData->field_pic_flag)
1306 {
1307 pStdPictureInfo->PicOrderCnt[0] = pin->CurrFieldOrderCnt[0];
1308 pStdPictureInfo->PicOrderCnt[1] = pin->CurrFieldOrderCnt[1];
1309 }
1310 else
1311 {
1312 pStdPictureInfo->PicOrderCnt[pNvidiaVulkanParserPictureData->bottom_field_flag] = pin->CurrFieldOrderCnt[pNvidiaVulkanParserPictureData->bottom_field_flag];
1313 }
1314
1315 pPerFrameDecodeParameters->numGopReferenceSlots = FillDpbH264State(pNvidiaVulkanParserPictureData,
1316 pin->dpb,
1317 DE_LENGTH_OF_ARRAY(pin->dpb),
1318 pDpbRefList,
1319 pReferenceSlots,
1320 pPerFrameDecodeParameters->pGopReferenceImagesIndexes,
1321 pH264->stdPictureInfo.flags,
1322 &pSetupReferenceSlot->slotIndex);
1323
1324 DEBUGLOG(cout<<"pSetupReferenceSlot->slotIndex=" << dec << pSetupReferenceSlot->slotIndex <<endl);
1325
1326 if (pSetupReferenceSlot->slotIndex >= 0)
1327 {
1328 if (m_distinctDstDpbImages)
1329 {
1330 const int32_t setupSlotNdx = pPerFrameDecodeParameters->numGopReferenceSlots;
1331
1332 DE_ASSERT(setupSlotNdx < PerFrameDecodeParameters::MAX_DPB_REF_SLOTS);
1333
1334 pReferenceSlots[setupSlotNdx] = *pSetupReferenceSlot;
1335
1336 pSetupReferenceSlot = &pReferenceSlots[setupSlotNdx];
1337
1338 pPerFrameDecodeParameters->pictureResources[setupSlotNdx] = pPerFrameDecodeParameters->decodeFrameInfo.dstPictureResource;
1339
1340 pSetupReferenceSlot->pPictureResource = &pPerFrameDecodeParameters->pictureResources[setupSlotNdx];
1341 }
1342 else
1343 {
1344 pSetupReferenceSlot->pPictureResource = &pPerFrameDecodeParameters->decodeFrameInfo.dstPictureResource;
1345 }
1346
1347 pPerFrameDecodeParameters->decodeFrameInfo.pSetupReferenceSlot = pSetupReferenceSlot;
1348 }
1349
1350 ostringstream s;
1351 s << "numGopReferenceSlots:" << std::dec << pPerFrameDecodeParameters->numGopReferenceSlots << "(";
1352 if (pPerFrameDecodeParameters->numGopReferenceSlots)
1353 {
1354 for (int32_t dpbEntryIdx = 0; dpbEntryIdx < pPerFrameDecodeParameters->numGopReferenceSlots; dpbEntryIdx++)
1355 {
1356 pPerFrameDecodeParameters->pictureResources[dpbEntryIdx].sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR;
1357 pReferenceSlots[dpbEntryIdx].pPictureResource = &pPerFrameDecodeParameters->pictureResources[dpbEntryIdx];
1358
1359 DE_ASSERT(pDpbRefList[dpbEntryIdx].IsReference());
1360
1361 s << std::dec << pReferenceSlots[dpbEntryIdx].slotIndex << " ";
1362 }
1363
1364 pPerFrameDecodeParameters->decodeFrameInfo.pReferenceSlots = pReferenceSlots;
1365 pPerFrameDecodeParameters->decodeFrameInfo.referenceSlotCount = pPerFrameDecodeParameters->numGopReferenceSlots;
1366 }
1367 else
1368 {
1369 pPerFrameDecodeParameters->decodeFrameInfo.pReferenceSlots = DE_NULL;
1370 pPerFrameDecodeParameters->decodeFrameInfo.referenceSlotCount = 0;
1371 }
1372 s << ")";
1373
1374 DEBUGLOG(cout << s.str() <<endl);
1375 }
1376 else if (m_videoCodecOperation == VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR)
1377 {
1378 const NvidiaVulkanParserH265PictureData* const pin = &pNvidiaVulkanParserPictureData->CodecSpecific.h265;
1379 nvVideoH265PicParameters* pHevc = ALLOC_HEAP_OBJECT(heap, nvVideoH265PicParameters);
1380 VkVideoDecodeH265PictureInfoKHR* pPictureInfo = &pHevc->pictureInfo;
1381 StdVideoDecodeH265PictureInfo* pStdPictureInfo = &pHevc->stdPictureInfo;
1382 NvidiaVideoDecodeH265DpbSlotInfo* pDpbRefList = pHevc->dpbRefList;
1383
1384 *pHevc = nvVideoH265PicParameters();
1385
1386 pPerFrameDecodeParameters->pCurrentPictureParameters = StdVideoPictureParametersSet::StdVideoPictureParametersSetFromBase(pin->pPpsClientObject);
1387 pPerFrameDecodeParameters->decodeFrameInfo.pNext = &pHevc->pictureInfo;
1388
1389 pPictureInfo->sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_PICTURE_INFO_KHR;
1390 pPictureInfo->pNext = DE_NULL;
1391 pPictureInfo->pStdPictureInfo = &pHevc->stdPictureInfo;
1392
1393 pDecodePictureInfo->videoFrameType = 0;
1394 if (pNvidiaVulkanParserPictureData->CodecSpecific.h265.mv_hevc_enable)
1395 {
1396 pDecodePictureInfo->viewId = pNvidiaVulkanParserPictureData->CodecSpecific.h265.nuh_layer_id;
1397 }
1398 else
1399 {
1400 pDecodePictureInfo->viewId = 0;
1401 }
1402
1403 pPictureInfo->sliceSegmentCount = pNvidiaVulkanParserPictureData->nNumSlices;
1404 pPictureInfo->pSliceSegmentOffsets = pNvidiaVulkanParserPictureData->pSliceDataOffsets;
1405
1406 pStdPictureInfo->pps_pic_parameter_set_id = pin->pic_parameter_set_id; // PPS ID
1407 pStdPictureInfo->flags.IrapPicFlag = (pin->IrapPicFlag ? 1 : 0); // Intra Random Access Point for current picture.
1408 pStdPictureInfo->flags.IdrPicFlag = (pin->IdrPicFlag ? 1 : 0); // Instantaneous Decoding Refresh for current picture.
1409 pStdPictureInfo->NumBitsForSTRefPicSetInSlice = (uint16_t)pin->NumBitsForShortTermRPSInSlice;
1410 pStdPictureInfo->NumDeltaPocsOfRefRpsIdx = (uint8_t)pin->NumDeltaPocsOfRefRpsIdx;
1411 pStdPictureInfo->PicOrderCntVal = pin->CurrPicOrderCntVal;
1412
1413 int8_t dpbSlot = AllocateDpbSlotForCurrentH265(GetPic(pNvidiaVulkanParserPictureData->pCurrPic), true);
1414
1415 pSetupReferenceSlot->slotIndex = dpbSlot;
1416 // slotLayer requires NVIDIA specific extension VK_KHR_video_layers, not
1417 // enabled, just yet. setupReferenceSlot.slotLayerIndex = 0;
1418 DE_ASSERT(!(dpbSlot < 0));
1419
1420 if (dpbSlot >= 0)
1421 {
1422 DE_ASSERT(pNvidiaVulkanParserPictureData->ref_pic_flag);
1423 }
1424
1425 pPerFrameDecodeParameters->numGopReferenceSlots = FillDpbH265State(pNvidiaVulkanParserPictureData,
1426 pin,
1427 pDpbRefList,
1428 pStdPictureInfo,
1429 pReferenceSlots,
1430 pPerFrameDecodeParameters->pGopReferenceImagesIndexes);
1431
1432 DE_ASSERT(!pNvidiaVulkanParserPictureData->ref_pic_flag || (pSetupReferenceSlot->slotIndex >= 0));
1433
1434
1435 if (pSetupReferenceSlot->slotIndex >= 0)
1436 {
1437 if (m_distinctDstDpbImages)
1438 {
1439 const int32_t setupSlotNdx = pPerFrameDecodeParameters->numGopReferenceSlots;
1440
1441 DE_ASSERT(setupSlotNdx < PerFrameDecodeParameters::MAX_DPB_REF_SLOTS);
1442
1443 pReferenceSlots[setupSlotNdx] = *pSetupReferenceSlot;
1444
1445 pSetupReferenceSlot = &pReferenceSlots[setupSlotNdx];
1446
1447 pPerFrameDecodeParameters->pictureResources[setupSlotNdx] = pPerFrameDecodeParameters->decodeFrameInfo.dstPictureResource;
1448
1449 pSetupReferenceSlot->pPictureResource = &pPerFrameDecodeParameters->pictureResources[setupSlotNdx];
1450 }
1451 else
1452 {
1453 pSetupReferenceSlot->pPictureResource = &pPerFrameDecodeParameters->decodeFrameInfo.dstPictureResource;
1454 }
1455
1456 pPerFrameDecodeParameters->decodeFrameInfo.pSetupReferenceSlot = pSetupReferenceSlot;
1457 }
1458
1459 if (pPerFrameDecodeParameters->numGopReferenceSlots)
1460 {
1461 for (int32_t dpbEntryIdx = 0; dpbEntryIdx < pPerFrameDecodeParameters->numGopReferenceSlots; dpbEntryIdx++)
1462 {
1463 pPerFrameDecodeParameters->pictureResources[dpbEntryIdx].sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR;
1464 pReferenceSlots[dpbEntryIdx].pPictureResource = &pPerFrameDecodeParameters->pictureResources[dpbEntryIdx];
1465
1466 DE_ASSERT(pDpbRefList[dpbEntryIdx].IsReference());
1467 }
1468
1469 pPerFrameDecodeParameters->decodeFrameInfo.pReferenceSlots = pReferenceSlots;
1470 pPerFrameDecodeParameters->decodeFrameInfo.referenceSlotCount = pPerFrameDecodeParameters->numGopReferenceSlots;
1471 }
1472 else
1473 {
1474 pPerFrameDecodeParameters->decodeFrameInfo.pReferenceSlots = DE_NULL;
1475 pPerFrameDecodeParameters->decodeFrameInfo.referenceSlotCount = 0;
1476 }
1477 }
1478
1479 pDecodePictureInfo->displayWidth = m_nvidiaVulkanParserSequenceInfo.nDisplayWidth;
1480 pDecodePictureInfo->displayHeight = m_nvidiaVulkanParserSequenceInfo.nDisplayHeight;
1481
1482 bRet = DecodePictureWithParameters(pPictureParams, pDecodePictureInfo, heap) >= 0;
1483
1484 m_nCurrentPictureID++;
1485
1486 return bRet;
1487 }
1488
1489 // FillDpbState
1490 uint32_t VideoBaseDecoder::FillDpbH264State (const NvidiaVulkanParserPictureData* pNvidiaVulkanParserPictureData,
1491 const NvidiaVulkanParserH264DpbEntry* dpbIn,
1492 uint32_t maxDpbInSlotsInUse,
1493 NvidiaVideoDecodeH264DpbSlotInfo* pDpbRefList,
1494 VkVideoReferenceSlotInfoKHR* pReferenceSlots,
1495 int8_t* pGopReferenceImagesIndexes,
1496 StdVideoDecodeH264PictureInfoFlags currPicFlags,
1497 int32_t* pCurrAllocatedSlotIndex)
1498 {
1499 // #### Update m_dpb based on dpb parameters ####
1500 // Create unordered DPB and generate a bitmask of all render targets present in DPB
1501 uint32_t num_ref_frames = pNvidiaVulkanParserPictureData->CodecSpecific.h264.pStdSps->max_num_ref_frames;
1502
1503 DE_ASSERT(num_ref_frames <= m_maxNumDpbSurfaces);
1504 DE_UNREF(num_ref_frames);
1505
1506 dpbEntry refOnlyDpbIn[AVC_MAX_DPB_SLOTS]; // max number of Dpb // surfaces
1507 deMemset(&refOnlyDpbIn, 0, m_maxNumDpbSurfaces * sizeof(refOnlyDpbIn[0]));
1508
1509 uint32_t refDpbUsedAndValidMask = 0;
1510 uint32_t numUsedRef = 0;
1511
1512 for (uint32_t inIdx = 0; inIdx < maxDpbInSlotsInUse; inIdx++)
1513 {
1514 // used_for_reference: 0 = unused, 1 = top_field, 2 = bottom_field, 3 = both_fields
1515 const uint32_t used_for_reference = dpbIn[inIdx].used_for_reference & fieldIsReferenceMask;
1516
1517 if (used_for_reference)
1518 {
1519 const int8_t picIdx = (!dpbIn[inIdx].not_existing && dpbIn[inIdx].pNvidiaVulkanPicture)
1520 ? GetPicIdx(dpbIn[inIdx].pNvidiaVulkanPicture)
1521 : -1;
1522 const bool isFieldRef = (picIdx >= 0)
1523 ? GetFieldPicFlag(picIdx)
1524 : (used_for_reference && (used_for_reference != fieldIsReferenceMask));
1525 const int16_t fieldOrderCntList[2] =
1526 {
1527 (int16_t)dpbIn[inIdx].FieldOrderCnt[0],
1528 (int16_t)dpbIn[inIdx].FieldOrderCnt[1]
1529 };
1530
1531 refOnlyDpbIn[numUsedRef].setReferenceAndTopBoottomField(
1532 !!used_for_reference,
1533 (picIdx < 0), /* not_existing is frame inferred by the decoding process for gaps in frame_num */
1534 !!dpbIn[inIdx].is_long_term,
1535 isFieldRef,
1536 !!(used_for_reference & topFieldMask),
1537 !!(used_for_reference & bottomFieldMask),
1538 (int16_t)dpbIn[inIdx].FrameIdx,
1539 fieldOrderCntList,
1540 GetPic(dpbIn[inIdx].pNvidiaVulkanPicture));
1541
1542 if (picIdx >= 0)
1543 {
1544 refDpbUsedAndValidMask |= (1 << picIdx);
1545 }
1546
1547 numUsedRef++;
1548 }
1549 // Invalidate all slots.
1550 pReferenceSlots[inIdx].slotIndex = -1;
1551 pGopReferenceImagesIndexes[inIdx] = -1;
1552 }
1553
1554 DE_ASSERT(numUsedRef <= 16);
1555 DE_ASSERT(numUsedRef <= m_maxNumDpbSurfaces);
1556 DE_ASSERT(numUsedRef <= num_ref_frames);
1557
1558 // Map all frames not present in DPB as non-reference, and generate a mask of all used DPB entries
1559 /* uint32_t destUsedDpbMask = */ ResetPicDpbSlots(refDpbUsedAndValidMask);
1560
1561 // Now, map DPB render target indices to internal frame buffer index,
1562 // assign each reference a unique DPB entry, and create the ordered DPB
1563 // This is an undocumented MV restriction: the position in the DPB is stored
1564 // along with the co-located data, so once a reference frame is assigned a DPB
1565 // entry, it can no longer change.
1566
1567 // Find or allocate slots for existing dpb items.
1568 // Take into account the reference picture now.
1569 int8_t currPicIdx = GetPicIdx(pNvidiaVulkanParserPictureData->pCurrPic);
1570 int8_t bestNonExistingPicIdx = currPicIdx;
1571
1572 DE_ASSERT(currPicIdx >= 0);
1573
1574 if (refDpbUsedAndValidMask)
1575 {
1576 int32_t minFrameNumDiff = 0x10000;
1577
1578 for (int32_t dpbIdx = 0; (uint32_t)dpbIdx < numUsedRef; dpbIdx++)
1579 {
1580 if (!refOnlyDpbIn[dpbIdx].is_non_existing)
1581 {
1582 NvidiaVulkanPictureBase* picBuff = refOnlyDpbIn[dpbIdx].m_picBuff;
1583 int8_t picIdx = GetPicIdx(picBuff); // should always be valid at this point
1584
1585 DE_ASSERT(picIdx >= 0);
1586
1587 // We have up to 17 internal frame buffers, but only MAX_DPB_SIZE dpb
1588 // entries, so we need to re-map the index from the [0..MAX_DPB_SIZE]
1589 // range to [0..15]
1590 int8_t dpbSlot = GetPicDpbSlot(picIdx);
1591
1592 if (dpbSlot < 0)
1593 {
1594 dpbSlot = m_dpb.AllocateSlot();
1595
1596 DE_ASSERT((dpbSlot >= 0) && ((uint32_t)dpbSlot < m_maxNumDpbSurfaces));
1597
1598 SetPicDpbSlot(picIdx, dpbSlot);
1599
1600 m_dpb[dpbSlot].setPictureResource(picBuff);
1601 }
1602
1603 m_dpb[dpbSlot].MarkInUse();
1604
1605 DE_ASSERT(dpbSlot >= 0); // DPB mapping logic broken!
1606
1607 refOnlyDpbIn[dpbIdx].dpbSlot = dpbSlot;
1608
1609 int32_t frameNumDiff = ((int32_t)pNvidiaVulkanParserPictureData->CodecSpecific.h264.frame_num - refOnlyDpbIn[dpbIdx].FrameIdx);
1610
1611 if (frameNumDiff <= 0)
1612 {
1613 frameNumDiff = 0xffff;
1614 }
1615
1616 if (frameNumDiff < minFrameNumDiff)
1617 {
1618 bestNonExistingPicIdx = picIdx;
1619 minFrameNumDiff = frameNumDiff;
1620 }
1621 else if (bestNonExistingPicIdx == currPicIdx)
1622 {
1623 bestNonExistingPicIdx = picIdx;
1624 }
1625 }
1626 }
1627 }
1628
1629 // In Vulkan, we always allocate a Dbp slot for the current picture,
1630 // regardless if it is going to become a reference or not. Non-reference slots
1631 // get freed right after usage. if (pNvidiaVulkanParserPictureData->ref_pic_flag)
1632 int8_t currPicDpbSlot = AllocateDpbSlotForCurrentH264(GetPic(pNvidiaVulkanParserPictureData->pCurrPic), currPicFlags);
1633
1634 DE_ASSERT(currPicDpbSlot >= 0);
1635
1636 *pCurrAllocatedSlotIndex = currPicDpbSlot;
1637
1638 if (refDpbUsedAndValidMask)
1639 {
1640 // Find or allocate slots for non existing dpb items and populate the slots.
1641 uint32_t dpbInUseMask = m_dpb.getSlotInUseMask();
1642 int8_t firstNonExistingDpbSlot = 0;
1643
1644 for (uint32_t dpbIdx = 0; dpbIdx < numUsedRef; dpbIdx++)
1645 {
1646 int8_t dpbSlot = -1;
1647 int8_t picIdx = -1;
1648
1649 if (refOnlyDpbIn[dpbIdx].is_non_existing)
1650 {
1651 DE_ASSERT(refOnlyDpbIn[dpbIdx].m_picBuff == DE_NULL);
1652
1653 while (((uint32_t)firstNonExistingDpbSlot < m_maxNumDpbSurfaces) && (dpbSlot == -1))
1654 {
1655 if (!(dpbInUseMask & (1 << firstNonExistingDpbSlot)))
1656 {
1657 dpbSlot = firstNonExistingDpbSlot;
1658 }
1659
1660 firstNonExistingDpbSlot++;
1661 }
1662
1663 picIdx = bestNonExistingPicIdx;
1664
1665 // Find the closest valid refpic already in the DPB
1666 uint32_t minDiffPOC = 0x7fff;
1667
1668 for (uint32_t j = 0; j < numUsedRef; j++)
1669 {
1670 if (!refOnlyDpbIn[j].is_non_existing && (refOnlyDpbIn[j].used_for_reference & refOnlyDpbIn[dpbIdx].used_for_reference) == refOnlyDpbIn[dpbIdx].used_for_reference)
1671 {
1672 uint32_t diffPOC = abs((int32_t)(refOnlyDpbIn[j].FieldOrderCnt[0] - refOnlyDpbIn[dpbIdx].FieldOrderCnt[0]));
1673
1674 if (diffPOC <= minDiffPOC)
1675 {
1676 minDiffPOC = diffPOC;
1677 picIdx = GetPicIdx(refOnlyDpbIn[j].m_picBuff);
1678 }
1679 }
1680 }
1681 }
1682 else
1683 {
1684 DE_ASSERT(refOnlyDpbIn[dpbIdx].m_picBuff != DE_NULL);
1685
1686 dpbSlot = refOnlyDpbIn[dpbIdx].dpbSlot;
1687 picIdx = GetPicIdx(refOnlyDpbIn[dpbIdx].m_picBuff);
1688 }
1689
1690 DE_ASSERT((dpbSlot >= 0) && ((uint32_t)dpbSlot < m_maxNumDpbSurfaces));
1691
1692 refOnlyDpbIn[dpbIdx].setH264PictureData(pDpbRefList, pReferenceSlots, dpbIdx, dpbSlot);
1693
1694 pGopReferenceImagesIndexes[dpbIdx] = picIdx;
1695 }
1696 }
1697
1698 return refDpbUsedAndValidMask ? numUsedRef : 0;
1699 }
1700
1701 uint32_t VideoBaseDecoder::FillDpbH265State (const NvidiaVulkanParserPictureData* pNvidiaVulkanParserPictureData,
1702 const NvidiaVulkanParserH265PictureData* pin,
1703 NvidiaVideoDecodeH265DpbSlotInfo* pDpbSlotInfo,
1704 StdVideoDecodeH265PictureInfo* pStdPictureInfo,
1705 VkVideoReferenceSlotInfoKHR* pReferenceSlots,
1706 int8_t* pGopReferenceImagesIndexes)
1707 {
1708 // #### Update m_dpb based on dpb parameters ####
1709 // Create unordered DPB and generate a bitmask of all render targets present in DPB
1710
1711 dpbEntry refOnlyDpbIn[AVC_MAX_DPB_SLOTS];
1712 deMemset(&refOnlyDpbIn, 0, m_maxNumDpbSurfaces * sizeof(refOnlyDpbIn[0]));
1713 uint32_t refDpbUsedAndValidMask = 0;
1714 uint32_t numUsedRef = 0;
1715
1716 for (int32_t inIdx = 0; inIdx < HEVC_MAX_DPB_SLOTS; inIdx++)
1717 {
1718 // used_for_reference: 0 = unused, 1 = top_field, 2 = bottom_field, 3 = both_fields
1719 int8_t picIdx = GetPicIdx(pin->RefPics[inIdx]);
1720 if (picIdx >= 0)
1721 {
1722 refOnlyDpbIn[numUsedRef].setReference((pin->IsLongTerm[inIdx] == 1), pin->PicOrderCntVal[inIdx], GetPic(pin->RefPics[inIdx]));
1723
1724 refDpbUsedAndValidMask |= (1 << picIdx);
1725
1726 refOnlyDpbIn[numUsedRef].originalDpbIndex = (int8_t)inIdx;
1727 numUsedRef++;
1728 }
1729 // Invalidate all slots.
1730 pReferenceSlots[inIdx].slotIndex = -1;
1731 pGopReferenceImagesIndexes[inIdx] = -1;
1732 }
1733
1734 DE_ASSERT(numUsedRef <= m_maxNumDpbSurfaces);
1735
1736 // Take into account the reference picture now.
1737 int8_t currPicIdx = GetPicIdx(pNvidiaVulkanParserPictureData->pCurrPic);
1738 int8_t currPicDpbSlot = -1;
1739
1740 DE_ASSERT(currPicIdx >= 0);
1741
1742 if (currPicIdx >= 0)
1743 {
1744 currPicDpbSlot = GetPicDpbSlot(currPicIdx);
1745 refDpbUsedAndValidMask |= (1 << currPicIdx);
1746 }
1747
1748 DE_UNREF(currPicDpbSlot);
1749 DE_ASSERT(currPicDpbSlot >= 0);
1750
1751 // Map all frames not present in DPB as non-reference, and generate a mask of
1752 // all used DPB entries
1753 /* uint32_t destUsedDpbMask = */ ResetPicDpbSlots(refDpbUsedAndValidMask);
1754
1755 // Now, map DPB render target indices to internal frame buffer index,
1756 // assign each reference a unique DPB entry, and create the ordered DPB
1757 // This is an undocumented MV restriction: the position in the DPB is stored
1758 // along with the co-located data, so once a reference frame is assigned a DPB
1759 // entry, it can no longer change.
1760
1761 int8_t frmListToDpb[HEVC_MAX_DPB_SLOTS]; // TODO change to -1 for invalid indexes.
1762 deMemset(&frmListToDpb, 0, sizeof(frmListToDpb));
1763
1764 // Find or allocate slots for existing dpb items.
1765 for (int32_t dpbIdx = 0; (uint32_t)dpbIdx < numUsedRef; dpbIdx++)
1766 {
1767 if (!refOnlyDpbIn[dpbIdx].is_non_existing)
1768 {
1769 NvidiaVulkanPictureBase* picBuff = refOnlyDpbIn[dpbIdx].m_picBuff;
1770
1771 int8_t picIdx = GetPicIdx(picBuff); // should always be valid at this point
1772
1773 DE_ASSERT(picIdx >= 0);
1774 // We have up to 17 internal frame buffers, but only HEVC_MAX_DPB_SLOTS
1775 // dpb entries, so we need to re-map the index from the
1776 // [0..HEVC_MAX_DPB_SLOTS] range to [0..15]
1777
1778 int8_t dpbSlot = GetPicDpbSlot(picIdx);
1779
1780 if (dpbSlot < 0)
1781 {
1782 dpbSlot = m_dpb.AllocateSlot();
1783
1784 DE_ASSERT(dpbSlot >= 0);
1785
1786 SetPicDpbSlot(picIdx, dpbSlot);
1787
1788 m_dpb[dpbSlot].setPictureResource(picBuff);
1789 }
1790
1791 m_dpb[dpbSlot].MarkInUse();
1792
1793 DE_ASSERT(dpbSlot >= 0); // DPB mapping logic broken!
1794
1795 refOnlyDpbIn[dpbIdx].dpbSlot = dpbSlot;
1796
1797 uint32_t originalDpbIndex = refOnlyDpbIn[dpbIdx].originalDpbIndex;
1798
1799 DE_ASSERT(originalDpbIndex < HEVC_MAX_DPB_SLOTS);
1800
1801 frmListToDpb[originalDpbIndex] = dpbSlot;
1802 }
1803 }
1804
1805 // Find or allocate slots for non existing dpb items and populate the slots.
1806 uint32_t dpbInUseMask = m_dpb.getSlotInUseMask();
1807 int8_t firstNonExistingDpbSlot = 0;
1808
1809 for (uint32_t dpbIdx = 0; dpbIdx < numUsedRef; dpbIdx++)
1810 {
1811 int8_t dpbSlot = -1;
1812
1813 if (refOnlyDpbIn[dpbIdx].is_non_existing)
1814 {
1815 // There shouldn't be not_existing in h.265
1816 DE_ASSERT(0);
1817 DE_ASSERT(refOnlyDpbIn[dpbIdx].m_picBuff == DE_NULL);
1818
1819 while (((uint32_t)firstNonExistingDpbSlot < m_maxNumDpbSurfaces) && (dpbSlot == -1))
1820 {
1821 if (!(dpbInUseMask & (1 << firstNonExistingDpbSlot)))
1822 {
1823 dpbSlot = firstNonExistingDpbSlot;
1824 }
1825 firstNonExistingDpbSlot++;
1826 }
1827
1828 DE_ASSERT((dpbSlot >= 0) && ((uint32_t)dpbSlot < m_maxNumDpbSurfaces));
1829 }
1830 else
1831 {
1832 DE_ASSERT(refOnlyDpbIn[dpbIdx].m_picBuff != DE_NULL);
1833 dpbSlot = refOnlyDpbIn[dpbIdx].dpbSlot;
1834 }
1835
1836 DE_ASSERT((dpbSlot >= 0) && (dpbSlot < HEVC_MAX_DPB_SLOTS));
1837
1838 refOnlyDpbIn[dpbIdx].setH265PictureData(pDpbSlotInfo, pReferenceSlots, dpbIdx, dpbSlot);
1839 pGopReferenceImagesIndexes[dpbIdx] = GetPicIdx(refOnlyDpbIn[dpbIdx].m_picBuff);
1840 }
1841
1842 int32_t numPocStCurrBefore = 0;
1843 const size_t maxNumPocStCurrBefore = sizeof(pStdPictureInfo->RefPicSetStCurrBefore) / sizeof(pStdPictureInfo->RefPicSetStCurrBefore[0]);
1844
1845 DE_UNREF(maxNumPocStCurrBefore);
1846 DE_ASSERT((size_t)pin->NumPocStCurrBefore < maxNumPocStCurrBefore);
1847
1848 for (int32_t i = 0; i < pin->NumPocStCurrBefore; i++)
1849 {
1850 uint8_t idx = (uint8_t)pin->RefPicSetStCurrBefore[i];
1851
1852 if (idx < HEVC_MAX_DPB_SLOTS)
1853 {
1854 pStdPictureInfo->RefPicSetStCurrBefore[numPocStCurrBefore++] = frmListToDpb[idx] & 0xf;
1855 }
1856 }
1857 while (numPocStCurrBefore < 8)
1858 {
1859 pStdPictureInfo->RefPicSetStCurrBefore[numPocStCurrBefore++] = 0xff;
1860 }
1861
1862 int32_t numPocStCurrAfter = 0;
1863 const size_t maxNumPocStCurrAfter = sizeof(pStdPictureInfo->RefPicSetStCurrAfter) / sizeof(pStdPictureInfo->RefPicSetStCurrAfter[0]);
1864
1865 DE_UNREF(maxNumPocStCurrAfter);
1866 DE_ASSERT((size_t)pin->NumPocStCurrAfter < maxNumPocStCurrAfter);
1867
1868 for (int32_t i = 0; i < pin->NumPocStCurrAfter; i++)
1869 {
1870 uint8_t idx = (uint8_t)pin->RefPicSetStCurrAfter[i];
1871
1872 if (idx < HEVC_MAX_DPB_SLOTS)
1873 {
1874 pStdPictureInfo->RefPicSetStCurrAfter[numPocStCurrAfter++] = frmListToDpb[idx] & 0xf;
1875 }
1876 }
1877
1878 while (numPocStCurrAfter < 8)
1879 {
1880 pStdPictureInfo->RefPicSetStCurrAfter[numPocStCurrAfter++] = 0xff;
1881 }
1882
1883 int32_t numPocLtCurr = 0;
1884 const size_t maxNumPocLtCurr = sizeof(pStdPictureInfo->RefPicSetLtCurr) / sizeof(pStdPictureInfo->RefPicSetLtCurr[0]);
1885
1886 DE_UNREF(maxNumPocLtCurr);
1887 DE_ASSERT((size_t)pin->NumPocLtCurr < maxNumPocLtCurr);
1888
1889 for (int32_t i = 0; i < pin->NumPocLtCurr; i++)
1890 {
1891 uint8_t idx = (uint8_t)pin->RefPicSetLtCurr[i];
1892
1893 if (idx < HEVC_MAX_DPB_SLOTS)
1894 {
1895 pStdPictureInfo->RefPicSetLtCurr[numPocLtCurr++] = frmListToDpb[idx] & 0xf;
1896 }
1897 }
1898
1899 while (numPocLtCurr < 8)
1900 {
1901 pStdPictureInfo->RefPicSetLtCurr[numPocLtCurr++] = 0xff;
1902 }
1903
1904 return numUsedRef;
1905 }
1906
1907 int8_t VideoBaseDecoder::AllocateDpbSlotForCurrentH264 (NvidiaVulkanPictureBase* pNvidiaVulkanPictureBase,
1908 StdVideoDecodeH264PictureInfoFlags currPicFlags)
1909 {
1910 // Now, map the current render target
1911 int8_t dpbSlot = -1;
1912 int8_t currPicIdx = GetPicIdx(pNvidiaVulkanPictureBase);
1913
1914 DE_ASSERT(currPicIdx >= 0);
1915
1916 SetFieldPicFlag(currPicIdx, currPicFlags.field_pic_flag);
1917 // In Vulkan we always allocate reference slot for the current picture.
1918 if (true /* currPicFlags.is_reference */)
1919 {
1920 dpbSlot = GetPicDpbSlot(currPicIdx);
1921
1922 if (dpbSlot < 0)
1923 {
1924 dpbSlot = m_dpb.AllocateSlot();
1925
1926 DE_ASSERT(dpbSlot >= 0);
1927
1928 SetPicDpbSlot(currPicIdx, dpbSlot);
1929
1930 m_dpb[dpbSlot].setPictureResource(pNvidiaVulkanPictureBase);
1931 }
1932
1933 DE_ASSERT(dpbSlot >= 0);
1934 }
1935
1936 return dpbSlot;
1937 }
1938
1939 int8_t VideoBaseDecoder::AllocateDpbSlotForCurrentH265 (NvidiaVulkanPictureBase* pNvidiaVulkanPictureBase,
1940 bool isReference)
1941 {
1942 // Now, map the current render target
1943 int8_t dpbSlot = -1;
1944 int8_t currPicIdx = GetPicIdx(pNvidiaVulkanPictureBase);
1945
1946 DE_ASSERT(currPicIdx >= 0);
1947 DE_ASSERT(isReference);
1948
1949 if (isReference)
1950 {
1951 dpbSlot = GetPicDpbSlot(currPicIdx);
1952
1953 if (dpbSlot < 0)
1954 {
1955 dpbSlot = m_dpb.AllocateSlot();
1956
1957 DE_ASSERT(dpbSlot >= 0);
1958
1959 SetPicDpbSlot(currPicIdx, dpbSlot);
1960
1961 m_dpb[dpbSlot].setPictureResource(pNvidiaVulkanPictureBase);
1962 }
1963
1964 DE_ASSERT(dpbSlot >= 0);
1965 }
1966
1967 return dpbSlot;
1968 }
1969
1970 VkFormat getRecommendedFormat (const vector<VkFormat>& formats, VkFormat recommendedFormat)
1971 {
1972 if (formats.empty())
1973 return VK_FORMAT_UNDEFINED;
1974 else if (recommendedFormat != VK_FORMAT_UNDEFINED && std::find(formats.begin(), formats.end(), recommendedFormat) != formats.end())
1975 return recommendedFormat;
1976 else
1977 return formats[0];
1978 }
1979
1980 vector<pair<VkFormat, VkImageUsageFlags>> getImageFormatAndUsageForOutputAndDPB (const InstanceInterface& vk,
1981 const VkPhysicalDevice physicalDevice,
1982 const VkVideoProfileListInfoKHR* videoProfileList,
1983 const VkFormat recommendedFormat,
1984 const bool distinctDstDpbImages)
1985 {
1986 const VkImageUsageFlags dstFormatUsages = VK_IMAGE_USAGE_TRANSFER_SRC_BIT
1987 | VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR;
1988 const VkImageUsageFlags dpbFormatUsages = VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR;
1989 const VkImageUsageFlags bothImageUsages = dstFormatUsages | dpbFormatUsages;
1990 VkFormat dstFormat = VK_FORMAT_UNDEFINED;
1991 VkFormat dpbFormat = VK_FORMAT_UNDEFINED;
1992 vector<pair<VkFormat, VkImageUsageFlags>> result;
1993
1994 // Check if both image usages are not supported on this platform
1995 if (!distinctDstDpbImages)
1996 {
1997 const MovePtr<vector<VkFormat>> bothUsageFormats = getSupportedFormats(vk, physicalDevice, bothImageUsages, videoProfileList);
1998 VkFormat pickedFormat = getRecommendedFormat(*bothUsageFormats, recommendedFormat);
1999
2000 result.push_back(pair<VkFormat, VkImageUsageFlags>(pickedFormat, bothImageUsages));
2001 result.push_back(pair<VkFormat, VkImageUsageFlags>(pickedFormat, VkImageUsageFlags()));
2002 }
2003 else
2004 {
2005 {
2006 const MovePtr<vector<VkFormat>> dstUsageFormats = getSupportedFormats(vk, physicalDevice, dstFormatUsages, videoProfileList);
2007
2008 if (dstUsageFormats == DE_NULL)
2009 TCU_FAIL("Implementation must report format for VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR");
2010
2011 dstFormat = getRecommendedFormat(*dstUsageFormats, recommendedFormat);
2012
2013 if (dstFormat == VK_FORMAT_UNDEFINED)
2014 TCU_FAIL("Implementation must report format for VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR");
2015
2016 result.push_back(pair<VkFormat, VkImageUsageFlags>(dstFormat, dstFormatUsages));
2017 }
2018
2019 {
2020 const MovePtr<vector<VkFormat>> dpbUsageFormats = getSupportedFormats(vk, physicalDevice, dpbFormatUsages, videoProfileList);
2021
2022 if (dpbUsageFormats == DE_NULL)
2023 TCU_FAIL("Implementation must report format for VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR");
2024
2025 dpbFormat = getRecommendedFormat(*dpbUsageFormats, recommendedFormat);
2026
2027 result.push_back(pair<VkFormat, VkImageUsageFlags>(dpbFormat, dpbFormatUsages));
2028 }
2029 }
2030
2031 DE_ASSERT(result.size() == 2);
2032
2033 return result;
2034 }
2035
/* Callback function to be registered for getting a callback when decoding of
 * sequence starts. Return values from HandleVideoSequence() are interpreted as:
 * 0: fail, 1: succeeded, > 1: override dpb size of parser (set by
 * nvVideoParseParameters::ulMaxNumDecodeSurfaces while creating parser)
 */
// Sets up (or grows) the decode resources for the video sequence described by
// pVideoFormat.
//
// On every call the video profile, capabilities and output/DPB image formats are
// derived from pVideoFormat and m_numDecodeSurfaces is recomputed.
//
// First call (m_width or m_height still zero): creates the video session and
// binds its memory, records the stream parameters in the members, initialises
// the image pool and allocates the per-frame data array together with one
// bitstream buffer and one command buffer for each of the first
// m_maxDecodeFramesCount frames.
//
// Later calls, while fewer frames are in use than were allocated: grows the
// number of frames in use by m_gopSize, re-initialises the image pool and creates
// bitstream/command buffers for the newly added frames only.
//
// Returns m_numDecodeSurfaces.
int32_t VideoBaseDecoder::StartVideoSequence (const VulkanParserDetectedVideoFormat* pVideoFormat)
{
	const InstanceInterface& vki = m_context.getInstanceInterface();
	const VkPhysicalDevice physDevice = m_context.getPhysicalDevice();
	const VkDevice device = getDevice();
	const DeviceInterface& vkd = getDeviceDriver();
	const deUint32 queueFamilyIndex = getQueueFamilyIndexDecode();
	Allocator& allocator = getAllocator();
	// Size requested for every per-frame bitstream buffer (4 MiB).
	const VkDeviceSize bufferSize = 4 * 1024 * 1024;

	DE_ASSERT(m_videoFrameBuffer != DE_NULL);
	DE_ASSERT(m_videoCodecOperation == pVideoFormat->codec); // Make sure video have same format the queue was created for
	DE_ASSERT(VK_VIDEO_CHROMA_SUBSAMPLING_MONOCHROME_BIT_KHR == pVideoFormat->chromaSubsampling || VK_VIDEO_CHROMA_SUBSAMPLING_420_BIT_KHR == pVideoFormat->chromaSubsampling || VK_VIDEO_CHROMA_SUBSAMPLING_422_BIT_KHR == pVideoFormat->chromaSubsampling || VK_VIDEO_CHROMA_SUBSAMPLING_444_BIT_KHR == pVideoFormat->chromaSubsampling);

	const VkVideoCodecOperationFlagBitsKHR videoCodec = pVideoFormat->codec;
	const uint32_t maxDpbSlotCount = pVideoFormat->maxNumDpbSlots; // This is currently configured by the parser to maxNumDpbSlots from the stream plus 1 for the current slot on the fly
	// Every chroma subsampling other than 4:4:4 is treated as semi-planar.
	const bool semiPlanarFormat = pVideoFormat->chromaSubsampling != VK_VIDEO_CHROMA_SUBSAMPLING_444_BIT_KHR;
	const VkVideoChromaSubsamplingFlagBitsKHR chromaSubsampling = pVideoFormat->chromaSubsampling;
	const VkVideoComponentBitDepthFlagsKHR lumaBitDepth = getLumaBitDepth(pVideoFormat->bit_depth_luma_minus8);
	const VkVideoComponentBitDepthFlagsKHR chromaBitDepth = getChromaBitDepth(pVideoFormat->bit_depth_chroma_minus8);
	const VkFormat videoVkFormat = codecGetVkFormat(chromaSubsampling, lumaBitDepth, semiPlanarFormat);
	const VkExtent2D codedExtent = { pVideoFormat->coded_width, pVideoFormat->coded_height };
	const bool h264 = (videoCodec == VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR);
	const bool h265 = (videoCodec == VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR);

	// Only streams up to 3840 pixels wide, in 8-bit 4:2:0 two-plane format and
	// coded as H.264 or H.265, are expected here.
	DE_ASSERT(pVideoFormat->coded_width <= 3840);
	DE_ASSERT(videoVkFormat == VK_FORMAT_G8_B8R8_2PLANE_420_UNORM);
	DE_ASSERT(h264 || h265);

	// At least four decode surfaces are always requested.
	m_numDecodeSurfaces = std::max(m_gopSize * m_dpbCount, 4u);

	// Both codec-specific profile structures are created; only the one matching
	// the codec is chained into the video profile.
	const MovePtr<VkVideoDecodeH264ProfileInfoKHR> videoProfileExtentionH264 = getVideoProfileExtensionH264D();
	const MovePtr<VkVideoDecodeH265ProfileInfoKHR> videoProfileExtentionH265 = getVideoProfileExtensionH265D();
	void* videoProfileExtention = h264 ? (void*)videoProfileExtentionH264.get()
	: h265 ? (void*)videoProfileExtentionH265.get()
	: DE_NULL;
	const MovePtr<VkVideoProfileInfoKHR> videoProfile = getVideoProfile(videoCodec, videoProfileExtention, chromaSubsampling, lumaBitDepth, chromaBitDepth);
	const MovePtr<VkVideoProfileListInfoKHR> videoProfileList = getVideoProfileList(videoProfile.get());


	// Same selection scheme for the codec-specific capabilities structure.
	const MovePtr<VkVideoDecodeH264CapabilitiesKHR> videoCapabilitiesExtension264D = getVideoCapabilitiesExtensionH264D();
	const MovePtr<VkVideoDecodeH265CapabilitiesKHR> videoCapabilitiesExtension265D = getVideoCapabilitiesExtensionH265D();
	void* videoCapabilitiesExtension = h264 ? (void*)videoCapabilitiesExtension264D.get()
	: h265 ? (void*)videoCapabilitiesExtension265D.get()
	: DE_NULL;
	MovePtr<VkVideoDecodeCapabilitiesKHR> videoDecodeCapabilities = getVideoDecodeCapabilities(videoCapabilitiesExtension);
	const MovePtr<VkVideoCapabilitiesKHR> videoCapabilites = getVideoCapabilities(vki, physDevice, videoProfile.get(), videoDecodeCapabilities.get());
	// NOTE(review): the result is only passed to DE_UNREF further down; confirm
	// whether an unsupported extent should be acted upon.
	const bool videoExtentSupported = validateVideoExtent(codedExtent, *videoCapabilites);

	// Separate output and DPB images are used whenever the implementation advertises that capability.
	m_distinctDstDpbImages = (videoDecodeCapabilities->flags & VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_DISTINCT_BIT_KHR) ? true : false;

	// Entry [0] describes the output image, entry [1] the DPB image.
	const vector<pair<VkFormat, VkImageUsageFlags>> imageFormatAndUsageForOutputAndDPB = getImageFormatAndUsageForOutputAndDPB(vki, physDevice, videoProfileList.get(), videoVkFormat, m_distinctDstDpbImages);

	const bool outFormatValidate = validateFormatSupport(vki, physDevice, imageFormatAndUsageForOutputAndDPB[0].second, videoProfileList.get(), imageFormatAndUsageForOutputAndDPB[0].first);
	const bool isVideoProfileSutable = validateVideoProfileList(vki, physDevice, videoProfileList.get(), imageFormatAndUsageForOutputAndDPB[0].first, imageFormatAndUsageForOutputAndDPB[0].second);

	// The two validation results above are not inspected any further.
	DE_UNREF(outFormatValidate);
	DE_UNREF(isVideoProfileSutable);

	const VkFormat outPictureFormat = imageFormatAndUsageForOutputAndDPB[0].first;
	const VkImageUsageFlags outPictureUsage = imageFormatAndUsageForOutputAndDPB[0].second;
	const VkFormat dpbPictureFormat = imageFormatAndUsageForOutputAndDPB[1].first;
	const VkImageUsageFlags dpbPictureUsage = imageFormatAndUsageForOutputAndDPB[1].second;
	// One layer when separate reference images are supported, otherwise one layer per decode surface.
	const deUint32 dpbPictureImageLayers = (videoCapabilites->flags & VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR) ? 1 : m_numDecodeSurfaces;
	const VkImageCreateInfo outImageCreateInfo = makeImageCreateInfo(outPictureFormat, codedExtent, &queueFamilyIndex, outPictureUsage, videoProfileList.get());
	const VkImageCreateInfo dpbImageCreateInfo = makeImageCreateInfo(dpbPictureFormat, codedExtent, &queueFamilyIndex, dpbPictureUsage, videoProfileList.get(), dpbPictureImageLayers);
	// An empty DPB usage means no dedicated DPB image description is handed to the image pool.
	const VkImageCreateInfo* pDpbImageCreateInfo = dpbPictureUsage == static_cast<VkImageUsageFlags>(0) ? DE_NULL : &dpbImageCreateInfo;

	// Zero display size marks the first sequence start: create everything from scratch.
	if (m_width == 0 || m_height == 0)
	{
		const MovePtr<VkVideoSessionCreateInfoKHR> videoSessionCreateInfo = getVideoSessionCreateInfo(queueFamilyIndex,
		videoProfile.get(),
		codedExtent,
		outPictureFormat,
		dpbPictureFormat,
		maxDpbSlotCount,
		maxDpbSlotCount);
		Move<VkVideoSessionKHR> videoSession = createVideoSessionKHR(vkd, device, videoSessionCreateInfo.get());
		vector<AllocationPtr> allocations = getAndBindVideoSessionMemory(vkd, device, *videoSession, allocator);

		DE_UNREF(videoExtentSupported);

		m_minBitstreamBufferSizeAlignment = videoCapabilites->minBitstreamBufferSizeAlignment;
		m_minBitstreamBufferOffsetAlignment = videoCapabilites->minBitstreamBufferOffsetAlignment;
		// Keep the session memory allocations in the member.
		m_videoDecodeSessionAllocs.swap(allocations);

		m_videoDecodeSession = videoSession;
		m_videoCodecOperation = pVideoFormat->codec;
		m_chromaFormat = pVideoFormat->chromaSubsampling;
		m_bitLumaDepthMinus8 = pVideoFormat->bit_depth_luma_minus8;
		m_bitChromaDepthMinus8 = pVideoFormat->bit_depth_chroma_minus8;
		m_videoFormat = *pVideoFormat;
		m_codedWidth = pVideoFormat->coded_width;
		m_codedHeight = pVideoFormat->coded_height;
		m_width = pVideoFormat->display_area.right - pVideoFormat->display_area.left;
		m_height = pVideoFormat->display_area.bottom - pVideoFormat->display_area.top;
		// Allocate room for the larger of the frame count trigger and the surface
		// count, but start out using only m_numDecodeSurfaces frames.
		m_maxDecodeFramesAllocated = std::max((uint32_t)m_frameCountTrigger, m_numDecodeSurfaces);
		m_maxDecodeFramesCount = m_numDecodeSurfaces;

		DE_ASSERT(m_maxDecodeFramesCount <= m_maxDecodeFramesAllocated);

		m_videoFrameBuffer->InitImagePool(vkd, device, queueFamilyIndex, allocator, m_maxDecodeFramesCount, m_maxDecodeFramesAllocated, &outImageCreateInfo, pDpbImageCreateInfo, videoProfile.get());

		// NOTE(review): raw array allocation; the matching release is not visible in this part of the file.
		m_decodeFramesData = new NvVkDecodeFrameData[m_maxDecodeFramesAllocated];
		m_videoCommandPool = makeCommandPool(vkd, device, queueFamilyIndex);

		for (uint32_t decodeFrameId = 0; decodeFrameId < m_maxDecodeFramesCount; decodeFrameId++)
		{
			m_decodeFramesData[decodeFrameId].bitstreamBuffer.CreateVideoBitstreamBuffer(vkd, device, allocator, bufferSize, m_minBitstreamBufferOffsetAlignment, m_minBitstreamBufferSizeAlignment, videoProfileList.get());
			m_decodeFramesData[decodeFrameId].commandBuffer = allocateCommandBuffer(vkd, device, *m_videoCommandPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
		}
	}
	// Already initialised and there is still allocated room: grow by one GOP.
	else if (m_maxDecodeFramesCount < m_maxDecodeFramesAllocated)
	{
		// First frame index that has no bitstream/command buffer yet.
		const uint32_t firstIndex = m_maxDecodeFramesCount;

		DE_ASSERT(m_maxDecodeFramesCount > 0);

		m_maxDecodeFramesCount += m_gopSize;

		// The grown count is only checked by this assertion; it is not clamped.
		DE_ASSERT(m_maxDecodeFramesCount <= m_maxDecodeFramesAllocated);

		m_numDecodeSurfaces = m_maxDecodeFramesCount;
		m_codedWidth = pVideoFormat->coded_width;
		m_codedHeight = pVideoFormat->coded_height;
		m_width = pVideoFormat->display_area.right - pVideoFormat->display_area.left;
		m_height = pVideoFormat->display_area.bottom - pVideoFormat->display_area.top;

		m_videoFrameBuffer->InitImagePool(vkd, device, queueFamilyIndex, allocator, m_maxDecodeFramesCount, m_maxDecodeFramesAllocated, &outImageCreateInfo, pDpbImageCreateInfo, videoProfile.get());

		// Only the newly added frames need their buffers created.
		for (uint32_t decodeFrameId = firstIndex; decodeFrameId < m_maxDecodeFramesCount; decodeFrameId++)
		{
			m_decodeFramesData[decodeFrameId].bitstreamBuffer.CreateVideoBitstreamBuffer(vkd, device, allocator, bufferSize, m_minBitstreamBufferOffsetAlignment, m_minBitstreamBufferSizeAlignment, videoProfileList.get());
			m_decodeFramesData[decodeFrameId].commandBuffer = allocateCommandBuffer(vkd, device, *m_videoCommandPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
		}
	}

	return m_numDecodeSurfaces;
}
2180
2181 bool VideoBaseDecoder::UpdatePictureParametersHandler (NvidiaVulkanPictureParameters* pNvidiaVulkanPictureParameters,
2182 NvidiaSharedBaseObj<NvidiaParserVideoRefCountBase>& pictureParametersObject,
2183 uint64_t updateSequenceCount)
2184 {
2185 NvidiaSharedBaseObj<StdVideoPictureParametersSet> pictureParametersSet(StdVideoPictureParametersSet::Create(pNvidiaVulkanPictureParameters, updateSequenceCount));
2186
2187 DEBUGLOG(std::cout << "\tUpdatePictureParametersHandler::PPS:" << (void*)pictureParametersSet.Get() << std::endl);
2188
2189 if (!pictureParametersSet)
2190 {
2191 DE_ASSERT(0 && "Invalid pictureParametersSet");
2192 return false;
2193 }
2194
2195 const bool hasSpsPpsPair = AddPictureParametersToQueue(pictureParametersSet);
2196
2197 if (m_videoDecodeSession.get() != DE_NULL && hasSpsPpsPair)
2198 {
2199 FlushPictureParametersQueue();
2200 }
2201
2202 pictureParametersObject = pictureParametersSet;
2203
2204 return true;
2205 }
2206
2207 bool VideoBaseDecoder::AddPictureParametersToQueue (NvidiaSharedBaseObj<StdVideoPictureParametersSet>& pictureParametersSet)
2208 {
2209 if (!m_pictureParametersQueue.empty())
2210 {
2211 m_pictureParametersQueue.push(pictureParametersSet);
2212
2213 return false;
2214 }
2215
2216 bool isSps = false;
2217 int32_t spsId = pictureParametersSet->GetSpsId(isSps);
2218
2219 // Attempt to combine the pair of SPS/PPS to avid creatingPicture Parameter Objects
2220 if ((!!m_lastSpsPictureParametersQueue && !!m_lastPpsPictureParametersQueue) || // the last slots are already occupied
2221 (isSps && !!m_lastSpsPictureParametersQueue) || // the current one is SPS but SPS slot is already occupied
2222 (!isSps && !!m_lastPpsPictureParametersQueue) || // the current one is PPS but PPS slot is already occupied
2223 ((m_lastSpsIdInQueue != -1) && (m_lastSpsIdInQueue != spsId))) // This has a different spsId
2224 {
2225 if (m_lastSpsPictureParametersQueue)
2226 {
2227 m_pictureParametersQueue.push(m_lastSpsPictureParametersQueue);
2228 m_lastSpsPictureParametersQueue = DE_NULL;
2229 }
2230
2231 if (m_lastPpsPictureParametersQueue)
2232 {
2233 m_pictureParametersQueue.push(m_lastPpsPictureParametersQueue);
2234 m_lastPpsPictureParametersQueue = DE_NULL;
2235 }
2236
2237 m_pictureParametersQueue.push(pictureParametersSet);
2238
2239 m_lastSpsIdInQueue = -1;
2240
2241 return false;
2242 }
2243
2244 if (m_lastSpsIdInQueue == -1)
2245 {
2246 m_lastSpsIdInQueue = spsId;
2247 }
2248
2249 DE_ASSERT(m_lastSpsIdInQueue != -1);
2250
2251 if (isSps)
2252 {
2253 m_lastSpsPictureParametersQueue = pictureParametersSet;
2254 }
2255 else
2256 {
2257 m_lastPpsPictureParametersQueue = pictureParametersSet;
2258 }
2259
2260 uint32_t count = 0;
2261 if (m_lastSpsPictureParametersQueue)
2262 {
2263 count++;
2264 }
2265
2266 if (m_lastPpsPictureParametersQueue)
2267 {
2268 count++;
2269 }
2270
2271 return (count == 2);
2272 }
2273
2274 uint32_t VideoBaseDecoder::FlushPictureParametersQueue ()
2275 {
2276 uint32_t numQueueItems = 0;
2277 while (!m_pictureParametersQueue.empty())
2278 {
2279 NvidiaSharedBaseObj<StdVideoPictureParametersSet>& ppItem = m_pictureParametersQueue.front();
2280
2281 bool isSps = false;
2282 ppItem->GetSpsId(isSps);
2283
2284 NvidiaSharedBaseObj<StdVideoPictureParametersSet> emptyStdPictureParametersSet;
2285
2286 AddPictureParameters(isSps ? ppItem : emptyStdPictureParametersSet, isSps ? emptyStdPictureParametersSet : ppItem);
2287
2288 m_pictureParametersQueue.pop();
2289 numQueueItems++;
2290 }
2291
2292 if (numQueueItems)
2293 {
2294 return numQueueItems;
2295 }
2296
2297 if (!(m_lastSpsPictureParametersQueue || m_lastPpsPictureParametersQueue))
2298 {
2299 return 0;
2300 }
2301
2302 AddPictureParameters(m_lastSpsPictureParametersQueue, m_lastPpsPictureParametersQueue);
2303
2304 if (m_lastSpsPictureParametersQueue)
2305 {
2306 numQueueItems++;
2307 m_lastSpsPictureParametersQueue = DE_NULL;
2308 }
2309
2310 if (m_lastPpsPictureParametersQueue)
2311 {
2312 numQueueItems++;
2313 m_lastPpsPictureParametersQueue = DE_NULL;
2314 }
2315
2316 m_lastSpsIdInQueue = -1;
2317
2318 DE_ASSERT(numQueueItems);
2319
2320 return numQueueItems;
2321 }
2322
2323 bool VideoBaseDecoder::CheckStdObjectBeforeUpdate (NvidiaSharedBaseObj<StdVideoPictureParametersSet>& stdPictureParametersSet)
2324 {
2325 if (!stdPictureParametersSet)
2326 {
2327 return false;
2328 }
2329
2330 bool stdObjectUpdate = (stdPictureParametersSet->m_updateSequenceCount > 0);
2331
2332 if (!m_currentPictureParameters || stdObjectUpdate)
2333 {
2334 DE_ASSERT(m_videoDecodeSession.get() != DE_NULL);
2335 DE_ASSERT(stdObjectUpdate || (stdPictureParametersSet->m_vkVideoDecodeSession == DE_NULL));
2336 // DE_ASSERT(!stdObjectUpdate || stdPictureParametersSet->m_vkObjectOwner);
2337 // Create new Vulkan Picture Parameters object
2338 return true;
2339
2340 }
2341 else
2342 {
2343 // new std object
2344 DE_ASSERT(!stdPictureParametersSet->m_vkObjectOwner);
2345 DE_ASSERT(stdPictureParametersSet->m_vkVideoDecodeSession == DE_NULL);
2346 DE_ASSERT(m_currentPictureParameters);
2347 // Update the existing Vulkan Picture Parameters object
2348
2349 return false;
2350 }
2351 }
2352
2353 NvidiaParserVideoPictureParameters* NvidiaParserVideoPictureParameters::VideoPictureParametersFromBase (NvidiaParserVideoRefCountBase* pBase)
2354 {
2355 if (!pBase)
2356 return DE_NULL;
2357
2358 NvidiaParserVideoPictureParameters* result = dynamic_cast<NvidiaParserVideoPictureParameters*>(pBase);
2359
2360 if (result)
2361 return result;
2362
2363 TCU_THROW(InternalError, "Invalid NvidiaParserVideoPictureParameters from base");
2364 }
2365
2366 VkVideoSessionParametersKHR NvidiaParserVideoPictureParameters::GetVideoSessionParametersKHR () const
2367 {
2368 return m_sessionParameters.get();
2369 }
2370
2371 int32_t NvidiaParserVideoPictureParameters::GetId () const
2372 {
2373 return m_Id;
2374 }
2375
2376 bool NvidiaParserVideoPictureParameters::HasSpsId (uint32_t spsId) const
2377 {
2378 return m_spsIdsUsed[spsId];
2379 }
2380
2381 bool NvidiaParserVideoPictureParameters::HasPpsId (uint32_t ppsId) const
2382 {
2383 return m_ppsIdsUsed[ppsId];
2384 }
2385
// Creates an empty picture parameters object for the given device: the id starts
// at -1 (not assigned yet), the reference count at zero and the session parameters
// handle is default-constructed.
NvidiaParserVideoPictureParameters::NvidiaParserVideoPictureParameters (VkDevice device)
	: m_Id (-1)
	, m_refCount (0)
	, m_device (device)
	, m_sessionParameters ()
{
}
2393
// Nothing to release explicitly here; members are torn down by their own destructors.
NvidiaParserVideoPictureParameters::~NvidiaParserVideoPictureParameters ()
{
}
2397
// Creates an empty bitstream buffer wrapper: no buffer is attached and the size
// and both alignments are zero until CreateVideoBitstreamBuffer() is called.
VulkanVideoBitstreamBuffer::VulkanVideoBitstreamBuffer ()
	: m_bufferSize (0)
	, m_bufferOffsetAlignment (0)
	, m_bufferSizeAlignment (0)
	, m_buffer (DE_NULL)
{
}
2405
2406 const VkBuffer& VulkanVideoBitstreamBuffer::get ()
2407 {
2408 return m_buffer->get();
2409 }
2410
2411 VkResult VulkanVideoBitstreamBuffer::CreateVideoBitstreamBuffer (const DeviceInterface& vkd,
2412 VkDevice device,
2413 Allocator& allocator,
2414 VkDeviceSize bufferSize,
2415 VkDeviceSize bufferOffsetAlignment,
2416 VkDeviceSize bufferSizeAlignment,
2417 void* pNext,
2418 const unsigned char* pBitstreamData,
2419 VkDeviceSize bitstreamDataSize)
2420 {
2421 DestroyVideoBitstreamBuffer();
2422
2423 m_bufferSizeAlignment = bufferSizeAlignment;
2424 m_bufferSize = deAlign64(bufferSize, bufferSizeAlignment);
2425 m_bufferOffsetAlignment = bufferOffsetAlignment;
2426
2427 const VkBufferCreateInfo bufferCreateInfo =
2428 {
2429 VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, // VkStructureType sType;
2430 pNext, // const void* pNext;
2431 0, // VkBufferCreateFlags flags;
2432 m_bufferSize, // VkDeviceSize size;
2433 VK_BUFFER_USAGE_VIDEO_DECODE_SRC_BIT_KHR, // VkBufferUsageFlags usage;
2434 VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
2435 0, // deUint32 queueFamilyIndexCount;
2436 DE_NULL, // const deUint32* pQueueFamilyIndices;
2437 };
2438
2439 m_buffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vkd, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible));
2440
2441 VK_CHECK(CopyVideoBitstreamToBuffer(vkd, device, pBitstreamData, bitstreamDataSize));
2442
2443 return VK_SUCCESS;
2444 }
2445
2446 VkResult VulkanVideoBitstreamBuffer::CopyVideoBitstreamToBuffer (const DeviceInterface& vkd,
2447 VkDevice device,
2448 const unsigned char* pBitstreamData,
2449 VkDeviceSize bitstreamDataSize)
2450 {
2451 if (pBitstreamData && bitstreamDataSize)
2452 {
2453 void* ptr = m_buffer->getAllocation().getHostPtr();
2454
2455 DE_ASSERT(bitstreamDataSize <= m_bufferSize);
2456
2457 //Copy Bitstream nvdec hw requires min bitstream size to be 16 (see bug 1599347). memset padding to 0 if bitstream size less than 16
2458 if (bitstreamDataSize < 16)
2459 deMemset(ptr, 0, 16);
2460
2461 deMemcpy(ptr, pBitstreamData, (size_t)bitstreamDataSize);
2462
2463 flushAlloc(vkd, device, m_buffer->getAllocation());
2464 }
2465
2466 return VK_SUCCESS;
2467 }
2468
2469 void VulkanVideoBitstreamBuffer::DestroyVideoBitstreamBuffer ()
2470 {
2471 m_buffer = de::MovePtr<BufferWithMemory>();
2472 m_bufferSize = 0;
2473 m_bufferOffsetAlignment = 0;
2474 m_bufferSizeAlignment = 0;
2475 }
2476
// Releases the attached buffer, if any, through DestroyVideoBitstreamBuffer().
VulkanVideoBitstreamBuffer::~VulkanVideoBitstreamBuffer ()
{
	DestroyVideoBitstreamBuffer();
}
2481
2482 VkDeviceSize VulkanVideoBitstreamBuffer::GetBufferSize ()
2483 {
2484 return m_bufferSize;
2485 }
2486
2487 VkDeviceSize VulkanVideoBitstreamBuffer::GetBufferOffsetAlignment ()
2488 {
2489 return m_bufferOffsetAlignment;
2490 }
2491
2492 NvidiaParserVideoPictureParameters* VideoBaseDecoder::CheckStdObjectAfterUpdate (NvidiaSharedBaseObj<StdVideoPictureParametersSet>& stdPictureParametersSet,
2493 NvidiaParserVideoPictureParameters* pNewPictureParametersObject)
2494 {
2495 if (!stdPictureParametersSet)
2496 {
2497 return DE_NULL;
2498 }
2499
2500 if (pNewPictureParametersObject)
2501 {
2502 if (stdPictureParametersSet->m_updateSequenceCount == 0)
2503 {
2504 stdPictureParametersSet->m_vkVideoDecodeSession = m_videoDecodeSession.get();
2505 }
2506 else
2507 {
2508 // DE_ASSERT(stdPictureParametersSet->m_vkObjectOwner);
2509 // DE_ASSERT(stdPictureParametersSet->m_vkVideoDecodeSession == m_vkVideoDecodeSession);
2510 const NvidiaParserVideoPictureParameters* pOwnerPictureParameters = NvidiaParserVideoPictureParameters::VideoPictureParametersFromBase(stdPictureParametersSet->m_vkObjectOwner);
2511
2512 if (pOwnerPictureParameters)
2513 {
2514 DE_ASSERT(pOwnerPictureParameters->GetId() < pNewPictureParametersObject->GetId());
2515 }
2516 }
2517
2518 // new object owner
2519 stdPictureParametersSet->m_vkObjectOwner = pNewPictureParametersObject;
2520 return pNewPictureParametersObject;
2521
2522 }
2523 else
2524 { // new std object
2525 stdPictureParametersSet->m_vkVideoDecodeSession = m_videoDecodeSession.get();
2526 stdPictureParametersSet->m_vkObjectOwner = m_currentPictureParameters;
2527 }
2528
2529 return m_currentPictureParameters;
2530 }
2531
// Submits an SPS and/or PPS parameters set (either reference may be empty).
//
// Unless TODO is defined at build time, a new NvidiaParserVideoPictureParameters
// object is created on every call (templated on m_currentPictureParameters) and,
// when creation succeeds, becomes the current object. With TODO defined, the
// existing current object is updated instead whenever CheckStdObjectBeforeUpdate()
// reports that no new object is needed for either set.
//
// Afterwards both sets are linked to their owner via CheckStdObjectAfterUpdate().
//
// Returns the newly created object, or DE_NULL when none was created.
NvidiaParserVideoPictureParameters* VideoBaseDecoder::AddPictureParameters (NvidiaSharedBaseObj<StdVideoPictureParametersSet>& spsStdPictureParametersSet,
																			 NvidiaSharedBaseObj<StdVideoPictureParametersSet>& ppsStdPictureParametersSet)
{
	const DeviceInterface& vkd = getDeviceDriver();
	const VkDevice device = getDevice();
	NvidiaParserVideoPictureParameters* pPictureParametersObject = DE_NULL;
	// The second check is skipped when the first one already asks for a new object.
	const bool createNewObject = CheckStdObjectBeforeUpdate(spsStdPictureParametersSet)
	|| CheckStdObjectBeforeUpdate(ppsStdPictureParametersSet);
#ifdef TODO
	if (createNewObject)
#else
	// The decision is computed (its assertions still run) but not acted upon.
	DE_UNREF(createNewObject);

	if (true)
#endif
	{
		pPictureParametersObject = NvidiaParserVideoPictureParameters::Create(vkd, device, m_videoDecodeSession.get(), spsStdPictureParametersSet, ppsStdPictureParametersSet, m_currentPictureParameters);
		// A failed creation leaves the current object unchanged.
		if (pPictureParametersObject)
			m_currentPictureParameters = pPictureParametersObject;
	}
	else
	{
		// Only reachable when TODO is defined.
		m_currentPictureParameters->Update(vkd, spsStdPictureParametersSet, ppsStdPictureParametersSet);
	}

	CheckStdObjectAfterUpdate(spsStdPictureParametersSet, pPictureParametersObject);
	CheckStdObjectAfterUpdate(ppsStdPictureParametersSet, pPictureParametersObject);

	return pPictureParametersObject;
}
2562
2563 NvVkDecodeFrameData* VideoBaseDecoder::GetCurrentFrameData (uint32_t currentSlotId)
2564 {
2565 DE_ASSERT(currentSlotId < m_maxDecodeFramesCount);
2566
2567 return &m_decodeFramesData[currentSlotId];
2568 }
2569
2570 int32_t VideoBaseDecoder::ReleaseDisplayedFrame (DecodedFrame* pDisplayedFrame)
2571 {
2572 if (pDisplayedFrame->pictureIndex != -1)
2573 {
2574 DecodedFrameRelease decodedFramesRelease = { pDisplayedFrame->pictureIndex, 0, 0, 0, 0, 0 };
2575 DecodedFrameRelease* decodedFramesReleasePtr = &decodedFramesRelease;
2576
2577 pDisplayedFrame->pictureIndex = -1;
2578
2579 decodedFramesRelease.decodeOrder = pDisplayedFrame->decodeOrder;
2580 decodedFramesRelease.displayOrder = pDisplayedFrame->displayOrder;
2581
2582 decodedFramesRelease.hasConsummerSignalFence = pDisplayedFrame->hasConsummerSignalFence;
2583 decodedFramesRelease.hasConsummerSignalSemaphore = pDisplayedFrame->hasConsummerSignalSemaphore;
2584 decodedFramesRelease.timestamp = 0;
2585
2586 return m_videoFrameBuffer->ReleaseDisplayedPicture(&decodedFramesReleasePtr, 1);
2587 }
2588
2589 return -1;
2590 }
2591
2592 VideoFrameBuffer* VideoBaseDecoder::GetVideoFrameBuffer (void)
2593 {
2594 DE_ASSERT(m_videoFrameBuffer.get() != DE_NULL);
2595
2596 return m_videoFrameBuffer.get();
2597 }
2598
2599 IfcFfmpegFunctions* VideoBaseDecoder::GetIfcFfmpegFuncs (void)
2600 {
2601 DE_ASSERT(m_ffmpegFuncs.get() != DE_NULL);
2602
2603 return m_ffmpegFuncs.get();
2604 }
2605
2606 IfcNvFunctions* VideoBaseDecoder::GetNvFuncs (void)
2607 {
2608 DE_ASSERT(m_nvFuncs.get() != DE_NULL);
2609
2610 return m_nvFuncs.get();
2611 }
2612
2613 void* copyToHeap (HeapType& heap, const void* p, size_t size)
2614 {
2615 if (p == DE_NULL || size == 0)
2616 return DE_NULL;
2617
2618 heap.push_back(de::MovePtr<vector<deUint8>>(new vector<deUint8>(size)));
2619
2620 deMemcpy(heap.back()->data(), p, size);
2621
2622 return heap.back()->data();
2623 }
2624
2625 void appendHeap (HeapType& heapTo, HeapType& heapFrom)
2626 {
2627 heapTo.reserve(heapTo.size() + heapFrom.size());
2628
2629 for (auto& item : heapFrom)
2630 heapTo.push_back(de::MovePtr<vector<deUint8>>(item.release()));
2631
2632 heapFrom.clear();
2633 }
2634
2635 void appendPerFrameDecodeParameters (PerFrameDecodeParameters* pPerFrameDecodeParameters,
2636 vector<PerFrameDecodeParameters*>& perFrameDecodeParameters,
2637 HeapType& heap)
2638 {
2639 perFrameDecodeParameters.push_back(pPerFrameDecodeParameters);
2640
2641 pPerFrameDecodeParameters->pCurrentPictureParameters->AddRef();
2642
2643 if (pPerFrameDecodeParameters->bitstreamDataLen > 0)
2644 pPerFrameDecodeParameters->pBitstreamData = static_cast<unsigned char*>(copyToHeap(heap, pPerFrameDecodeParameters->pBitstreamData, static_cast<size_t>(deAlign64(pPerFrameDecodeParameters->bitstreamDataLen, 16))));
2645 }
2646
/* Callback function to be registered for getting a callback when a
 * frame is ready to be decoded. Return values from HandlePictureDecode() are
 * interpreted as: 0: fail, >=1: succeeded
 */
2651 int32_t VideoBaseDecoder::DecodePictureWithParameters (PerFrameDecodeParameters* pPerFrameDecodeParameters,
2652 VulkanParserDecodePictureInfo* pVulkanParserDecodePictureInfo,
2653 HeapType& heap)
2654 {
2655 DEBUGLOG(std::cout << "\tDecodePictureWithParameters:" << std::dec << m_pPerFrameDecodeParameters.size() << std::endl);
2656
2657 int32_t result = -1;
2658
2659 const size_t ndx = m_pPerFrameDecodeParameters.size();
2660
2661 FlushPictureParametersQueue();
2662
2663 appendHeap(incSizeSafe(m_heaps), heap);
2664
2665 m_pVulkanParserDecodePictureInfo.push_back((VulkanParserDecodePictureInfo*)copyToHeap(m_heaps[ndx], pVulkanParserDecodePictureInfo, sizeof(*pVulkanParserDecodePictureInfo)));
2666 appendPerFrameDecodeParameters(pPerFrameDecodeParameters, m_pPerFrameDecodeParameters, m_heaps[ndx]);
2667
2668 result = pPerFrameDecodeParameters->currPicIdx;
2669
2670 if (m_pPerFrameDecodeParameters.size() >= (size_t)m_frameCountTrigger)
2671 result = DecodeCachedPictures();
2672
2673 return result;
2674 }
2675
2676 void VideoBaseDecoder::ReinitCaches (void)
2677 {
2678 const size_t size = m_frameCountTrigger;
2679
2680 for (auto& it : m_pPerFrameDecodeParameters)
2681 it->pCurrentPictureParameters->Release();
2682
2683 m_pPerFrameDecodeParameters.clear();
2684 m_pVulkanParserDecodePictureInfo.clear();
2685 m_pFrameDatas.clear();
2686 m_bitstreamBufferMemoryBarriers.clear();
2687 m_imageBarriersVec.clear();
2688 m_frameSynchronizationInfos.clear();
2689 m_commandBufferSubmitInfos.clear();
2690 m_decodeBeginInfos.clear();
2691 m_pictureResourcesInfos.clear();
2692 m_dependencyInfos.clear();
2693 m_decodeEndInfos.clear();
2694 m_submitInfos.clear();
2695 m_frameCompleteFences.clear();
2696 m_frameConsumerDoneFences.clear();
2697 m_frameCompleteSemaphoreSubmitInfos.clear();
2698 m_frameConsumerDoneSemaphoreSubmitInfos.clear();
2699 m_heaps.clear();
2700
2701 // Make sure pointers will stay consistent
2702 m_pPerFrameDecodeParameters.reserve(size);
2703 m_pVulkanParserDecodePictureInfo.reserve(size);
2704 m_pFrameDatas.reserve(size);
2705 m_bitstreamBufferMemoryBarriers.reserve(size);
2706 m_imageBarriersVec.reserve(size);
2707 m_frameSynchronizationInfos.reserve(size);
2708 m_commandBufferSubmitInfos.reserve(size);
2709 m_decodeBeginInfos.reserve(size);
2710 m_pictureResourcesInfos.reserve(size);
2711 m_dependencyInfos.reserve(size);
2712 m_decodeEndInfos.reserve(size);
2713 m_submitInfos.reserve(size);
2714 m_frameCompleteFences.reserve(size);
2715 m_frameConsumerDoneFences.reserve(size);
2716 m_frameCompleteSemaphoreSubmitInfos.reserve(size);
2717 m_frameConsumerDoneSemaphoreSubmitInfos.reserve(size);
2718 m_heaps.reserve(size);
2719 }
2720
2721 int32_t VideoBaseDecoder::DecodeCachedPictures (VideoBaseDecoder* friendDecoder,
2722 bool waitSubmitted)
2723 {
2724 DEBUGLOG(std::cout << "DecodeCachedPictures" << std::endl);
2725
2726 const DeviceInterface& vkd = getDeviceDriver();
2727 const VkDevice device = getDevice();
2728 const deUint32 queueFamilyIndex = getQueueFamilyIndexDecode();
2729 const size_t ndxMax = m_pPerFrameDecodeParameters.size();
2730 const bool interleaved = friendDecoder != DE_NULL || !waitSubmitted;
2731 vector<size_t> recordOrderIndices;
2732
2733 DE_ASSERT(m_minBitstreamBufferSizeAlignment != 0);
2734 DE_ASSERT(m_videoDecodeSession.get() != DE_NULL);
2735
2736 m_pFrameDatas.resize(ndxMax);
2737 m_bitstreamBufferMemoryBarriers.resize(ndxMax);
2738 m_imageBarriersVec.resize(ndxMax);
2739 m_frameSynchronizationInfos.resize(ndxMax);
2740 m_commandBufferSubmitInfos.resize(ndxMax);
2741 m_decodeBeginInfos.resize(ndxMax);
2742 m_pictureResourcesInfos.resize(ndxMax);
2743 m_dependencyInfos.resize(ndxMax);
2744 m_decodeEndInfos.resize(ndxMax);
2745 m_submitInfos.resize(ndxMax);
2746 m_frameCompleteFences.resize(ndxMax);
2747 m_frameConsumerDoneSemaphoreSubmitInfos.resize(ndxMax);
2748 m_frameCompleteSemaphoreSubmitInfos.resize(ndxMax);
2749
2750 for (size_t ndx = 0; ndx < ndxMax; ++ndx)
2751 {
2752 const size_t picNdx = ndx;
2753 PerFrameDecodeParameters* pPicParams = m_pPerFrameDecodeParameters[picNdx];
2754 vector<VkImageMemoryBarrier2KHR>& imageBarriers = m_imageBarriersVec[picNdx];
2755 VideoFrameBuffer::FrameSynchronizationInfo& frameSynchronizationInfo = m_frameSynchronizationInfos[picNdx];
2756 NvVkDecodeFrameData*& pFrameData = m_pFrameDatas[picNdx];
2757 vector<VideoFrameBuffer::PictureResourceInfo>& pictureResourcesInfo = m_pictureResourcesInfos[picNdx];
2758 VkBufferMemoryBarrier2KHR& bitstreamBufferMemoryBarrier = m_bitstreamBufferMemoryBarriers[picNdx];
2759 VkFence& frameCompleteFence = m_frameCompleteFences[picNdx];
2760 VulkanParserDecodePictureInfo* pDecodePictureInfo = m_pVulkanParserDecodePictureInfo[picNdx];
2761 const int32_t currPicIdx = pPicParams->currPicIdx;
2762
2763 DE_ASSERT((uint32_t)currPicIdx < m_numDecodeSurfaces);
2764
2765 m_videoFrameBuffer->SetPicNumInDecodeOrder(currPicIdx, ++m_decodePicCount);
2766
2767 pFrameData = GetCurrentFrameData((uint32_t)currPicIdx);
2768
2769 DE_ASSERT(pFrameData->bitstreamBuffer.GetBufferSize() >= pPicParams->bitstreamDataLen);
2770
2771 pFrameData->bitstreamBuffer.CopyVideoBitstreamToBuffer(vkd, device, pPicParams->pBitstreamData, pPicParams->bitstreamDataLen);
2772
2773 pPicParams->decodeFrameInfo.srcBuffer = pFrameData->bitstreamBuffer.get();
2774 pPicParams->decodeFrameInfo.srcBufferOffset = 0;
2775 pPicParams->decodeFrameInfo.srcBufferRange = deAlign64((VkDeviceSize)pPicParams->bitstreamDataLen, m_minBitstreamBufferSizeAlignment);
2776
2777 DE_ASSERT(pPicParams->decodeFrameInfo.srcBuffer != DE_NULL);
2778
2779 bitstreamBufferMemoryBarrier =
2780 {
2781 VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2_KHR, // VkStructureType sType;
2782 DE_NULL, // const void* pNext;
2783 VK_PIPELINE_STAGE_2_NONE_KHR, // VkPipelineStageFlags2KHR srcStageMask;
2784 0, // VkAccessFlags2KHR srcAccessMask;
2785 VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR, // VkPipelineStageFlags2KHR dstStageMask;
2786 VK_ACCESS_2_VIDEO_DECODE_READ_BIT_KHR, // VkAccessFlags2KHR dstAccessMask;
2787 queueFamilyIndex, // deUint32 srcQueueFamilyIndex;
2788 queueFamilyIndex, // deUint32 dstQueueFamilyIndex;
2789 pPicParams->decodeFrameInfo.srcBuffer, // VkBuffer buffer;
2790 pPicParams->decodeFrameInfo.srcBufferOffset, // VkDeviceSize offset;
2791 pPicParams->decodeFrameInfo.srcBufferRange // VkDeviceSize size;
2792 };
2793
2794 imageBarriers.reserve(2 * PerFrameDecodeParameters::MAX_DPB_REF_SLOTS);
2795
2796 const VkImageMemoryBarrier2KHR dpbBarrierTemplate =
2797 {
2798 VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2_KHR, // VkStructureType sType;
2799 DE_NULL, // const void* pNext;
2800 VK_PIPELINE_STAGE_2_NONE_KHR, // VkPipelineStageFlags2KHR srcStageMask;
2801 0, // VkAccessFlags2KHR srcAccessMask;
2802 VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR, // VkPipelineStageFlags2KHR dstStageMask;
2803 VK_ACCESS_2_VIDEO_DECODE_READ_BIT_KHR, // VkAccessFlags2KHR dstAccessMask;
2804 VK_IMAGE_LAYOUT_UNDEFINED, // VkImageLayout oldLayout;
2805 VK_IMAGE_LAYOUT_VIDEO_DECODE_DPB_KHR, // VkImageLayout newLayout;
2806 queueFamilyIndex, // deUint32 srcQueueFamilyIndex;
2807 queueFamilyIndex, // deUint32 dstQueueFamilyIndex;
2808 DE_NULL, // VkImage image;
2809 { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1, } // VkImageSubresourceRange subresourceRange;
2810 };
2811
2812 {
2813 const VkImageLayout newLayout = m_distinctDstDpbImages ? VK_IMAGE_LAYOUT_VIDEO_DECODE_DST_KHR : VK_IMAGE_LAYOUT_VIDEO_DECODE_DPB_KHR;
2814 VkVideoPictureResourceInfoKHR& pictureResource = pPicParams->decodeFrameInfo.dstPictureResource;
2815 VideoFrameBuffer::PictureResourceInfo currentPictureResource = { DE_NULL, VK_IMAGE_LAYOUT_UNDEFINED };
2816
2817 m_videoFrameBuffer->GetImageResourcesByIndex((int8_t)pPicParams->currPicIdx, &pictureResource, ¤tPictureResource, newLayout);
2818
2819 DEBUGLOG(std::cout << "PicNdx: " << std::dec << (int32_t)picNdx << " " << currentPictureResource.image << std::endl);
2820
2821 if (currentPictureResource.currentImageLayout == VK_IMAGE_LAYOUT_UNDEFINED)
2822 {
2823 VkImageMemoryBarrier2KHR& imageBarrier = incSizeSafe(imageBarriers);
2824
2825 imageBarrier = dpbBarrierTemplate;
2826 imageBarrier.oldLayout = currentPictureResource.currentImageLayout;
2827 imageBarrier.newLayout = newLayout;
2828 imageBarrier.image = currentPictureResource.image;
2829 imageBarrier.dstAccessMask = VK_ACCESS_2_VIDEO_DECODE_WRITE_BIT_KHR;
2830
2831 DEBUGLOG(std::cout << "\tTransit DST: " << imageBarrier.image << " from " << imageBarrier.oldLayout << std::endl);
2832
2833 DE_ASSERT(imageBarrier.image != DE_NULL);
2834 }
2835 }
2836
2837 if (m_distinctDstDpbImages)
2838 {
2839 const VkImageLayout newLayout = VK_IMAGE_LAYOUT_VIDEO_DECODE_DPB_KHR;
2840 VkVideoPictureResourceInfoKHR& pictureResource = pPicParams->pictureResources[pPicParams->numGopReferenceSlots];
2841 VideoFrameBuffer::PictureResourceInfo currentPictureResource = { DE_NULL, VK_IMAGE_LAYOUT_UNDEFINED };
2842
2843 m_videoFrameBuffer->GetImageResourcesByIndex((int8_t)pPicParams->currPicIdx, &pictureResource, ¤tPictureResource, newLayout);
2844
2845 DEBUGLOG(std::cout << "PicNdx: " << std::dec << (int32_t)picNdx << " " << currentPictureResource.image << std::endl);
2846
2847 if (currentPictureResource.currentImageLayout == VK_IMAGE_LAYOUT_UNDEFINED)
2848 {
2849 VkImageMemoryBarrier2KHR& imageBarrier = incSizeSafe(imageBarriers);
2850
2851 imageBarrier = dpbBarrierTemplate;
2852 imageBarrier.oldLayout = currentPictureResource.currentImageLayout;
2853 imageBarrier.newLayout = newLayout;
2854 imageBarrier.image = currentPictureResource.image;
2855 imageBarrier.dstAccessMask = VK_ACCESS_2_VIDEO_DECODE_WRITE_BIT_KHR;
2856 imageBarrier.subresourceRange.baseArrayLayer = pPicParams->decodeFrameInfo.dstPictureResource.baseArrayLayer;
2857
2858 DEBUGLOG(std::cout << "\tTransit DPB: " << imageBarrier.image << ":" << imageBarrier.subresourceRange.baseArrayLayer << " from " << imageBarrier.oldLayout << std::endl);
2859
2860 DE_ASSERT(imageBarrier.image != DE_NULL);
2861 }
2862 }
2863
2864
2865 stringstream s;
2866
2867 s << "\tGOP:" << std::dec << (int32_t)pPicParams->numGopReferenceSlots << " ( ";
2868
2869 if (pPicParams->numGopReferenceSlots)
2870 {
2871 const VkImageLayout newLayout = VK_IMAGE_LAYOUT_VIDEO_DECODE_DPB_KHR;
2872
2873 pictureResourcesInfo.resize(PerFrameDecodeParameters::MAX_DPB_REF_SLOTS);
2874
2875 deMemset(pictureResourcesInfo.data(), 0, sizeof(pictureResourcesInfo[0]) * pictureResourcesInfo.size());
2876
2877 m_videoFrameBuffer->GetImageResourcesByIndex(pPicParams->numGopReferenceSlots, &pPicParams->pGopReferenceImagesIndexes[0], pPicParams->pictureResources, pictureResourcesInfo.data(), newLayout);
2878
2879 for (int32_t resId = 0; resId < pPicParams->numGopReferenceSlots; resId++)
2880 {
2881 s << std::dec << (int32_t)pPicParams->pGopReferenceImagesIndexes[resId] << ":" << pictureResourcesInfo[resId].image << " ";
2882
2883 // slotLayer requires NVIDIA specific extension VK_KHR_video_layers, not enabled, just yet.
2884 // pGopReferenceSlots[resId].slotLayerIndex = 0;
2885 // pictureResourcesInfo[resId].image can be a DE_NULL handle if the picture is not-existent.
2886 if (pictureResourcesInfo[resId].image != DE_NULL && pictureResourcesInfo[resId].currentImageLayout != newLayout)
2887 {
2888 VkImageMemoryBarrier2KHR& imageBarrier = incSizeSafe(imageBarriers);
2889
2890 imageBarrier = dpbBarrierTemplate;
2891 imageBarrier.oldLayout = pictureResourcesInfo[resId].currentImageLayout;
2892 imageBarrier.newLayout = newLayout;
2893 imageBarrier.image = pictureResourcesInfo[resId].image;
2894
2895 DEBUGLOG(std::cout << "\tTransit DPB: " << imageBarrier.image << " from " << imageBarrier.oldLayout << std::endl);
2896
2897 pictureResourcesInfo[resId].currentImageLayout = imageBarrier.newLayout;
2898
2899 DE_ASSERT(imageBarrier.image != DE_NULL);
2900 }
2901 }
2902 }
2903
2904 DEBUGLOG(std::cout << s.str() << ")" << std::endl);
2905
2906 if (pDecodePictureInfo->flags.unpairedField)
2907 pDecodePictureInfo->flags.syncFirstReady = true;
2908
2909 pDecodePictureInfo->flags.syncToFirstField = false;
2910
2911 frameSynchronizationInfo.hasFrameCompleteSignalFence = true;
2912 frameSynchronizationInfo.hasFrameCompleteSignalSemaphore = true;
2913
2914 int32_t retVal = m_videoFrameBuffer->QueuePictureForDecode((int8_t)currPicIdx, pDecodePictureInfo, pPicParams->pCurrentPictureParameters->m_vkObjectOwner, &frameSynchronizationInfo);
2915
2916 if (currPicIdx != retVal)
2917 DE_ASSERT(0 && "QueuePictureForDecode has failed");
2918
2919 frameCompleteFence = frameSynchronizationInfo.frameCompleteFence;
2920 }
2921
2922 recordOrderIndices.reserve(ndxMax);
2923 for (size_t ndx = 0; ndx < ndxMax; ++ndx)
2924 recordOrderIndices.push_back(ndx);
2925
2926 if (m_randomOrSwapped)
2927 {
2928 if (ndxMax == 2)
2929 {
2930 std::swap(recordOrderIndices[0], recordOrderIndices[1]);
2931 }
2932 else
2933 {
2934 de::Random rnd(0);
2935
2936 DE_ASSERT(recordOrderIndices.size() % m_gopSize == 0);
2937
2938 for (vector<size_t>::iterator it = recordOrderIndices.begin();
2939 it != recordOrderIndices.end();
2940 it += m_gopSize)
2941 {
2942 rnd.shuffle(it, it + m_gopSize);
2943 }
2944 }
2945 }
2946
2947 for (size_t ndx = 0; ndx < ndxMax; ++ndx)
2948 {
2949 const size_t picNdx = recordOrderIndices[ndx];
2950 PerFrameDecodeParameters* pPicParams = m_pPerFrameDecodeParameters[picNdx];
2951 vector<VkImageMemoryBarrier2KHR>& imageBarriers = m_imageBarriersVec[picNdx];
2952 VideoFrameBuffer::FrameSynchronizationInfo& frameSynchronizationInfo = m_frameSynchronizationInfos[picNdx];
2953 NvVkDecodeFrameData*& pFrameData = m_pFrameDatas[picNdx];
2954 VkBufferMemoryBarrier2KHR& bitstreamBufferMemoryBarrier = m_bitstreamBufferMemoryBarriers[picNdx];
2955 VkVideoBeginCodingInfoKHR& decodeBeginInfo = m_decodeBeginInfos[picNdx];
2956 VkDependencyInfoKHR& dependencyInfo = m_dependencyInfos[picNdx];
2957 VkVideoEndCodingInfoKHR& decodeEndInfo = m_decodeEndInfos[picNdx];
2958 VkCommandBufferSubmitInfoKHR& commandBufferSubmitInfo = m_commandBufferSubmitInfos[picNdx];
2959 VkCommandBuffer& commandBuffer = commandBufferSubmitInfo.commandBuffer;
2960 const VkVideoCodingControlInfoKHR videoCodingControlInfoKHR =
2961 {
2962 VK_STRUCTURE_TYPE_VIDEO_CODING_CONTROL_INFO_KHR, // VkStructureType sType;
2963 DE_NULL, // const void* pNext;
2964 VK_VIDEO_CODING_CONTROL_RESET_BIT_KHR, // VkVideoCodingControlFlagsKHR flags;
2965 };
2966
2967 commandBufferSubmitInfo =
2968 {
2969 VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO_KHR, // VkStructureType sType;
2970 DE_NULL, // const void* pNext;
2971 pFrameData->commandBuffer.get(), // VkCommandBuffer commandBuffer;
2972 0u, // uint32_t deviceMask;
2973 };
2974
2975 // Effectively its done above
2976 commandBuffer = pFrameData->commandBuffer.get();
2977
2978 DEBUGLOG(std::cout << "PicNdx: " << std::dec << picNdx << " commandBuffer:" << commandBuffer << std::endl);
2979
2980 DE_ASSERT(pPicParams->pCurrentPictureParameters->m_vkObjectOwner);
2981 const NvidiaParserVideoPictureParameters* pOwnerPictureParameters = NvidiaParserVideoPictureParameters::VideoPictureParametersFromBase(pPicParams->pCurrentPictureParameters->m_vkObjectOwner);
2982 DE_ASSERT(pOwnerPictureParameters);
2983 //DE_ASSERT(pOwnerPictureParameters->GetId() <= m_currentPictureParameters->GetId());
2984
2985 bool isSps = false;
2986 int32_t spsId = pPicParams->pCurrentPictureParameters->GetSpsId(isSps);
2987 DE_ASSERT(!isSps);
2988 DE_ASSERT(spsId >= 0);
2989 DE_ASSERT(pOwnerPictureParameters->HasSpsId(spsId));
2990 DE_UNREF(spsId);
2991
2992 bool isPps = false;
2993 int32_t ppsId = pPicParams->pCurrentPictureParameters->GetPpsId(isPps);
2994 DE_ASSERT(isPps);
2995 DE_ASSERT(ppsId >= 0);
2996 DE_ASSERT(pOwnerPictureParameters->HasPpsId(ppsId));
2997 DE_UNREF(ppsId);
2998
2999 beginCommandBuffer(vkd, commandBuffer);
3000
3001 DEBUGLOG(std::cout << "beginCommandBuffer " << commandBuffer << " VkVideoSessionParametersKHR:" << pOwnerPictureParameters->GetVideoSessionParametersKHR() << std::endl);
3002
3003 const uint32_t referenceSlotCount = pPicParams->decodeFrameInfo.referenceSlotCount;
3004
3005 decodeBeginInfo =
3006 {
3007 VK_STRUCTURE_TYPE_VIDEO_BEGIN_CODING_INFO_KHR, // VkStructureType sType;
3008 DE_NULL, // const void* pNext;
3009 0u, // VkVideoBeginCodingFlagsKHR flags;
3010 m_videoDecodeSession.get(), // VkVideoSessionKHR videoSession;
3011 pOwnerPictureParameters->GetVideoSessionParametersKHR(), // VkVideoSessionParametersKHR videoSessionParameters;
3012 referenceSlotCount, // uint32_t referenceSlotCount;
3013 pPicParams->decodeFrameInfo.pReferenceSlots, // const VkVideoReferenceSlotInfoKHR* pReferenceSlots;
3014 };
3015 dependencyInfo =
3016 {
3017 VK_STRUCTURE_TYPE_DEPENDENCY_INFO_KHR, // VkStructureType sType;
3018 DE_NULL, // const void* pNext;
3019 VK_DEPENDENCY_BY_REGION_BIT, // VkDependencyFlags dependencyFlags;
3020 0, // deUint32 memoryBarrierCount;
3021 DE_NULL, // const VkMemoryBarrier2KHR* pMemoryBarriers;
3022 1, // deUint32 bufferMemoryBarrierCount;
3023 &bitstreamBufferMemoryBarrier, // const VkBufferMemoryBarrier2KHR* pBufferMemoryBarriers;
3024 (uint32_t)imageBarriers.size(), // deUint32 imageMemoryBarrierCount;
3025 dataOrNullPtr(imageBarriers), // const VkImageMemoryBarrier2KHR* pImageMemoryBarriers;
3026 };
3027 decodeEndInfo =
3028 {
3029 VK_STRUCTURE_TYPE_VIDEO_END_CODING_INFO_KHR, // VkStructureType sType;
3030 DE_NULL, // const void* pNext;
3031 0, // VkVideoEndCodingFlagsKHR flags;
3032 };
3033
3034 if (m_queryResultWithStatus)
3035 vkd.cmdResetQueryPool(commandBuffer, frameSynchronizationInfo.queryPool, frameSynchronizationInfo.startQueryId, frameSynchronizationInfo.numQueries);
3036
3037 vkd.cmdBeginVideoCodingKHR(commandBuffer, &decodeBeginInfo);
3038
3039 if (picNdx == 0)
3040 vkd.cmdControlVideoCodingKHR(commandBuffer, &videoCodingControlInfoKHR);
3041
3042 vkd.cmdPipelineBarrier2(commandBuffer, &dependencyInfo);
3043
3044 if (m_queryResultWithStatus)
3045 vkd.cmdBeginQuery(commandBuffer, frameSynchronizationInfo.queryPool, frameSynchronizationInfo.startQueryId, 0u);
3046
3047 vkd.cmdDecodeVideoKHR(commandBuffer, &pPicParams->decodeFrameInfo);
3048
3049 stringstream s;
3050
3051 s << "Slots: " << (int32_t)pPicParams->decodeFrameInfo.referenceSlotCount << ": ( ";
3052 for (uint32_t i = 0; i < pPicParams->decodeFrameInfo.referenceSlotCount; i++)
3053 s << std::dec << (int32_t)pPicParams->decodeFrameInfo.pReferenceSlots[i].slotIndex << " ";
3054
3055 DEBUGLOG(std::cout << s.str() << ")" << std::endl);
3056
3057 if (m_queryResultWithStatus)
3058 vkd.cmdEndQuery(commandBuffer, frameSynchronizationInfo.queryPool, frameSynchronizationInfo.startQueryId);
3059
3060 vkd.cmdEndVideoCodingKHR(commandBuffer, &decodeEndInfo);
3061
3062 endCommandBuffer(vkd, commandBuffer);
3063
3064 DEBUGLOG(std::cout << "endCommandBuffer " << commandBuffer << std::endl);
3065
3066 if (!m_submitAfter)
3067 SubmitQueue(&commandBufferSubmitInfo, &m_submitInfos[picNdx], &m_frameSynchronizationInfos[picNdx], &m_frameConsumerDoneSemaphoreSubmitInfos[picNdx], &m_frameCompleteSemaphoreSubmitInfos[picNdx]);
3068 }
3069
3070 if (m_submitAfter && !interleaved)
3071 {
3072 for (size_t ndx = 0; ndx < recordOrderIndices.size(); ++ndx)
3073 SubmitQueue(&m_commandBufferSubmitInfos[ndx], &m_submitInfos[ndx], &m_frameSynchronizationInfos[ndx], &m_frameConsumerDoneSemaphoreSubmitInfos[ndx], &m_frameCompleteSemaphoreSubmitInfos[ndx]);
3074 }
3075
3076 m_frameConsumerDoneSemaphoreSubmitInfos.clear();
3077 m_frameCompleteSemaphoreSubmitInfos.clear();
3078
3079 if (interleaved)
3080 {
3081 for (uint32_t ndx = 0; ndx < ndxMax; ++ndx)
3082 {
3083 if (m_frameSynchronizationInfos[ndx].frameConsumerDoneFence != DE_NULL)
3084 m_frameConsumerDoneFences.push_back(m_frameSynchronizationInfos[ndx].frameConsumerDoneFence);
3085
3086 if (m_frameSynchronizationInfos[ndx].frameCompleteSemaphore != DE_NULL)
3087 m_frameCompleteSemaphoreSubmitInfos.push_back(makeSemaphoreSubmitInfo(m_frameSynchronizationInfos[ndx].frameCompleteSemaphore, VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT_KHR));
3088
3089 if (m_frameSynchronizationInfos[ndx].frameConsumerDoneSemaphore != DE_NULL)
3090 m_frameConsumerDoneSemaphoreSubmitInfos.push_back(makeSemaphoreSubmitInfo(m_frameSynchronizationInfos[ndx].frameConsumerDoneSemaphore, VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT_KHR));
3091 }
3092
3093 if (friendDecoder != DE_NULL)
3094 {
3095 friendDecoder->DecodeCachedPictures(DE_NULL, false);
3096
3097 for (uint32_t ndx = 0; ndx < ndxMax; ++ndx)
3098 {
3099 if (friendDecoder->m_frameSynchronizationInfos[ndx].frameConsumerDoneFence != DE_NULL)
3100 m_frameConsumerDoneFences.push_back(friendDecoder->m_frameSynchronizationInfos[ndx].frameConsumerDoneFence);
3101
3102 if (friendDecoder->m_frameSynchronizationInfos[ndx].frameCompleteSemaphore != DE_NULL)
3103 m_frameCompleteSemaphoreSubmitInfos.push_back(makeSemaphoreSubmitInfo(friendDecoder->m_frameSynchronizationInfos[ndx].frameCompleteSemaphore, VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT_KHR));
3104
3105 if (friendDecoder->m_frameSynchronizationInfos[ndx].frameConsumerDoneSemaphore != DE_NULL)
3106 m_frameConsumerDoneSemaphoreSubmitInfos.push_back(makeSemaphoreSubmitInfo(friendDecoder->m_frameSynchronizationInfos[ndx].frameConsumerDoneSemaphore, VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT_KHR));
3107 }
3108 }
3109
3110 if (waitSubmitted)
3111 {
3112 vector<VkCommandBufferSubmitInfoKHR> commandBufferSubmitInfos;
3113
3114 DE_ASSERT(m_commandBufferSubmitInfos.size() == friendDecoder->m_commandBufferSubmitInfos.size());
3115
3116 commandBufferSubmitInfos.reserve(m_commandBufferSubmitInfos.size() + friendDecoder->m_commandBufferSubmitInfos.size());
3117
3118 for (uint32_t ndx = 0; ndx < ndxMax; ++ndx)
3119 {
3120 incSizeSafe(commandBufferSubmitInfos) = m_commandBufferSubmitInfos[ndx];
3121 incSizeSafe(commandBufferSubmitInfos) = friendDecoder->m_commandBufferSubmitInfos[ndx];
3122 }
3123
3124 SubmitQueue(
3125 commandBufferSubmitInfos,
3126 &m_submitInfos[ndxMax - 1],
3127 m_frameCompleteFences[ndxMax - 1],
3128 m_frameConsumerDoneFences,
3129 m_frameCompleteSemaphoreSubmitInfos,
3130 m_frameConsumerDoneSemaphoreSubmitInfos);
3131 }
3132 }
3133
3134 if (waitSubmitted)
3135 {
3136 VK_CHECK(vkd.waitForFences(device, (uint32_t)m_frameCompleteFences.size(), m_frameCompleteFences.data(), true, ~0ull));
3137
3138 m_frameCompleteFences.clear();
3139 m_frameConsumerDoneFences.clear();
3140 m_frameCompleteSemaphoreSubmitInfos.clear();
3141 m_frameConsumerDoneSemaphoreSubmitInfos.clear();
3142
3143 if (friendDecoder != DE_NULL)
3144 friendDecoder->ReinitCaches();
3145 }
3146
3147 if (m_queryResultWithStatus)
3148 {
3149 for (size_t ndx = 0; ndx < ndxMax; ++ndx)
3150 {
3151 struct nvVideoGetDecodeStatus
3152 {
3153 VkQueryResultStatusKHR decodeStatus;
3154 uint32_t hwCyclesCount; // OUT: HW cycle count per frame
3155 uint32_t hwStatus; // OUT: HW decode status
3156 uint32_t mbsCorrectlyDecoded; // total numers of correctly decoded macroblocks
3157 uint32_t mbsInError; // number of error macroblocks.
3158 uint16_t instanceId; // OUT: nvdec instance id
3159 uint16_t reserved1; // Reserved for future use
3160 } queryResult;
3161
3162 VkResult result = vkd.getQueryPoolResults(device,
3163 m_frameSynchronizationInfos[ndx].queryPool,
3164 m_frameSynchronizationInfos[ndx].startQueryId,
3165 1,
3166 sizeof(queryResult),
3167 &queryResult,
3168 sizeof(queryResult),
3169 VK_QUERY_RESULT_WITH_STATUS_BIT_KHR | VK_QUERY_RESULT_WAIT_BIT);
3170
3171 if (queryResult.decodeStatus != VK_QUERY_RESULT_STATUS_COMPLETE_KHR)
3172 TCU_FAIL("VK_QUERY_RESULT_STATUS_COMPLETE_KHR expected");
3173
3174 //TCU_FAIL("TODO: nvVideoGetDecodeStatus is not specified in spec");
3175
3176 DE_UNREF(result);
3177 }
3178 }
3179
3180 const int result = m_pPerFrameDecodeParameters[m_pPerFrameDecodeParameters.size() - 1]->currPicIdx;
3181
3182 if (waitSubmitted)
3183 ReinitCaches();
3184
3185 return result;
3186 }
3187
3188 void VideoBaseDecoder::SubmitQueue (VkCommandBufferSubmitInfoKHR* commandBufferSubmitInfo,
3189 VkSubmitInfo2KHR* submitInfo,
3190 VideoFrameBuffer::FrameSynchronizationInfo* frameSynchronizationInfo,
3191 VkSemaphoreSubmitInfoKHR* frameConsumerDoneSemaphore,
3192 VkSemaphoreSubmitInfoKHR* frameCompleteSemaphore)
3193 {
3194 const DeviceInterface& vkd = getDeviceDriver();
3195 const VkDevice device = getDevice();
3196 const VkQueue queue = getQueueDecode();
3197 const deUint32 waitSemaphoreCount = (frameSynchronizationInfo->frameConsumerDoneSemaphore == DE_NULL) ? 0u : 1u;
3198 const deUint32 signalSemaphoreInfoCount = (frameSynchronizationInfo->frameCompleteSemaphore == DE_NULL) ? 0u : 1u;
3199
3200 *frameConsumerDoneSemaphore = makeSemaphoreSubmitInfo(frameSynchronizationInfo->frameConsumerDoneSemaphore, VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT_KHR);
3201 *frameCompleteSemaphore = makeSemaphoreSubmitInfo(frameSynchronizationInfo->frameCompleteSemaphore, VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT_KHR);
3202
3203 *submitInfo =
3204 {
3205 VK_STRUCTURE_TYPE_SUBMIT_INFO_2_KHR, // VkStructureType sType;
3206 DE_NULL, // const void* pNext;
3207 0u, // VkSubmitFlagsKHR flags;
3208 waitSemaphoreCount, // uint32_t waitSemaphoreInfoCount;
3209 frameConsumerDoneSemaphore, // const VkSemaphoreSubmitInfoKHR* pWaitSemaphoreInfos;
3210 1u, // uint32_t commandBufferInfoCount;
3211 commandBufferSubmitInfo, // const VkCommandBufferSubmitInfoKHR* pCommandBufferInfos;
3212 signalSemaphoreInfoCount, // uint32_t signalSemaphoreInfoCount;
3213 frameCompleteSemaphore, // const VkSemaphoreSubmitInfoKHR* pSignalSemaphoreInfos;
3214 };
3215
3216 VkResult result = VK_SUCCESS;
3217
3218 if ((frameSynchronizationInfo->frameConsumerDoneSemaphore == DE_NULL) && (frameSynchronizationInfo->frameConsumerDoneFence != DE_NULL))
3219 VK_CHECK(vkd.waitForFences(device, 1, &frameSynchronizationInfo->frameConsumerDoneFence, true, ~0ull));
3220
3221 VK_CHECK(vkd.getFenceStatus(device, frameSynchronizationInfo->frameCompleteFence));
3222
3223 VK_CHECK(vkd.resetFences(device, 1, &frameSynchronizationInfo->frameCompleteFence));
3224
3225 result = vkd.getFenceStatus(device, frameSynchronizationInfo->frameCompleteFence);
3226 DE_ASSERT(result == VK_NOT_READY);
3227 DE_UNREF(result);
3228
3229 vkd.queueSubmit2(queue, 1, submitInfo, frameSynchronizationInfo->frameCompleteFence);
3230 }
3231
3232 void VideoBaseDecoder::SubmitQueue (vector<VkCommandBufferSubmitInfoKHR>& commandBufferSubmitInfos,
3233 VkSubmitInfo2KHR* submitInfo,
3234 const VkFence frameCompleteFence,
3235 const vector<VkFence>& frameConsumerDoneFence,
3236 const vector<VkSemaphoreSubmitInfoKHR>& frameConsumerDoneSemaphores,
3237 const vector<VkSemaphoreSubmitInfoKHR>& frameCompleteSemaphores)
3238 {
3239 const DeviceInterface& vkd = getDeviceDriver();
3240 const VkDevice device = getDevice();
3241 const VkQueue queue = getQueueDecode();
3242
3243 for (uint32_t ndx = 0; ndx < frameConsumerDoneSemaphores.size(); ++ndx)
3244 if ((frameConsumerDoneSemaphores[ndx].semaphore == DE_NULL) && (frameConsumerDoneFence[ndx] != DE_NULL))
3245 VK_CHECK(vkd.waitForFences(device, 1, &frameConsumerDoneFence[ndx], true, ~0ull));
3246
3247 VK_CHECK(vkd.getFenceStatus(device, frameCompleteFence));
3248 VK_CHECK(vkd.resetFences(device, 1, &frameCompleteFence));
3249 DE_ASSERT(vkd.getFenceStatus(device, frameCompleteFence) == VK_NOT_READY);
3250
3251 *submitInfo =
3252 {
3253 VK_STRUCTURE_TYPE_SUBMIT_INFO_2_KHR, // VkStructureType sType;
3254 DE_NULL, // const void* pNext;
3255 0, // VkSubmitFlagsKHR flags;
3256 (deUint32)frameCompleteSemaphores.size(), // uint32_t waitSemaphoreInfoCount;
3257 de::dataOrNull(frameCompleteSemaphores), // const VkSemaphoreSubmitInfoKHR* pWaitSemaphoreInfos;
3258 (uint32_t)commandBufferSubmitInfos.size(), // uint32_t commandBufferInfoCount;
3259 dataOrNullPtr(commandBufferSubmitInfos), // const VkCommandBufferSubmitInfoKHR* pCommandBufferInfos;
3260 (deUint32)frameConsumerDoneSemaphores.size(), // uint32_t signalSemaphoreInfoCount;
3261 de::dataOrNull(frameConsumerDoneSemaphores), // const VkSemaphoreSubmitInfoKHR* pSignalSemaphoreInfos;
3262 };
3263
3264 vkd.queueSubmit2(queue, 1, submitInfo, frameCompleteFence);
3265 }
3266
3267 void VideoBaseDecoder::Deinitialize ()
3268 {
3269 if (m_device != DE_NULL)
3270 {
3271 const DeviceInterface& vkd = getDeviceDriver();
3272 const VkDevice device = getDevice();
3273 const VkQueue queueDecode = getQueueDecode();
3274 const VkQueue queueTransfer = getQueueTransfer();
3275
3276 if (queueDecode)
3277 {
3278 vkd.queueWaitIdle(queueDecode);
3279 }
3280
3281 if (queueTransfer)
3282 {
3283 vkd.queueWaitIdle(queueTransfer);
3284 }
3285
3286 if (device)
3287 {
3288 vkd.deviceWaitIdle(device);
3289 }
3290 }
3291
3292 m_dpb.Deinit();
3293
3294 if (m_videoFrameBuffer)
3295 {
3296 m_videoFrameBuffer = MovePtr<VideoFrameBuffer>();
3297 }
3298
3299 if (m_decodeFramesData && m_videoCommandPool)
3300 {
3301 for (size_t decodeFrameId = 0; decodeFrameId < m_maxDecodeFramesCount; decodeFrameId++)
3302 m_decodeFramesData[decodeFrameId].commandBuffer = Move<VkCommandBuffer>();
3303
3304 m_videoCommandPool = Move<VkCommandPool>();
3305 }
3306
3307 if (m_decodeFramesData)
3308 {
3309 for (size_t decodeFrameId = 0; decodeFrameId < m_maxDecodeFramesCount; decodeFrameId++)
3310 {
3311 m_decodeFramesData[decodeFrameId].bitstreamBuffer.DestroyVideoBitstreamBuffer();
3312 }
3313
3314 delete[] m_decodeFramesData;
3315
3316 m_decodeFramesData = DE_NULL;
3317 }
3318
3319 }
3320
3321 int32_t NvidiaParserVideoPictureParameters::m_currentId = 0;
3322
3323 int32_t NvidiaParserVideoPictureParameters::PopulateH264UpdateFields (const StdVideoPictureParametersSet* pStdPictureParametersSet,
3324 vk::VkVideoDecodeH264SessionParametersAddInfoKHR& h264SessionParametersAddInfo)
3325 {
3326 if (pStdPictureParametersSet == DE_NULL)
3327 return -1;
3328
3329 DE_ASSERT((pStdPictureParametersSet->m_updateType == VK_PICTURE_PARAMETERS_UPDATE_H264_SPS) || (pStdPictureParametersSet->m_updateType == VK_PICTURE_PARAMETERS_UPDATE_H264_PPS));
3330 DE_ASSERT(h264SessionParametersAddInfo.sType == VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_SESSION_PARAMETERS_ADD_INFO_KHR);
3331
3332 if (pStdPictureParametersSet->m_updateType == VK_PICTURE_PARAMETERS_UPDATE_H264_SPS)
3333 {
3334 h264SessionParametersAddInfo.stdSPSCount = 1;
3335 h264SessionParametersAddInfo.pStdSPSs = &pStdPictureParametersSet->m_data.h264Sps.stdSps;
3336 return pStdPictureParametersSet->m_data.h264Sps.stdSps.seq_parameter_set_id;
3337 }
3338 else if (pStdPictureParametersSet->m_updateType == VK_PICTURE_PARAMETERS_UPDATE_H264_PPS)
3339 {
3340 h264SessionParametersAddInfo.stdPPSCount = 1;
3341 h264SessionParametersAddInfo.pStdPPSs = &pStdPictureParametersSet->m_data.h264Pps.stdPps;
3342 return pStdPictureParametersSet->m_data.h264Pps.stdPps.pic_parameter_set_id;
3343 }
3344 else
3345 {
3346 TCU_THROW(InternalError, "Incorrect h.264 type");
3347 }
3348 }
3349
3350 int32_t NvidiaParserVideoPictureParameters::PopulateH265UpdateFields (const StdVideoPictureParametersSet* pStdPictureParametersSet,
3351 vk::VkVideoDecodeH265SessionParametersAddInfoKHR& h265SessionParametersAddInfo)
3352 {
3353 if (pStdPictureParametersSet == DE_NULL)
3354 return -1;
3355
3356 DE_ASSERT((pStdPictureParametersSet->m_updateType == VK_PICTURE_PARAMETERS_UPDATE_H265_SPS) || (pStdPictureParametersSet->m_updateType == VK_PICTURE_PARAMETERS_UPDATE_H265_PPS)
3357 || (pStdPictureParametersSet->m_updateType == VK_PICTURE_PARAMETERS_UPDATE_H265_VPS));
3358 DE_ASSERT(h265SessionParametersAddInfo.sType == VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_SESSION_PARAMETERS_ADD_INFO_KHR);
3359
3360 if (pStdPictureParametersSet->m_updateType == VK_PICTURE_PARAMETERS_UPDATE_H265_SPS)
3361 {
3362 h265SessionParametersAddInfo.stdSPSCount = 1;
3363 h265SessionParametersAddInfo.pStdSPSs = &pStdPictureParametersSet->m_data.h265Sps.stdSps;
3364 return pStdPictureParametersSet->m_data.h265Sps.stdSps.sps_seq_parameter_set_id;
3365 }
3366 else if (pStdPictureParametersSet->m_updateType == VK_PICTURE_PARAMETERS_UPDATE_H265_PPS)
3367 {
3368 h265SessionParametersAddInfo.stdPPSCount = 1;
3369 h265SessionParametersAddInfo.pStdPPSs = &pStdPictureParametersSet->m_data.h265Pps.stdPps;
3370 return pStdPictureParametersSet->m_data.h265Pps.stdPps.pps_seq_parameter_set_id;
3371 }
3372 else if (pStdPictureParametersSet->m_updateType == VK_PICTURE_PARAMETERS_UPDATE_H265_VPS)
3373 {
3374 // Vulkan Video Decode APIs do not support VPS parameters
3375 return -1;
3376 }
3377 else
3378 {
3379 TCU_THROW(InternalError, "Incorrect h.265 type");
3380 }
3381 }
3382
3383 NvidiaParserVideoPictureParameters* NvidiaParserVideoPictureParameters::Create (const DeviceInterface& vkd,
3384 VkDevice device,
3385 VkVideoSessionKHR videoSession,
3386 const StdVideoPictureParametersSet* pSpsStdPictureParametersSet,
3387 const StdVideoPictureParametersSet* pPpsStdPictureParametersSet,
3388 NvidiaParserVideoPictureParameters* pTemplate)
3389 {
3390 int32_t currentSpsId = -1;
3391 int32_t currentPpsId = -1;
3392 const NvidiaParserVideoPictureParameters* pTemplatePictureParameters = pTemplate;
3393 vk::VkVideoSessionParametersCreateInfoKHR createInfo = vk::initVulkanStructure();
3394 vk::VkVideoDecodeH264SessionParametersCreateInfoKHR h264SessionParametersCreateInfo = vk::initVulkanStructure();
3395 vk::VkVideoDecodeH264SessionParametersAddInfoKHR h264SessionParametersAddInfo = vk::initVulkanStructure();
3396 vk::VkVideoDecodeH265SessionParametersCreateInfoKHR h265SessionParametersCreateInfo = vk::initVulkanStructure();
3397 vk::VkVideoDecodeH265SessionParametersAddInfoKHR h265SessionParametersAddInfo = vk::initVulkanStructure();
3398 NvidiaParserPictureParametersUpdateType updateType = pSpsStdPictureParametersSet
3399 ? pSpsStdPictureParametersSet->m_updateType
3400 : pPpsStdPictureParametersSet->m_updateType;
3401 NvidiaParserVideoPictureParameters* pPictureParameters = new NvidiaParserVideoPictureParameters(device);
3402
3403 if (pPictureParameters == DE_NULL)
3404 return DE_NULL;
3405
3406 switch (updateType)
3407 {
3408 case VK_PICTURE_PARAMETERS_UPDATE_H264_SPS:
3409 case VK_PICTURE_PARAMETERS_UPDATE_H264_PPS:
3410 {
3411 createInfo.pNext = &h264SessionParametersCreateInfo;
3412 // TODO maxStdVPSCount ?
3413 h264SessionParametersCreateInfo.maxStdSPSCount = MAX_SPS_IDS;
3414 h264SessionParametersCreateInfo.maxStdPPSCount = MAX_PPS_IDS;
3415 h264SessionParametersCreateInfo.pParametersAddInfo = &h264SessionParametersAddInfo;
3416
3417 currentSpsId = PopulateH264UpdateFields(pSpsStdPictureParametersSet, h264SessionParametersAddInfo);
3418 currentPpsId = PopulateH264UpdateFields(pPpsStdPictureParametersSet, h264SessionParametersAddInfo);
3419
3420 break;
3421 }
3422 case VK_PICTURE_PARAMETERS_UPDATE_H265_SPS:
3423 case VK_PICTURE_PARAMETERS_UPDATE_H265_PPS:
3424 {
3425 createInfo.pNext = &h265SessionParametersCreateInfo;
3426
3427 h265SessionParametersCreateInfo.maxStdSPSCount = MAX_SPS_IDS;
3428 h265SessionParametersCreateInfo.maxStdPPSCount = MAX_PPS_IDS;
3429 h265SessionParametersCreateInfo.pParametersAddInfo = &h265SessionParametersAddInfo;
3430
3431 currentSpsId = PopulateH265UpdateFields(pSpsStdPictureParametersSet, h265SessionParametersAddInfo);
3432 currentPpsId = PopulateH265UpdateFields(pPpsStdPictureParametersSet, h265SessionParametersAddInfo);
3433
3434 break;
3435 }
3436 case VK_PICTURE_PARAMETERS_UPDATE_H265_VPS:
3437 {
3438 // Vulkan Video Decode APIs do not support VPS parameters
3439 return DE_NULL;
3440 }
3441 default:
3442 TCU_THROW(InternalError, "Invalid Parser format");
3443 }
3444
3445 createInfo.videoSessionParametersTemplate = pTemplatePictureParameters != DE_NULL
3446 ? pTemplatePictureParameters->GetVideoSessionParametersKHR()
3447 : VkVideoSessionParametersKHR(DE_NULL);
3448 createInfo.videoSession = videoSession;
3449
3450 pPictureParameters->m_sessionParameters = createVideoSessionParametersKHR(vkd, device, &createInfo);
3451
3452 DEBUGLOG(cout << "VkVideoSessionParametersKHR:" << pPictureParameters->m_sessionParameters.get() << endl);
3453
3454 if (pTemplatePictureParameters)
3455 {
3456 pPictureParameters->m_spsIdsUsed = pTemplatePictureParameters->m_spsIdsUsed;
3457 pPictureParameters->m_ppsIdsUsed = pTemplatePictureParameters->m_ppsIdsUsed;
3458 }
3459
3460 DE_ASSERT((currentSpsId != -1) || (currentPpsId != -1));
3461
3462 if (currentSpsId != -1)
3463 {
3464 pPictureParameters->m_spsIdsUsed.set(currentSpsId, true);
3465 }
3466
3467 if (currentPpsId != -1)
3468 {
3469 pPictureParameters->m_ppsIdsUsed.set(currentPpsId, true);
3470 }
3471
3472 pPictureParameters->m_Id = ++m_currentId;
3473
3474 return pPictureParameters;
3475 }
3476
3477 VkResult NvidiaParserVideoPictureParameters::Update (const DeviceInterface& vkd,
3478 const StdVideoPictureParametersSet* pSpsStdPictureParametersSet,
3479 const StdVideoPictureParametersSet* pPpsStdPictureParametersSet)
3480 {
3481 int32_t currentSpsId = -1;
3482 int32_t currentPpsId = -1;
3483
3484 VkVideoSessionParametersUpdateInfoKHR updateInfo = vk::initVulkanStructure();
3485 VkVideoDecodeH264SessionParametersAddInfoKHR h264SessionParametersAddInfo = vk::initVulkanStructure();
3486 VkVideoDecodeH265SessionParametersAddInfoKHR h265SessionParametersAddInfo = vk::initVulkanStructure();
3487 NvidiaParserPictureParametersUpdateType updateType = pSpsStdPictureParametersSet
3488 ? pSpsStdPictureParametersSet->m_updateType
3489 : pPpsStdPictureParametersSet->m_updateType;
3490
3491 switch (updateType)
3492 {
3493 case VK_PICTURE_PARAMETERS_UPDATE_H264_SPS:
3494 case VK_PICTURE_PARAMETERS_UPDATE_H264_PPS:
3495 {
3496
3497 updateInfo.pNext = &h264SessionParametersAddInfo;
3498
3499 currentSpsId = PopulateH264UpdateFields(pSpsStdPictureParametersSet, h264SessionParametersAddInfo);
3500 currentPpsId = PopulateH264UpdateFields(pPpsStdPictureParametersSet, h264SessionParametersAddInfo);
3501
3502 break;
3503 }
3504 case VK_PICTURE_PARAMETERS_UPDATE_H265_VPS:
3505 {
3506 // Vulkan Video Decode APIs do not support VPS parameters
3507 return VK_ERROR_INITIALIZATION_FAILED;
3508 }
3509 case VK_PICTURE_PARAMETERS_UPDATE_H265_SPS:
3510 case VK_PICTURE_PARAMETERS_UPDATE_H265_PPS:
3511 {
3512
3513 updateInfo.pNext = &h265SessionParametersAddInfo;
3514
3515 currentSpsId = PopulateH265UpdateFields(pSpsStdPictureParametersSet, h265SessionParametersAddInfo);
3516 currentPpsId = PopulateH265UpdateFields(pPpsStdPictureParametersSet, h265SessionParametersAddInfo);
3517
3518 break;
3519 }
3520 default:
3521 TCU_THROW(InternalError, "Invalid Parser format");
3522 }
3523
3524 if (pSpsStdPictureParametersSet)
3525 {
3526 updateInfo.updateSequenceCount = std::max(pSpsStdPictureParametersSet->m_updateSequenceCount, updateInfo.updateSequenceCount);
3527 }
3528
3529 if (pPpsStdPictureParametersSet)
3530 {
3531 updateInfo.updateSequenceCount = std::max(pPpsStdPictureParametersSet->m_updateSequenceCount, updateInfo.updateSequenceCount);
3532 }
3533
3534 vk::VkResult result = vkd.updateVideoSessionParametersKHR(m_device, *m_sessionParameters, &updateInfo);
3535
3536 if (result == VK_SUCCESS)
3537 {
3538 DE_ASSERT((currentSpsId != -1) || (currentPpsId != -1));
3539
3540 if (currentSpsId != -1)
3541 {
3542 m_spsIdsUsed.set(currentSpsId, true);
3543 }
3544
3545 if (currentPpsId != -1)
3546 {
3547 m_ppsIdsUsed.set(currentPpsId, true);
3548 }
3549 }
3550 else
3551 {
3552 TCU_THROW(InternalError, "Could not update Session Parameters Object");
3553 }
3554
3555 return result;
3556 }
3557
3558 int32_t NvidiaParserVideoPictureParameters::AddRef ()
3559 {
3560 return ++m_refCount;
3561 }
3562
3563 int32_t NvidiaParserVideoPictureParameters::Release ()
3564 {
3565 uint32_t ret = --m_refCount;
3566
3567 if (ret == 0)
3568 {
3569 delete this;
3570 }
3571
3572 return ret;
3573 }
3574
3575 ImageObject::ImageObject ()
3576 : m_imageFormat (VK_FORMAT_UNDEFINED)
3577 , m_imageExtent ()
3578 , m_image (DE_NULL)
3579 , m_imageView ()
3580 , m_imageArrayLayers (0)
3581 {
3582 }
3583
3584 ImageObject::~ImageObject ()
3585 {
3586 DestroyImage();
3587 }
3588
3589 void ImageObject::DestroyImage ()
3590 {
3591 m_image = de::MovePtr<ImageWithMemory>(DE_NULL);
3592 m_imageView = Move<VkImageView>();
3593 m_imageFormat = VK_FORMAT_UNDEFINED;
3594 m_imageExtent = makeExtent2D(0,0);
3595 m_imageArrayLayers = 0;
3596 }
3597
3598 VkResult ImageObject::CreateImage (const DeviceInterface& vkd,
3599 VkDevice device,
3600 int32_t queueFamilyIndex,
3601 Allocator& allocator,
3602 const VkImageCreateInfo* pImageCreateInfo,
3603 const MemoryRequirement memoryRequirement)
3604 {
3605 DestroyImage();
3606
3607 m_imageFormat = pImageCreateInfo->format;
3608 m_imageExtent = makeExtent2D(pImageCreateInfo->extent.width, pImageCreateInfo->extent.height);
3609 m_image = de::MovePtr<ImageWithMemory>(new ImageWithMemory(vkd, device, allocator, *pImageCreateInfo, memoryRequirement));
3610 m_imageArrayLayers = pImageCreateInfo->arrayLayers;
3611
3612 DE_ASSERT(m_imageArrayLayers != 0);
3613
3614 VkResult status = StageImage(vkd, device, pImageCreateInfo->usage, memoryRequirement, queueFamilyIndex);
3615
3616 if (VK_SUCCESS != status)
3617 {
3618 return status;
3619 }
3620
3621 const VkImageViewCreateInfo imageViewCreateInfo =
3622 {
3623 VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, // VkStructureType sType;
3624 DE_NULL, // const void* pNext;
3625 0, // VkImageViewCreateFlags flags;
3626 m_image->get(), // VkImage image;
3627 VK_IMAGE_VIEW_TYPE_2D, // VkImageViewType viewType;
3628 m_imageFormat, // VkFormat format;
3629 makeComponentMappingIdentity(), // VkComponentMapping components;
3630 { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, m_imageArrayLayers }, // VkImageSubresourceRange subresourceRange;
3631 };
3632
3633 m_imageView = createImageView(vkd, device, &imageViewCreateInfo);
3634
3635 return VK_SUCCESS;
3636 }
3637
3638 VkResult ImageObject::StageImage (const DeviceInterface& vkd,
3639 VkDevice device,
3640 VkImageUsageFlags usage,
3641 const MemoryRequirement memoryRequirement,
3642 uint32_t queueFamilyIndex)
3643 {
3644 if (usage == 0 && memoryRequirement == MemoryRequirement::Any)
3645 {
3646 return VK_ERROR_FORMAT_NOT_SUPPORTED;
3647 }
3648
3649 Move<VkCommandPool> cmdPool = createCommandPool(vkd, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, queueFamilyIndex);
3650 Move<VkCommandBuffer> cmdBuffer = allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
3651 const VkQueue queue = getDeviceQueue(vkd, device, queueFamilyIndex, 0u);
3652 const VkImageLayout layout = (usage & VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR) ? VK_IMAGE_LAYOUT_VIDEO_DECODE_DST_KHR
3653 : (usage & VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR) ? VK_IMAGE_LAYOUT_VIDEO_DECODE_DPB_KHR
3654 : VK_IMAGE_LAYOUT_UNDEFINED;
3655
3656 DE_ASSERT(layout != VK_IMAGE_LAYOUT_UNDEFINED);
3657
3658 beginCommandBuffer(vkd, *cmdBuffer, 0u);
3659
3660 setImageLayout(vkd, *cmdBuffer, m_image->get(), VK_IMAGE_LAYOUT_UNDEFINED, layout, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR);
3661
3662 endCommandBuffer(vkd, *cmdBuffer);
3663
3664 submitCommandsAndWait(vkd, device, queue, *cmdBuffer);
3665
3666 return VK_SUCCESS;
3667 }
3668
3669 VkFormat ImageObject::getFormat (void) const
3670 {
3671 return m_imageFormat;
3672 }
3673
3674 VkExtent2D ImageObject::getExtent (void) const
3675 {
3676 return m_imageExtent;
3677 }
3678
3679 VkImage ImageObject::getImage (void) const
3680 {
3681 return m_image->get();
3682 }
3683
3684 VkImageView ImageObject::getView (void) const
3685 {
3686 return m_imageView.get();
3687 }
3688
3689 bool ImageObject::isArray (void) const
3690 {
3691 return m_imageArrayLayers > 1;
3692 }
3693
3694 bool ImageObject::isImageExist (void) const
3695 {
3696 return (m_image != DE_NULL) && (m_image->get() != DE_NULL);
3697 }
3698
3699
3700 NvidiaPerFrameDecodeImage::NvidiaPerFrameDecodeImage ()
3701 : NvidiaVulkanPictureBase ()
3702 , m_picDispInfo ()
3703 , m_frameImage ()
3704 , m_frameImageCurrentLayout (VK_IMAGE_LAYOUT_UNDEFINED)
3705 , m_frameCompleteFence ()
3706 , m_frameCompleteSemaphore ()
3707 , m_frameConsumerDoneFence ()
3708 , m_frameConsumerDoneSemaphore ()
3709 , m_hasFrameCompleteSignalFence (false)
3710 , m_hasFrameCompleteSignalSemaphore (false)
3711 , m_hasConsummerSignalFence (false)
3712 , m_hasConsummerSignalSemaphore (false)
3713 , m_inDecodeQueue (false)
3714 , m_inDisplayQueue (false)
3715 , m_ownedByDisplay (false)
3716 , m_dpbImage ()
3717 , m_dpbImageCurrentLayout (VK_IMAGE_LAYOUT_UNDEFINED)
3718 {
3719 }
3720
3721 void NvidiaPerFrameDecodeImage::init (const DeviceInterface& vkd,
3722 VkDevice device)
3723 {
3724 const VkFenceCreateInfo fenceFrameCompleteInfo = // The fence waited on for the first frame should be signaled.
3725 {
3726 VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, // VkStructureType sType;
3727 DE_NULL, // const void* pNext;
3728 VK_FENCE_CREATE_SIGNALED_BIT, // VkFenceCreateFlags flags;
3729 };
3730 const VkFenceCreateInfo fenceInfo =
3731 {
3732 VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, // VkStructureType sType;
3733 DE_NULL, // const void* pNext;
3734 0, // VkFenceCreateFlags flags;
3735 };
3736 const VkSemaphoreCreateInfo semInfo =
3737 {
3738 VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, // VkStructureType sType;
3739 DE_NULL, // const void* pNext;
3740 0, // VkSemaphoreCreateFlags flags;
3741 };
3742
3743 m_frameCompleteFence = createFence(vkd, device, &fenceFrameCompleteInfo);
3744 m_frameConsumerDoneFence = createFence(vkd, device, &fenceInfo);
3745 m_frameCompleteSemaphore = createSemaphore(vkd, device, &semInfo);
3746 m_frameConsumerDoneSemaphore = createSemaphore(vkd, device, &semInfo);
3747
3748 Reset();
3749 }
3750
3751 VkResult NvidiaPerFrameDecodeImage::CreateImage (const DeviceInterface& vkd,
3752 VkDevice device,
3753 int32_t queueFamilyIndex,
3754 Allocator& allocator,
3755 const VkImageCreateInfo* pImageCreateInfo,
3756 const MemoryRequirement memoryRequirement)
3757 {
3758 VK_CHECK(m_frameImage.CreateImage(vkd, device, queueFamilyIndex, allocator, pImageCreateInfo, memoryRequirement));
3759
3760 m_frameImageCurrentLayout = VK_IMAGE_LAYOUT_UNDEFINED;
3761
3762 return VK_SUCCESS;
3763 }
3764
3765 const ImageObject* NvidiaPerFrameDecodeImage::GetImageObject (void)
3766 {
3767 return isImageExist() ? &m_frameImage : DE_NULL;
3768 }
3769
3770 bool NvidiaPerFrameDecodeImage::isImageExist (void)
3771 {
3772 return m_frameImage.isImageExist();
3773 }
3774
3775 const ImageObject* NvidiaPerFrameDecodeImage::GetDPBImageObject (void)
3776 {
3777 return m_dpbImage.get();
3778 }
3779
3780 void NvidiaPerFrameDecodeImage::deinit ()
3781 {
3782 currentVkPictureParameters = DE_NULL;
3783
3784 m_frameCompleteFence = Move<VkFence>();
3785 m_frameConsumerDoneFence = Move<VkFence>();
3786 m_frameCompleteSemaphore = Move<VkSemaphore>();
3787 m_frameConsumerDoneSemaphore = Move<VkSemaphore>();
3788
3789 m_frameImage.DestroyImage();
3790 m_dpbImage.clear();
3791
3792 Reset();
3793 }
3794
3795 NvidiaPerFrameDecodeImage::~NvidiaPerFrameDecodeImage ()
3796 {
3797 deinit();
3798 }
3799
3800 NvidiaPerFrameDecodeImageSet::NvidiaPerFrameDecodeImageSet ()
3801 : m_size (0)
3802 , m_frameDecodeImages ()
3803 {
3804 }
3805
3806 int32_t NvidiaPerFrameDecodeImageSet::init (const DeviceInterface& vkd,
3807 VkDevice device,
3808 int32_t queueFamilyIndex,
3809 Allocator& allocator,
3810 uint32_t numImages,
3811 const VkImageCreateInfo* pOutImageCreateInfo,
3812 const VkImageCreateInfo* pDpbImageCreateInfo,
3813 MemoryRequirement memoryRequirement)
3814 {
3815 const uint32_t firstIndex = (uint32_t)m_size;
3816
3817 // CTS is not designed to reinitialize images
3818 DE_ASSERT(numImages > m_size);
3819
3820 DE_ASSERT(numImages < DE_LENGTH_OF_ARRAY(m_frameDecodeImages));
3821
3822 for (uint32_t imageIndex = firstIndex; imageIndex < numImages; imageIndex++)
3823 {
3824 DE_ASSERT(!m_frameDecodeImages[imageIndex].isImageExist());
3825
3826 m_frameDecodeImages[imageIndex].init(vkd, device);
3827
3828 VK_CHECK(m_frameDecodeImages[imageIndex].CreateImage(vkd, device, queueFamilyIndex, allocator, pOutImageCreateInfo, memoryRequirement));
3829
3830 DEBUGLOG(std::cout << "CreateImg: " << m_frameDecodeImages[imageIndex].m_frameImage.getImage() << " " << std::dec << pOutImageCreateInfo->extent.width << "x" << pOutImageCreateInfo->extent.height << " " << m_frameDecodeImages[imageIndex].m_frameImageCurrentLayout << std::endl);
3831
3832 if (pDpbImageCreateInfo != DE_NULL)
3833 {
3834 DE_ASSERT(pDpbImageCreateInfo->arrayLayers == 1 || pDpbImageCreateInfo->arrayLayers >= numImages - m_size);
3835
3836 if (pDpbImageCreateInfo->arrayLayers == 1 || imageIndex == firstIndex)
3837 {
3838 m_frameDecodeImages[imageIndex].m_dpbImage = de::SharedPtr<ImageObject>(new ImageObject());
3839
3840 VK_CHECK(m_frameDecodeImages[imageIndex].m_dpbImage->CreateImage(vkd, device, queueFamilyIndex, allocator, pDpbImageCreateInfo, memoryRequirement));
3841
3842 DEBUGLOG(std::cout << "CreateDPB: " << m_frameDecodeImages[imageIndex].m_dpbImage->getImage() << " " << std::dec << pOutImageCreateInfo->extent.width << "x" << pOutImageCreateInfo->extent.height << " " << m_frameDecodeImages[imageIndex].m_dpbImageCurrentLayout << std::endl);
3843 }
3844 else
3845 {
3846 m_frameDecodeImages[imageIndex].m_dpbImage = m_frameDecodeImages[firstIndex].m_dpbImage;
3847 }
3848 }
3849 }
3850
3851 m_size = numImages;
3852
3853 return (int32_t)m_size;
3854 }
3855
3856 void NvidiaPerFrameDecodeImageSet::deinit ()
3857 {
3858 for (uint32_t ndx = 0; ndx < m_size; ndx++)
3859 m_frameDecodeImages[ndx].deinit();
3860
3861 m_size = 0;
3862 }
3863
3864 NvidiaPerFrameDecodeImageSet::~NvidiaPerFrameDecodeImageSet ()
3865 {
3866 deinit();
3867 }
3868
3869 NvidiaPerFrameDecodeImage& NvidiaPerFrameDecodeImageSet::operator[] (size_t index)
3870 {
3871 DE_ASSERT(index < m_size);
3872
3873 return m_frameDecodeImages[index];
3874 }
3875
3876 size_t NvidiaPerFrameDecodeImageSet::size ()
3877 {
3878 return m_size;
3879 }
3880
3881 VideoFrameBuffer::VideoFrameBuffer ()
3882 : m_perFrameDecodeImageSet ()
3883 , m_displayFrames ()
3884 , m_queryPool ()
3885 , m_ownedByDisplayMask (0)
3886 , m_frameNumInDecodeOrder (0)
3887 , m_frameNumInDisplayOrder (0)
3888 , m_extent { 0, 0 }
3889 {
3890 }
3891
3892 Move<VkQueryPool> VideoFrameBuffer::CreateVideoQueries (const DeviceInterface& vkd,
3893 VkDevice device,
3894 uint32_t numSlots,
3895 const VkVideoProfileInfoKHR* pDecodeProfile)
3896 {
3897 const VkQueryPoolCreateInfo queryPoolCreateInfo =
3898 {
3899 VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, // VkStructureType sType;
3900 pDecodeProfile, // const void* pNext;
3901 0, // VkQueryPoolCreateFlags flags;
3902 VK_QUERY_TYPE_RESULT_STATUS_ONLY_KHR, // VkQueryType queryType;
3903 numSlots, // deUint32 queryCount;
3904 0, // VkQueryPipelineStatisticFlags pipelineStatistics;
3905 };
3906
3907 return createQueryPool(vkd, device, &queryPoolCreateInfo);
3908 }
3909
3910 int32_t VideoFrameBuffer::InitImagePool (const DeviceInterface& vkd,
3911 VkDevice device,
3912 int32_t queueFamilyIndex,
3913 Allocator& allocator,
3914 uint32_t numImages,
3915 uint32_t maxNumImages,
3916 const VkImageCreateInfo* pOutImageCreateInfo,
3917 const VkImageCreateInfo* pDpbImageCreateInfo,
3918 const VkVideoProfileInfoKHR* pDecodeProfile)
3919 {
3920 if (numImages && pDecodeProfile && m_queryPool.get() == DE_NULL)
3921 m_queryPool = CreateVideoQueries(vkd, device, maxNumImages, pDecodeProfile);
3922
3923 if (numImages && pOutImageCreateInfo)
3924 {
3925 m_extent = makeExtent2D(pOutImageCreateInfo->extent.width, pOutImageCreateInfo->extent.height);
3926
3927 return m_perFrameDecodeImageSet.init(vkd, device, queueFamilyIndex, allocator, numImages, pOutImageCreateInfo, pDpbImageCreateInfo, MemoryRequirement::Local);
3928 }
3929
3930 return 0;
3931 }
3932
3933 int32_t VideoFrameBuffer::QueueDecodedPictureForDisplay (int8_t picId, DisplayPictureInfo* pDispInfo)
3934 {
3935 DE_ASSERT((uint32_t)picId < m_perFrameDecodeImageSet.size());
3936
3937 m_perFrameDecodeImageSet[picId].m_displayOrder = m_frameNumInDisplayOrder++;
3938 m_perFrameDecodeImageSet[picId].m_timestamp = pDispInfo->timestamp;
3939 m_perFrameDecodeImageSet[picId].m_inDisplayQueue = true;
3940 m_perFrameDecodeImageSet[picId].AddRef();
3941
3942 m_displayFrames.push((uint8_t)picId);
3943
3944 return picId;
3945 }
3946
3947 int32_t VideoFrameBuffer::QueuePictureForDecode (int8_t picId,
3948 VulkanParserDecodePictureInfo* pDecodePictureInfo,
3949 NvidiaParserVideoRefCountBase* pCurrentVkPictureParameters,
3950 FrameSynchronizationInfo* pFrameSynchronizationInfo)
3951 {
3952 DE_ASSERT((uint32_t)picId < m_perFrameDecodeImageSet.size());
3953
3954 m_perFrameDecodeImageSet[picId].m_picDispInfo = *pDecodePictureInfo;
3955 m_perFrameDecodeImageSet[picId].m_decodeOrder = m_frameNumInDecodeOrder++;
3956 m_perFrameDecodeImageSet[picId].m_inDecodeQueue = true;
3957 m_perFrameDecodeImageSet[picId].currentVkPictureParameters = pCurrentVkPictureParameters;
3958
3959 if (pFrameSynchronizationInfo->hasFrameCompleteSignalFence)
3960 {
3961 pFrameSynchronizationInfo->frameCompleteFence = m_perFrameDecodeImageSet[picId].m_frameCompleteFence.get();
3962
3963 if (pFrameSynchronizationInfo->frameCompleteFence != DE_NULL)
3964 {
3965 m_perFrameDecodeImageSet[picId].m_hasFrameCompleteSignalFence = true;
3966 }
3967 }
3968
3969 if (m_perFrameDecodeImageSet[picId].m_hasConsummerSignalFence)
3970 {
3971 pFrameSynchronizationInfo->frameConsumerDoneFence = m_perFrameDecodeImageSet[picId].m_frameConsumerDoneFence.get();
3972
3973 m_perFrameDecodeImageSet[picId].m_hasConsummerSignalFence = false;
3974 }
3975
3976 if (pFrameSynchronizationInfo->hasFrameCompleteSignalSemaphore)
3977 {
3978 pFrameSynchronizationInfo->frameCompleteSemaphore = m_perFrameDecodeImageSet[picId].m_frameCompleteSemaphore.get();
3979
3980 if (pFrameSynchronizationInfo->frameCompleteSemaphore != DE_NULL)
3981 {
3982 m_perFrameDecodeImageSet[picId].m_hasFrameCompleteSignalSemaphore = true;
3983 }
3984 }
3985
3986 if (m_perFrameDecodeImageSet[picId].m_hasConsummerSignalSemaphore)
3987 {
3988 pFrameSynchronizationInfo->frameConsumerDoneSemaphore = m_perFrameDecodeImageSet[picId].m_frameConsumerDoneSemaphore.get();
3989
3990 m_perFrameDecodeImageSet[picId].m_hasConsummerSignalSemaphore = false;
3991 }
3992
3993 pFrameSynchronizationInfo->queryPool = m_queryPool.get();
3994 pFrameSynchronizationInfo->startQueryId = picId;
3995 pFrameSynchronizationInfo->numQueries = 1;
3996
3997 return picId;
3998 }
3999
4000 int32_t VideoFrameBuffer::DequeueDecodedPicture (DecodedFrame* pDecodedFrame)
4001 {
4002 int numberOfPendingFrames = 0;
4003 int pictureIndex = -1;
4004
4005 if (!m_displayFrames.empty())
4006 {
4007 numberOfPendingFrames = (int)m_displayFrames.size();
4008 pictureIndex = m_displayFrames.front();
4009
4010 DE_ASSERT((pictureIndex >= 0) && ((uint32_t)pictureIndex < m_perFrameDecodeImageSet.size()));
4011 DE_ASSERT(!(m_ownedByDisplayMask & (1 << pictureIndex)));
4012
4013 m_ownedByDisplayMask |= (1 << pictureIndex);
4014 m_displayFrames.pop();
4015 m_perFrameDecodeImageSet[pictureIndex].m_inDisplayQueue = false;
4016 m_perFrameDecodeImageSet[pictureIndex].m_ownedByDisplay = true;
4017 }
4018
4019 if ((uint32_t)pictureIndex < m_perFrameDecodeImageSet.size())
4020 {
4021 pDecodedFrame->pictureIndex = pictureIndex;
4022
4023 pDecodedFrame->pDecodedImage = &m_perFrameDecodeImageSet[pictureIndex].m_frameImage;
4024
4025 pDecodedFrame->decodedImageLayout = m_perFrameDecodeImageSet[pictureIndex].m_frameImageCurrentLayout;
4026
4027 if (m_perFrameDecodeImageSet[pictureIndex].m_hasFrameCompleteSignalFence)
4028 {
4029 pDecodedFrame->frameCompleteFence = m_perFrameDecodeImageSet[pictureIndex].m_frameCompleteFence.get();
4030
4031 m_perFrameDecodeImageSet[pictureIndex].m_hasFrameCompleteSignalFence = false;
4032 }
4033 else
4034 {
4035 pDecodedFrame->frameCompleteFence = DE_NULL;
4036 }
4037
4038 if (m_perFrameDecodeImageSet[pictureIndex].m_hasFrameCompleteSignalSemaphore)
4039 {
4040 pDecodedFrame->frameCompleteSemaphore = m_perFrameDecodeImageSet[pictureIndex].m_frameCompleteSemaphore.get();
4041
4042 m_perFrameDecodeImageSet[pictureIndex].m_hasFrameCompleteSignalSemaphore = false;
4043 }
4044 else
4045 {
4046 pDecodedFrame->frameCompleteSemaphore = DE_NULL;
4047 }
4048
4049 pDecodedFrame->frameConsumerDoneFence = m_perFrameDecodeImageSet[pictureIndex].m_frameConsumerDoneFence.get();
4050 pDecodedFrame->frameConsumerDoneSemaphore = m_perFrameDecodeImageSet[pictureIndex].m_frameConsumerDoneSemaphore.get();
4051 pDecodedFrame->timestamp = m_perFrameDecodeImageSet[pictureIndex].m_timestamp;
4052 pDecodedFrame->decodeOrder = m_perFrameDecodeImageSet[pictureIndex].m_decodeOrder;
4053 pDecodedFrame->displayOrder = m_perFrameDecodeImageSet[pictureIndex].m_displayOrder;
4054 pDecodedFrame->queryPool = m_queryPool.get();
4055 pDecodedFrame->startQueryId = pictureIndex;
4056 pDecodedFrame->numQueries = 1;
4057 }
4058
4059 return numberOfPendingFrames;
4060 }
4061
4062 int32_t VideoFrameBuffer::GetDisplayFramesCount (void)
4063 {
4064 return static_cast<int32_t>(m_displayFrames.size());
4065 }
4066
4067 int32_t VideoFrameBuffer::ReleaseDisplayedPicture (DecodedFrameRelease** pDecodedFramesRelease,
4068 uint32_t numFramesToRelease)
4069 {
4070 for (uint32_t i = 0; i < numFramesToRelease; i++)
4071 {
4072 const DecodedFrameRelease* pDecodedFrameRelease = pDecodedFramesRelease[i];
4073 int picId = pDecodedFrameRelease->pictureIndex;
4074
4075 DE_ASSERT((picId >= 0) && ((uint32_t)picId < m_perFrameDecodeImageSet.size()));
4076 DE_ASSERT(m_perFrameDecodeImageSet[picId].m_decodeOrder == pDecodedFrameRelease->decodeOrder);
4077 DE_ASSERT(m_perFrameDecodeImageSet[picId].m_displayOrder == pDecodedFrameRelease->displayOrder);
4078 DE_ASSERT(m_ownedByDisplayMask & (1 << picId));
4079
4080 m_ownedByDisplayMask &= ~(1 << picId);
4081 m_perFrameDecodeImageSet[picId].m_inDecodeQueue = false;
4082 m_perFrameDecodeImageSet[picId].currentVkPictureParameters = DE_NULL;
4083 m_perFrameDecodeImageSet[picId].m_ownedByDisplay = false;
4084 m_perFrameDecodeImageSet[picId].Release();
4085
4086 m_perFrameDecodeImageSet[picId].m_hasConsummerSignalFence = pDecodedFrameRelease->hasConsummerSignalFence;
4087 m_perFrameDecodeImageSet[picId].m_hasConsummerSignalSemaphore = pDecodedFrameRelease->hasConsummerSignalSemaphore;
4088 }
4089
4090 return 0;
4091 }
4092
4093 void VideoFrameBuffer::GetImageResourcesByIndex (int32_t numResources,
4094 const int8_t* referenceSlotIndexes,
4095 VkVideoPictureResourceInfoKHR* videoPictureResources,
4096 PictureResourceInfo* pictureResourcesInfos,
4097 VkImageLayout newImageLayout)
4098 {
4099 DE_ASSERT(newImageLayout == VK_IMAGE_LAYOUT_VIDEO_DECODE_DST_KHR || newImageLayout == VK_IMAGE_LAYOUT_VIDEO_DECODE_DPB_KHR);
4100 DE_ASSERT(pictureResourcesInfos != DE_NULL);
4101
4102 for (int32_t resId = 0; resId < numResources; resId++)
4103 {
4104 const int32_t referenceSlotIndex = referenceSlotIndexes[resId];
4105 const int32_t perFrameDecodeImageSetSize = (int32_t)m_perFrameDecodeImageSet.size();
4106
4107 if (de::inBounds(referenceSlotIndex, 0, perFrameDecodeImageSetSize))
4108 {
4109 NvidiaPerFrameDecodeImage& perFrameDecodeImage = m_perFrameDecodeImageSet[referenceSlotIndex];
4110 VkVideoPictureResourceInfoKHR& videoPictureResource = videoPictureResources[resId];
4111 PictureResourceInfo& pictureResourcesInfo = pictureResourcesInfos[resId];
4112
4113 DE_ASSERT(videoPictureResource.sType == VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR);
4114
4115 if (newImageLayout == VK_IMAGE_LAYOUT_VIDEO_DECODE_DST_KHR || perFrameDecodeImage.m_dpbImage == DE_NULL)
4116 {
4117 videoPictureResource.codedOffset = { 0, 0 }; // FIXME: This parameter must to be adjusted based on the interlaced mode.
4118 videoPictureResource.codedExtent = m_extent;
4119 videoPictureResource.baseArrayLayer = 0;
4120 videoPictureResource.imageViewBinding = perFrameDecodeImage.m_frameImage.getView();
4121
4122 pictureResourcesInfo.image = perFrameDecodeImage.m_frameImage.getImage();
4123 pictureResourcesInfo.currentImageLayout = perFrameDecodeImage.m_frameImageCurrentLayout;
4124
4125 perFrameDecodeImage.m_frameImageCurrentLayout = newImageLayout;
4126 }
4127 else if (newImageLayout == VK_IMAGE_LAYOUT_VIDEO_DECODE_DPB_KHR)
4128 {
4129 videoPictureResource.codedOffset = { 0, 0 }; // FIXME: This parameter must to be adjusted based on the interlaced mode.
4130 videoPictureResource.codedExtent = m_extent;
4131 videoPictureResource.baseArrayLayer = perFrameDecodeImage.m_dpbImage->isArray() ? referenceSlotIndex : 0;
4132 videoPictureResource.imageViewBinding = perFrameDecodeImage.m_dpbImage->getView();
4133
4134 pictureResourcesInfo.image = perFrameDecodeImage.m_dpbImage->getImage();
4135 pictureResourcesInfo.currentImageLayout = perFrameDecodeImage.m_dpbImageCurrentLayout;
4136
4137 perFrameDecodeImage.m_dpbImageCurrentLayout = newImageLayout;
4138 }
4139 else
4140 DE_ASSERT(0 && "Unknown image resource requested");
4141 }
4142 }
4143 }
4144
4145 void VideoFrameBuffer::GetImageResourcesByIndex (const int8_t referenceSlotIndex,
4146 VkVideoPictureResourceInfoKHR* videoPictureResources,
4147 PictureResourceInfo* pictureResourcesInfos,
4148 VkImageLayout newImageLayout)
4149 {
4150 GetImageResourcesByIndex(1, &referenceSlotIndex, videoPictureResources, pictureResourcesInfos, newImageLayout);
4151 }
4152
4153 int32_t VideoFrameBuffer::SetPicNumInDecodeOrder (int32_t picId, int32_t picNumInDecodeOrder)
4154 {
4155 if ((uint32_t)picId < m_perFrameDecodeImageSet.size())
4156 {
4157 int32_t oldPicNumInDecodeOrder = m_perFrameDecodeImageSet[picId].m_decodeOrder;
4158
4159 m_perFrameDecodeImageSet[picId].m_decodeOrder = picNumInDecodeOrder;
4160
4161 return oldPicNumInDecodeOrder;
4162 }
4163
4164 TCU_THROW(InternalError, "Impossible in SetPicNumInDecodeOrder");
4165 }
4166
4167 int32_t VideoFrameBuffer::SetPicNumInDisplayOrder (int32_t picId, int32_t picNumInDisplayOrder)
4168 {
4169 if ((uint32_t)picId < m_perFrameDecodeImageSet.size())
4170 {
4171 int32_t oldPicNumInDisplayOrder = m_perFrameDecodeImageSet[picId].m_displayOrder;
4172
4173 m_perFrameDecodeImageSet[picId].m_displayOrder = picNumInDisplayOrder;
4174
4175 return oldPicNumInDisplayOrder;
4176 }
4177
4178 TCU_THROW(InternalError, "Impossible in SetPicNumInDisplayOrder");
4179 }
4180
4181 NvidiaVulkanPictureBase* VideoFrameBuffer::ReservePictureBuffer (void)
4182 {
4183 for (uint32_t picId = 0; picId < m_perFrameDecodeImageSet.size(); picId++)
4184 {
4185 NvidiaVulkanPictureBase& perFrameDecodeImage = m_perFrameDecodeImageSet[picId];
4186
4187 if (perFrameDecodeImage.IsAvailable())
4188 {
4189 perFrameDecodeImage.Reset();
4190 perFrameDecodeImage.AddRef();
4191 perFrameDecodeImage.m_picIdx = picId;
4192
4193 DEBUGLOG(std::cout << "\tReservePictureBuffer " << picId << std::endl);
4194
4195 return &perFrameDecodeImage;
4196 }
4197 }
4198
4199 TCU_THROW(InternalError, "ReservePictureBuffer failed");
4200 }
4201
4202 size_t VideoFrameBuffer::GetSize (void)
4203 {
4204 return m_perFrameDecodeImageSet.size();
4205 }
4206
4207 VideoFrameBuffer::~VideoFrameBuffer ()
4208 {
4209 }
4210
4211 } // video
4212 } // vkt
4213