/*-------------------------------------------------------------------------
 * Vulkan Conformance Tests
 * ------------------------
 *
 * Copyright (c) 2020 Valve Corporation.
 * Copyright (c) 2020 The Khronos Group Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file
 * \brief SPIR-V tests for VK_AMD_shader_trinary_minmax.
 *//*--------------------------------------------------------------------*/

#include "vktSpvAsmTrinaryMinMaxTests.hpp"
#include "vktTestCase.hpp"

#include "vkQueryUtil.hpp"
#include "vkObjUtil.hpp"
#include "vkBufferWithMemory.hpp"
#include "vkBuilderUtil.hpp"
#include "vkTypeUtil.hpp"
#include "vkBarrierUtil.hpp"
#include "vkCmdUtil.hpp"

#include "tcuStringTemplate.hpp"
#include "tcuFloat.hpp"
#include "tcuMaybe.hpp"

#include "deStringUtil.hpp"
#include "deRandom.hpp"
#include "deMemory.h"

// NOTE(review): the seven standard header names below were chosen to match the std:: facilities this file
// uses (std::map, std::ostringstream, std::array, std::vector, std::unique_ptr, std::min/max/sort, std::pair);
// confirm the exact list against the upstream file.
#include <map>
#include <sstream>
#include <array>
#include <vector>
#include <memory>
#include <algorithm>
#include <utility>

namespace vkt
{
namespace SpirVAssembly
{

namespace
{

// Which of the three trinary operations is being tested. The numeric values are used as indices when building
// the SPIR-V instruction name.
enum class OperationType
{
    MIN = 0,
    MAX = 1,
    MID = 2,
};

// Base type of each operand component. The numeric values are used as indices when building the SPIR-V
// instruction name.
enum class BaseType
{
    TYPE_INT = 0,
    TYPE_UINT,
    TYPE_FLOAT,
};

// The numeric value is the size in bytes.
enum class TypeSize
{
    SIZE_8BIT  = 1,
    SIZE_16BIT = 2,
    SIZE_32BIT = 4,
    SIZE_64BIT = 8,
};

// The numeric value is the number of components.
enum class AggregationType
{
    SCALAR = 1,
    VEC2   = 2,
    VEC3   = 3,
    VEC4   = 4,
};

// Full description of a single test case plus helpers to derive operand sizes from it.
struct TestParams
{
    OperationType   operation;
    BaseType        baseType;
    TypeSize        typeSize;
    AggregationType aggregation;
    deUint32        randomSeed;

    deUint32 operandSize         () const; // In bytes.
    deUint32 numComponents       () const; // Number of components.
    deUint32 effectiveComponents () const; // Effective number of components for size calculation.
    deUint32 componentSize       () const; // In bytes.
};

// Size in bytes taken by one operand in the buffers, using the effective component count.
deUint32 TestParams::operandSize () const
{
    return (effectiveComponents() * componentSize());
}

// Number of components that actually carry data.
deUint32 TestParams::numComponents () const
{
    return static_cast<deUint32>(aggregation);
}

// Same as numComponents() except that VEC3 is counted as VEC4 for size calculations.
deUint32 TestParams::effectiveComponents () const
{
    return static_cast<deUint32>((aggregation == AggregationType::VEC3) ? AggregationType::VEC4 : aggregation);
}

// Size in bytes of a single component.
deUint32 TestParams::componentSize () const
{
    return static_cast<deUint32>(typeSize);
}

// Smallest of three values.
template <class T>
T min3(T op1, T op2, T op3)
{
    return std::min({op1, op2, op3});
}

// Largest of three values.
template <class T>
T max3(T op1, T op2, T op3)
{
    return std::max({op1, op2, op3});
}

// Middle value of three: sort them and take the one in the center.
template <class T>
T mid3(T op1, T op2, T op3)
{
    std::array<T, 3> aux{{op1, op2, op3}};
    std::sort(begin(aux), end(aux));
    return aux[1];
}

// Generates input operands, calculates reference results on the CPU and compares them with the results buffer,
// using the generator and operation functions that match the test parameters.
class OperationManager
{
public:
    // Operation and component index in case of error.
    using OperationComponent = std::pair<deUint32, deUint32>;
    using ComparisonError    = tcu::Maybe<OperationComponent>;

                    OperationManager (const TestParams& params);
    void            genInputBuffer   (void* bufferPtr, deUint32 numOperations);
    void            calculateResult  (void* referenceBuffer, void* inputBuffer, deUint32 numOperations);
    ComparisonError compareResults   (void* referenceBuffer, void* resultsBuffer, deUint32 numOperations);

private:
    using GenerateCompFunc = void (*)(de::Random&, void*); // Write a generated component to the given location.

    // Generator variants to populate input buffer.
    static void genInt8   (de::Random& rnd, void* ptr) { *reinterpret_cast<deInt8*>(ptr)   = static_cast<deInt8>(rnd.getUint8());   }
    static void genUint8  (de::Random& rnd, void* ptr) { *reinterpret_cast<deUint8*>(ptr)  = rnd.getUint8();                        }
    static void genInt16  (de::Random& rnd, void* ptr) { *reinterpret_cast<deInt16*>(ptr)  = static_cast<deInt16>(rnd.getUint16()); }
    static void genUint16 (de::Random& rnd, void* ptr) { *reinterpret_cast<deUint16*>(ptr) = rnd.getUint16();                       }
    static void genInt32  (de::Random& rnd, void* ptr) { *reinterpret_cast<deInt32*>(ptr)  = static_cast<deInt32>(rnd.getUint32()); }
    static void genUint32 (de::Random& rnd, void* ptr) { *reinterpret_cast<deUint32*>(ptr) = rnd.getUint32();                       }
    static void genInt64  (de::Random& rnd, void* ptr) { *reinterpret_cast<deInt64*>(ptr)  = static_cast<deInt64>(rnd.getUint64()); }
    static void genUint64 (de::Random& rnd, void* ptr) { *reinterpret_cast<deUint64*>(ptr) = rnd.getUint64();                       }

    // Helper template for float generators.
    // T must be a tcu::Float instantiation.
    // Attempts to generate +-Inf once every 10 times and avoid denormals.
    template <class T>
    static inline void genFloat (de::Random& rnd, void *ptr)
    {
        T* valuePtr = reinterpret_cast<T*>(ptr);
        if (rnd.getInt(1, 10) == 1)
            *valuePtr = T::inf(rnd.getBool() ? 1 : -1);
        else
        {
            // Retry until the generated value is not a denormal.
            do
            {
                *valuePtr = T{rnd.getDouble(T::largestNormal(-1).asDouble(), T::largestNormal(1).asDouble())};
            } while (valuePtr->isDenorm());
        }
    }

    static void genFloat16 (de::Random& rnd, void* ptr) { genFloat<tcu::Float16>(rnd, ptr); }
    static void genFloat32 (de::Random& rnd, void* ptr) { genFloat<tcu::Float32>(rnd, ptr); }
    static void genFloat64 (de::Random& rnd, void* ptr) { genFloat<tcu::Float64>(rnd, ptr); }

    // An operation function writes an output value given 3 input values.
    using OperationFunc = void (*)(void*, const void*, const void*, const void*);

    // Helper template used below: reinterprets the raw pointers as T and stores f(in1, in2, in3) in out.
    template <class T, class F>
    static inline void runOpFunc (F f, void* out, const void* in1, const void* in2, const void* in3)
    {
        *reinterpret_cast<T*>(out) = f(*reinterpret_cast<const T*>(in1), *reinterpret_cast<const T*>(in2), *reinterpret_cast<const T*>(in3));
    }

    // Apply an operation in software to a given group of components and calculate result.
    static void minInt8    (void* out, const void* in1, const void* in2, const void* in3) { runOpFunc<deInt8>   (min3<deInt8>,   out, in1, in2, in3); }
    static void maxInt8    (void* out, const void* in1, const void* in2, const void* in3) { runOpFunc<deInt8>   (max3<deInt8>,   out, in1, in2, in3); }
    static void midInt8    (void* out, const void* in1, const void* in2, const void* in3) { runOpFunc<deInt8>   (mid3<deInt8>,   out, in1, in2, in3); }
    static void minUint8   (void* out, const void* in1, const void* in2, const void* in3) { runOpFunc<deUint8>  (min3<deUint8>,  out, in1, in2, in3); }
    static void maxUint8   (void* out, const void* in1, const void* in2, const void* in3) { runOpFunc<deUint8>  (max3<deUint8>,  out, in1, in2, in3); }
    static void midUint8   (void* out, const void* in1, const void* in2, const void* in3) { runOpFunc<deUint8>  (mid3<deUint8>,  out, in1, in2, in3); }
    static void minInt16   (void* out, const void* in1, const void* in2, const void* in3) { runOpFunc<deInt16>  (min3<deInt16>,  out, in1, in2, in3); }
    static void maxInt16   (void* out, const void* in1, const void* in2, const void* in3) { runOpFunc<deInt16>  (max3<deInt16>,  out, in1, in2, in3); }
    static void midInt16   (void* out, const void* in1, const void* in2, const void* in3) { runOpFunc<deInt16>  (mid3<deInt16>,  out, in1, in2, in3); }
    static void minUint16  (void* out, const void* in1, const void* in2, const void* in3) { runOpFunc<deUint16> (min3<deUint16>, out, in1, in2, in3); }
    static void maxUint16  (void* out, const void* in1, const void* in2, const void* in3) { runOpFunc<deUint16> (max3<deUint16>, out, in1, in2, in3); }
    static void midUint16  (void* out, const void* in1, const void* in2, const void* in3) { runOpFunc<deUint16> (mid3<deUint16>, out, in1, in2, in3); }
    static void minInt32   (void* out, const void* in1, const void* in2, const void* in3) { runOpFunc<deInt32>  (min3<deInt32>,  out, in1, in2, in3); }
    static void maxInt32   (void* out, const void* in1, const void* in2, const void* in3) { runOpFunc<deInt32>  (max3<deInt32>,  out, in1, in2, in3); }
    static void midInt32   (void* out, const void* in1, const void* in2, const void* in3) { runOpFunc<deInt32>  (mid3<deInt32>,  out, in1, in2, in3); }
    static void minUint32  (void* out, const void* in1, const void* in2, const void* in3) { runOpFunc<deUint32> (min3<deUint32>, out, in1, in2, in3); }
    static void maxUint32  (void* out, const void* in1, const void* in2, const void* in3) { runOpFunc<deUint32> (max3<deUint32>, out, in1, in2, in3); }
    static void midUint32  (void* out, const void* in1, const void* in2, const void* in3) { runOpFunc<deUint32> (mid3<deUint32>, out, in1, in2, in3); }
    static void minInt64   (void* out, const void* in1, const void* in2, const void* in3) { runOpFunc<deInt64>  (min3<deInt64>,  out, in1, in2, in3); }
    static void maxInt64   (void* out, const void* in1, const void* in2, const void* in3) { runOpFunc<deInt64>  (max3<deInt64>,  out, in1, in2, in3); }
    static void midInt64   (void* out, const void* in1, const void* in2, const void* in3) { runOpFunc<deInt64>  (mid3<deInt64>,  out, in1, in2, in3); }
    static void minUint64  (void* out, const void* in1, const void* in2, const void* in3) { runOpFunc<deUint64> (min3<deUint64>, out, in1, in2, in3); }
    static void maxUint64  (void* out, const void* in1, const void* in2, const void* in3) { runOpFunc<deUint64> (max3<deUint64>, out, in1, in2, in3); }
    static void midUint64  (void* out, const void* in1, const void* in2, const void* in3) { runOpFunc<deUint64> (mid3<deUint64>, out, in1, in2, in3); }

    // NOTE(review): the float variants are instantiated with the same tcu::Float types the generators use,
    // which relies on tcu::Float providing operator< for std::min/std::max/std::sort — confirm.
    static void minFloat16 (void* out, const void* in1, const void* in2, const void* in3) { runOpFunc<tcu::Float16>(min3<tcu::Float16>, out, in1, in2, in3); }
    static void maxFloat16 (void* out, const void* in1, const void* in2, const void* in3) { runOpFunc<tcu::Float16>(max3<tcu::Float16>, out, in1, in2, in3); }
    static void midFloat16 (void* out, const void* in1, const void* in2, const void* in3) { runOpFunc<tcu::Float16>(mid3<tcu::Float16>, out, in1, in2, in3); }
    static void minFloat32 (void* out, const void* in1, const void* in2, const void* in3) { runOpFunc<tcu::Float32>(min3<tcu::Float32>, out, in1, in2, in3); }
    static void maxFloat32 (void* out, const void* in1, const void* in2, const void* in3) { runOpFunc<tcu::Float32>(max3<tcu::Float32>, out, in1, in2, in3); }
    static void midFloat32 (void* out, const void* in1, const void* in2, const void* in3) { runOpFunc<tcu::Float32>(mid3<tcu::Float32>, out, in1, in2, in3); }
    static void minFloat64 (void* out, const void* in1, const void* in2, const void* in3) { runOpFunc<tcu::Float64>(min3<tcu::Float64>, out, in1, in2, in3); }
    static void maxFloat64 (void* out, const void* in1, const void* in2, const void* in3) { runOpFunc<tcu::Float64>(max3<tcu::Float64>, out, in1, in2, in3); }
    static void midFloat64 (void* out, const void* in1, const void* in2, const void* in3) { runOpFunc<tcu::Float64>(mid3<tcu::Float64>, out, in1, in2, in3); }

    // Case for accessing the functions map.
    struct Case
    {
        BaseType      type;
        TypeSize      size;
        OperationType operation;

        // This is required for sorting in the map.
        bool operator< (const Case& other) const
        {
            return (toArray() < other.toArray());
        }

    private:
        // Flattens the three fields so cases can be compared lexicographically.
        std::array<int, 3> toArray () const
        {
            return std::array<int, 3>{{static_cast<int>(type), static_cast<int>(size), static_cast<int>(operation)}};
        }
    };

    // Helper map to correctly choose the right generator and operation function for the specific case being tested.
    using FuncPair = std::pair<GenerateCompFunc, OperationFunc>;
    using CaseMap  = std::map<Case, FuncPair>;

    static const CaseMap kFunctionsMap;

    GenerateCompFunc m_chosenGenerator;
    OperationFunc    m_chosenOperation;
    de::Random       m_random;

    const deUint32   m_operandSize;
    const deUint32   m_numComponents;
    const deUint32   m_componentSize;
};

// This map is used to choose how to generate inputs for each case and which operation to run on the CPU to calculate the reference
// results for the generated inputs.
const OperationManager::CaseMap OperationManager::kFunctionsMap = { { { BaseType::TYPE_INT, TypeSize::SIZE_8BIT, OperationType::MIN }, { genInt8, minInt8 } }, { { BaseType::TYPE_INT, TypeSize::SIZE_8BIT, OperationType::MAX }, { genInt8, maxInt8 } }, { { BaseType::TYPE_INT, TypeSize::SIZE_8BIT, OperationType::MID }, { genInt8, midInt8 } }, { { BaseType::TYPE_INT, TypeSize::SIZE_16BIT, OperationType::MIN }, { genInt16, minInt16 } }, { { BaseType::TYPE_INT, TypeSize::SIZE_16BIT, OperationType::MAX }, { genInt16, maxInt16 } }, { { BaseType::TYPE_INT, TypeSize::SIZE_16BIT, OperationType::MID }, { genInt16, midInt16 } }, { { BaseType::TYPE_INT, TypeSize::SIZE_32BIT, OperationType::MIN }, { genInt32, minInt32 } }, { { BaseType::TYPE_INT, TypeSize::SIZE_32BIT, OperationType::MAX }, { genInt32, maxInt32 } }, { { BaseType::TYPE_INT, TypeSize::SIZE_32BIT, OperationType::MID }, { genInt32, midInt32 } }, { { BaseType::TYPE_INT, TypeSize::SIZE_64BIT, OperationType::MIN }, { genInt64, minInt64 } }, { { BaseType::TYPE_INT, TypeSize::SIZE_64BIT, OperationType::MAX }, { genInt64, maxInt64 } }, { { BaseType::TYPE_INT, TypeSize::SIZE_64BIT, OperationType::MID }, { genInt64, midInt64 } }, { { BaseType::TYPE_UINT, TypeSize::SIZE_8BIT, OperationType::MIN }, { genUint8, minUint8 } }, { { BaseType::TYPE_UINT, TypeSize::SIZE_8BIT, OperationType::MAX }, { genUint8, maxUint8 } }, { { BaseType::TYPE_UINT, TypeSize::SIZE_8BIT, OperationType::MID }, { genUint8, midUint8 } }, { { BaseType::TYPE_UINT, TypeSize::SIZE_16BIT, OperationType::MIN }, { genUint16, minUint16 } }, { { BaseType::TYPE_UINT, TypeSize::SIZE_16BIT, OperationType::MAX }, { genUint16, maxUint16 } }, { { BaseType::TYPE_UINT, TypeSize::SIZE_16BIT, OperationType::MID }, { genUint16, midUint16 } }, { { BaseType::TYPE_UINT, TypeSize::SIZE_32BIT, OperationType::MIN }, { genUint32, minUint32 } }, { { BaseType::TYPE_UINT, TypeSize::SIZE_32BIT, OperationType::MAX }, { genUint32, maxUint32 } }, { { BaseType::TYPE_UINT, 
TypeSize::SIZE_32BIT, OperationType::MID }, { genUint32, midUint32 } }, { { BaseType::TYPE_UINT, TypeSize::SIZE_64BIT, OperationType::MIN }, { genUint64, minUint64 } }, { { BaseType::TYPE_UINT, TypeSize::SIZE_64BIT, OperationType::MAX }, { genUint64, maxUint64 } }, { { BaseType::TYPE_UINT, TypeSize::SIZE_64BIT, OperationType::MID }, { genUint64, midUint64 } }, { { BaseType::TYPE_FLOAT, TypeSize::SIZE_16BIT, OperationType::MIN }, { genFloat16, minFloat16 } }, { { BaseType::TYPE_FLOAT, TypeSize::SIZE_16BIT, OperationType::MAX }, { genFloat16, maxFloat16 } }, { { BaseType::TYPE_FLOAT, TypeSize::SIZE_16BIT, OperationType::MID }, { genFloat16, midFloat16 } }, { { BaseType::TYPE_FLOAT, TypeSize::SIZE_32BIT, OperationType::MIN }, { genFloat32, minFloat32 } }, { { BaseType::TYPE_FLOAT, TypeSize::SIZE_32BIT, OperationType::MAX }, { genFloat32, maxFloat32 } }, { { BaseType::TYPE_FLOAT, TypeSize::SIZE_32BIT, OperationType::MID }, { genFloat32, midFloat32 } }, { { BaseType::TYPE_FLOAT, TypeSize::SIZE_64BIT, OperationType::MIN }, { genFloat64, minFloat64 } }, { { BaseType::TYPE_FLOAT, TypeSize::SIZE_64BIT, OperationType::MAX }, { genFloat64, maxFloat64 } }, { { BaseType::TYPE_FLOAT, TypeSize::SIZE_64BIT, OperationType::MID }, { genFloat64, midFloat64 } }, }; OperationManager::OperationManager (const TestParams& params) : m_chosenGenerator {nullptr} , m_chosenOperation {nullptr} , m_random {params.randomSeed} , m_operandSize {params.operandSize()} , m_numComponents {params.numComponents()} , m_componentSize {params.componentSize()} { // Choose generator and CPU operation from the map. const Case paramCase{params.baseType, params.typeSize, params.operation}; const auto iter = kFunctionsMap.find(paramCase); DE_ASSERT(iter != kFunctionsMap.end()); m_chosenGenerator = iter->second.first; m_chosenOperation = iter->second.second; } // See TrinaryMinMaxCase::initPrograms for a description of the input buffer format. // Generates inputs with the chosen generator. 
void OperationManager::genInputBuffer (void* bufferPtr, deUint32 numOperations) { const deUint32 numOperands = numOperations * 3u; char* byteBuffer = reinterpret_cast(bufferPtr); for (deUint32 opIdx = 0u; opIdx < numOperands; ++opIdx) { char* compPtr = byteBuffer; for (deUint32 compIdx = 0u; compIdx < m_numComponents; ++compIdx) { m_chosenGenerator(m_random, reinterpret_cast(compPtr)); compPtr += m_componentSize; } byteBuffer += m_operandSize; } } // See TrinaryMinMaxCase::initPrograms for a description of the input and output buffer formats. // Calculates reference results on the CPU using the chosen operation and the input buffer. void OperationManager::calculateResult (void* referenceBuffer, void* inputBuffer, deUint32 numOperations) { char* outputByte = reinterpret_cast(referenceBuffer); char* inputByte = reinterpret_cast(inputBuffer); for (deUint32 opIdx = 0u; opIdx < numOperations; ++opIdx) { char* res = outputByte; char* op1 = inputByte; char* op2 = inputByte + m_operandSize; char* op3 = inputByte + m_operandSize * 2u; for (deUint32 compIdx = 0u; compIdx < m_numComponents; ++compIdx) { m_chosenOperation( reinterpret_cast(res), reinterpret_cast(op1), reinterpret_cast(op2), reinterpret_cast(op3)); res += m_componentSize; op1 += m_componentSize; op2 += m_componentSize; op3 += m_componentSize; } outputByte += m_operandSize; inputByte += m_operandSize * 3u; } } // See TrinaryMinMaxCase::initPrograms for a description of the output buffer format. 
OperationManager::ComparisonError OperationManager::compareResults (void* referenceBuffer, void* resultsBuffer, deUint32 numOperations) { char* referenceBytes = reinterpret_cast(referenceBuffer); char* resultsBytes = reinterpret_cast(resultsBuffer); for (deUint32 opIdx = 0u; opIdx < numOperations; ++opIdx) { char *refCompBytes = referenceBytes; char *resCompBytes = resultsBytes; for (deUint32 compIdx = 0u; compIdx < m_numComponents; ++compIdx) { if (deMemCmp(refCompBytes, resCompBytes, m_componentSize) != 0) return tcu::just(OperationComponent(opIdx, compIdx)); refCompBytes += m_componentSize; resCompBytes += m_componentSize; } referenceBytes += m_operandSize; resultsBytes += m_operandSize; } return tcu::Nothing; } class TrinaryMinMaxCase : public vkt::TestCase { public: using ReplacementsMap = std::map; TrinaryMinMaxCase (tcu::TestContext& testCtx, const std::string& name, const std::string& description, const TestParams& params); virtual ~TrinaryMinMaxCase (void) {} virtual void initPrograms (vk::SourceCollections& programCollection) const; virtual TestInstance* createInstance (Context& context) const; virtual void checkSupport (Context& context) const; ReplacementsMap getSpirVReplacements (void) const; static const deUint32 kArraySize; private: TestParams m_params; }; const deUint32 TrinaryMinMaxCase::kArraySize = 100u; class TrinaryMinMaxInstance : public vkt::TestInstance { public: TrinaryMinMaxInstance (Context& context, const TestParams& params); virtual ~TrinaryMinMaxInstance (void) {} virtual tcu::TestStatus iterate (void); private: TestParams m_params; }; TrinaryMinMaxCase::TrinaryMinMaxCase (tcu::TestContext& testCtx, const std::string& name, const std::string& description, const TestParams& params) : vkt::TestCase (testCtx, name, description) , m_params (params) {} TestInstance* TrinaryMinMaxCase::createInstance (Context& context) const { return new TrinaryMinMaxInstance{context, m_params}; } void TrinaryMinMaxCase::checkSupport (Context& context) const 
{ // These are always required. context.requireInstanceFunctionality("VK_KHR_get_physical_device_properties2"); context.requireDeviceFunctionality("VK_KHR_storage_buffer_storage_class"); context.requireDeviceFunctionality("VK_AMD_shader_trinary_minmax"); const auto devFeatures = context.getDeviceFeatures(); const auto storage16BitFeatures = context.get16BitStorageFeatures(); const auto storage8BitFeatures = context.get8BitStorageFeatures(); const auto shaderFeatures = context.getShaderFloat16Int8Features(); // Storage features. if (m_params.typeSize == TypeSize::SIZE_8BIT) { // We will be using 8-bit types in storage buffers. context.requireDeviceFunctionality("VK_KHR_8bit_storage"); if (!storage8BitFeatures.storageBuffer8BitAccess) TCU_THROW(NotSupportedError, "8-bit storage buffer access not supported"); } else if (m_params.typeSize == TypeSize::SIZE_16BIT) { // We will be using 16-bit types in storage buffers. context.requireDeviceFunctionality("VK_KHR_16bit_storage"); if (!storage16BitFeatures.storageBuffer16BitAccess) TCU_THROW(NotSupportedError, "16-bit storage buffer access not supported"); } // Shader type features. 
if (m_params.baseType == BaseType::TYPE_INT || m_params.baseType == BaseType::TYPE_UINT) { if (m_params.typeSize == TypeSize::SIZE_8BIT && !shaderFeatures.shaderInt8) TCU_THROW(NotSupportedError, "8-bit integers not supported in shaders"); else if (m_params.typeSize == TypeSize::SIZE_16BIT && !devFeatures.shaderInt16) TCU_THROW(NotSupportedError, "16-bit integers not supported in shaders"); else if (m_params.typeSize == TypeSize::SIZE_64BIT && !devFeatures.shaderInt64) TCU_THROW(NotSupportedError, "64-bit integers not supported in shaders"); } else // BaseType::TYPE_FLOAT { DE_ASSERT(m_params.typeSize != TypeSize::SIZE_8BIT); if (m_params.typeSize == TypeSize::SIZE_16BIT && !shaderFeatures.shaderFloat16) TCU_THROW(NotSupportedError, "16-bit floats not supported in shaders"); else if (m_params.typeSize == TypeSize::SIZE_64BIT && !devFeatures.shaderFloat64) TCU_THROW(NotSupportedError, "64-bit floats not supported in shaders"); } } TrinaryMinMaxCase::ReplacementsMap TrinaryMinMaxCase::getSpirVReplacements (void) const { ReplacementsMap replacements; // Capabilities and extensions. 
if (m_params.baseType == BaseType::TYPE_INT || m_params.baseType == BaseType::TYPE_UINT) { if (m_params.typeSize == TypeSize::SIZE_8BIT) replacements["CAPABILITIES"] += "OpCapability Int8\n"; else if (m_params.typeSize == TypeSize::SIZE_16BIT) replacements["CAPABILITIES"] += "OpCapability Int16\n"; else if (m_params.typeSize == TypeSize::SIZE_64BIT) replacements["CAPABILITIES"] += "OpCapability Int64\n"; } else // BaseType::TYPE_FLOAT { if (m_params.typeSize == TypeSize::SIZE_16BIT) replacements["CAPABILITIES"] += "OpCapability Float16\n"; else if (m_params.typeSize == TypeSize::SIZE_64BIT) replacements["CAPABILITIES"] += "OpCapability Float64\n"; } if (m_params.typeSize == TypeSize::SIZE_8BIT) { replacements["CAPABILITIES"] += "OpCapability StorageBuffer8BitAccess\n"; replacements["EXTENSIONS"] += "OpExtension \"SPV_KHR_8bit_storage\"\n"; } else if (m_params.typeSize == TypeSize::SIZE_16BIT) { replacements["CAPABILITIES"] += "OpCapability StorageBuffer16BitAccess\n"; replacements["EXTENSIONS"] += "OpExtension \"SPV_KHR_16bit_storage\"\n"; } // Operand size in bytes. const deUint32 opSize = m_params.operandSize(); replacements["OPERAND_SIZE"] = de::toString(opSize); replacements["OPERAND_SIZE_2TIMES"] = de::toString(opSize * 2u); replacements["OPERAND_SIZE_3TIMES"] = de::toString(opSize * 3u); // Array size. replacements["ARRAY_SIZE"] = de::toString(kArraySize); // Types and operand type: define the base integer or float type and the vector type if needed, then set the operand type replacement. const std::string vecSize = de::toString(m_params.numComponents()); const std::string bitSize = de::toString(m_params.componentSize() * 8u); if (m_params.baseType == BaseType::TYPE_INT || m_params.baseType == BaseType::TYPE_UINT) { const std::string signBit = (m_params.baseType == BaseType::TYPE_INT ? "1" : "0"); const std::string typePrefix = (m_params.baseType == BaseType::TYPE_UINT ? 
"u" : ""); std::string baseTypeName; // 32-bit integers are already defined in the default shader text. if (m_params.typeSize != TypeSize::SIZE_32BIT) { baseTypeName = typePrefix + "int" + bitSize + "_t"; replacements["TYPES"] += "%" + baseTypeName + " = OpTypeInt " + bitSize + " " + signBit + "\n"; } else { baseTypeName = typePrefix + "int"; } if (m_params.aggregation == AggregationType::SCALAR) { replacements["OPERAND_TYPE"] = "%" + baseTypeName; } else { const std::string typeName = "%v" + vecSize + baseTypeName; // %v3uint is already defined in the default shader text. if (m_params.baseType != BaseType::TYPE_UINT || m_params.typeSize != TypeSize::SIZE_32BIT || m_params.aggregation != AggregationType::VEC3) { replacements["TYPES"] += typeName + " = OpTypeVector %" + baseTypeName + " " + vecSize + "\n"; } replacements["OPERAND_TYPE"] = typeName; } } else // BaseType::TYPE_FLOAT { const std::string baseTypeName = "float" + bitSize + "_t"; replacements["TYPES"] += "%" + baseTypeName + " = OpTypeFloat " + bitSize + "\n"; if (m_params.aggregation == AggregationType::SCALAR) { replacements["OPERAND_TYPE"] = "%" + baseTypeName; } else { const std::string typeName = "%v" + vecSize + baseTypeName; replacements["TYPES"] += typeName + " = OpTypeVector %" + baseTypeName + " " + vecSize + "\n"; replacements["OPERAND_TYPE"] = typeName; } } // Operation name. const static std::vector opTypeStr = { "Min", "Max", "Mid" }; const static std::vector opPrefix = { "S", "U", "F" }; replacements["OPERATION_NAME"] = opPrefix[static_cast(m_params.baseType)] + opTypeStr[static_cast(m_params.operation)] + "3AMD"; return replacements; } void TrinaryMinMaxCase::initPrograms (vk::SourceCollections& programCollection) const { // The shader below uses an input buffer at set 0 binding 0 and an output buffer at set 0 binding 1. 
Their structure is similar // to the code below: // // struct Operands { // op1; // op2; // op3; // }; // // layout (set=0, binding=0, std430) buffer InputBlock { // Operands operands[]; // }; // // layout (set=0, binding=1, std430) buffer OutputBlock { // result[]; // }; // // Where can be int8_t, uint32_t, float, etc. So in the input buffer the operands are "grouped" per operation and can // have several components each and the output buffer contains an array of results, one per trio of input operands. std::ostringstream shaderStr; shaderStr << "; SPIR-V\n" << "; Version: 1.5\n" << " OpCapability Shader\n" << "${CAPABILITIES:opt}" << " OpExtension \"SPV_KHR_storage_buffer_storage_class\"\n" << " OpExtension \"SPV_AMD_shader_trinary_minmax\"\n" << "${EXTENSIONS:opt}" << " %std450 = OpExtInstImport \"GLSL.std.450\"\n" << " %trinary = OpExtInstImport \"SPV_AMD_shader_trinary_minmax\"\n" << " OpMemoryModel Logical GLSL450\n" << " OpEntryPoint GLCompute %main \"main\" %gl_GlobalInvocationID %output_buffer %input_buffer\n" << " OpExecutionMode %main LocalSize 1 1 1\n" << " OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId\n" << " OpDecorate %results_array_t ArrayStride ${OPERAND_SIZE}\n" << " OpMemberDecorate %OutputBlock 0 Offset 0\n" << " OpDecorate %OutputBlock Block\n" << " OpDecorate %output_buffer DescriptorSet 0\n" << " OpDecorate %output_buffer Binding 1\n" << " OpMemberDecorate %Operands 0 Offset 0\n" << " OpMemberDecorate %Operands 1 Offset ${OPERAND_SIZE}\n" << " OpMemberDecorate %Operands 2 Offset ${OPERAND_SIZE_2TIMES}\n" << " OpDecorate %_arr_Operands_arraysize ArrayStride ${OPERAND_SIZE_3TIMES}\n" << " OpMemberDecorate %InputBlock 0 Offset 0\n" << " OpDecorate %InputBlock Block\n" << " OpDecorate %input_buffer DescriptorSet 0\n" << " OpDecorate %input_buffer Binding 0\n" << " OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize\n" << " %void = OpTypeVoid\n" << " %voidfunc = OpTypeFunction %void\n" << " %int = OpTypeInt 32 1\n" << " %uint = 
OpTypeInt 32 0\n" << " %v3uint = OpTypeVector %uint 3\n" << "${TYPES:opt}" << " %int_0 = OpConstant %int 0\n" << " %int_1 = OpConstant %int 1\n" << " %int_2 = OpConstant %int 2\n" << " %uint_1 = OpConstant %uint 1\n" << " %uint_0 = OpConstant %uint 0\n" << " %arraysize = OpConstant %uint ${ARRAY_SIZE}\n" << " %_ptr_Function_uint = OpTypePointer Function %uint\n" << " %_ptr_Input_v3uint = OpTypePointer Input %v3uint\n" << " %gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input\n" << " %_ptr_Input_uint = OpTypePointer Input %uint\n" << " %results_array_t = OpTypeArray ${OPERAND_TYPE} %arraysize\n" << " %Operands = OpTypeStruct ${OPERAND_TYPE} ${OPERAND_TYPE} ${OPERAND_TYPE}\n" << " %_arr_Operands_arraysize = OpTypeArray %Operands %arraysize\n" << " %OutputBlock = OpTypeStruct %results_array_t\n" << " %InputBlock = OpTypeStruct %_arr_Operands_arraysize\n" << "%_ptr_Uniform_OutputBlock = OpTypePointer StorageBuffer %OutputBlock\n" << " %_ptr_Uniform_InputBlock = OpTypePointer StorageBuffer %InputBlock\n" << " %output_buffer = OpVariable %_ptr_Uniform_OutputBlock StorageBuffer\n" << " %input_buffer = OpVariable %_ptr_Uniform_InputBlock StorageBuffer\n" << " %optype_ptr = OpTypePointer StorageBuffer ${OPERAND_TYPE}\n" << " %gl_WorkGroupSize = OpConstantComposite %v3uint %uint_1 %uint_1 %uint_1\n" << " %main = OpFunction %void None %voidfunc\n" << " %mainlabel = OpLabel\n" << " %gidxptr = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0\n" << " %idx = OpLoad %uint %gidxptr\n" << " %op1ptr = OpAccessChain %optype_ptr %input_buffer %int_0 %idx %int_0\n" << " %op1 = OpLoad ${OPERAND_TYPE} %op1ptr\n" << " %op2ptr = OpAccessChain %optype_ptr %input_buffer %int_0 %idx %int_1\n" << " %op2 = OpLoad ${OPERAND_TYPE} %op2ptr\n" << " %op3ptr = OpAccessChain %optype_ptr %input_buffer %int_0 %idx %int_2\n" << " %op3 = OpLoad ${OPERAND_TYPE} %op3ptr\n" << " %result = OpExtInst ${OPERAND_TYPE} %trinary ${OPERATION_NAME} %op1 %op2 %op3\n" << " %resultptr = 
OpAccessChain %optype_ptr %output_buffer %int_0 %idx\n" << " OpStore %resultptr %result\n" << " OpReturn\n" << " OpFunctionEnd\n" ; const tcu::StringTemplate shaderTemplate {shaderStr.str()}; const vk::SpirVAsmBuildOptions buildOptions { VK_MAKE_API_VERSION(0, 1, 2, 0), vk::SPIRV_VERSION_1_5}; programCollection.spirvAsmSources.add("comp", &buildOptions) << shaderTemplate.specialize(getSpirVReplacements()); } TrinaryMinMaxInstance::TrinaryMinMaxInstance (Context& context, const TestParams& params) : vkt::TestInstance (context) , m_params (params) {} tcu::TestStatus TrinaryMinMaxInstance::iterate (void) { const auto& vkd = m_context.getDeviceInterface(); const auto device = m_context.getDevice(); auto& allocator = m_context.getDefaultAllocator(); const auto queue = m_context.getUniversalQueue(); const auto queueIndex = m_context.getUniversalQueueFamilyIndex(); constexpr auto kNumOperations = TrinaryMinMaxCase::kArraySize; const vk::VkDeviceSize kInputBufferSize = static_cast(kNumOperations * 3u * m_params.operandSize()); const vk::VkDeviceSize kOutputBufferSize = static_cast(kNumOperations * m_params.operandSize()); // Single output per operation. // Create input, output and reference buffers. auto inputBufferInfo = vk::makeBufferCreateInfo(kInputBufferSize, vk::VK_BUFFER_USAGE_STORAGE_BUFFER_BIT); auto outputBufferInfo = vk::makeBufferCreateInfo(kOutputBufferSize, vk::VK_BUFFER_USAGE_STORAGE_BUFFER_BIT); vk::BufferWithMemory inputBuffer {vkd, device, allocator, inputBufferInfo, vk::MemoryRequirement::HostVisible}; vk::BufferWithMemory outputBuffer {vkd, device, allocator, outputBufferInfo, vk::MemoryRequirement::HostVisible}; std::unique_ptr referenceBuffer {new char[static_cast(kOutputBufferSize)]}; // Fill buffers with initial contents. 
auto& inputAlloc = inputBuffer.getAllocation(); auto& outputAlloc = outputBuffer.getAllocation(); void* inputBufferPtr = static_cast(inputAlloc.getHostPtr()) + inputAlloc.getOffset(); void* outputBufferPtr = static_cast(outputAlloc.getHostPtr()) + outputAlloc.getOffset(); void* referenceBufferPtr = referenceBuffer.get(); deMemset(inputBufferPtr, 0, static_cast(kInputBufferSize)); deMemset(outputBufferPtr, 0, static_cast(kOutputBufferSize)); deMemset(referenceBufferPtr, 0, static_cast(kOutputBufferSize)); // Generate input buffer and calculate reference results. OperationManager opMan{m_params}; opMan.genInputBuffer(inputBufferPtr, kNumOperations); opMan.calculateResult(referenceBufferPtr, inputBufferPtr, kNumOperations); // Flush buffer memory before starting. vk::flushAlloc(vkd, device, inputAlloc); vk::flushAlloc(vkd, device, outputAlloc); // Descriptor set layout. vk::DescriptorSetLayoutBuilder layoutBuilder; layoutBuilder.addSingleBinding(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, vk::VK_SHADER_STAGE_COMPUTE_BIT); layoutBuilder.addSingleBinding(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, vk::VK_SHADER_STAGE_COMPUTE_BIT); auto descriptorSetLayout = layoutBuilder.build(vkd, device); // Descriptor pool. vk::DescriptorPoolBuilder poolBuilder; poolBuilder.addType(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 2u); auto descriptorPool = poolBuilder.build(vkd, device, vk::VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u); // Descriptor set. const auto descriptorSet = vk::makeDescriptorSet(vkd, device, descriptorPool.get(), descriptorSetLayout.get()); // Update descriptor set using the buffers. 
const auto inputBufferDescriptorInfo = vk::makeDescriptorBufferInfo(inputBuffer.get(), 0ull, VK_WHOLE_SIZE); const auto outputBufferDescriptorInfo = vk::makeDescriptorBufferInfo(outputBuffer.get(), 0ull, VK_WHOLE_SIZE); vk::DescriptorSetUpdateBuilder updateBuilder; updateBuilder.writeSingle(descriptorSet.get(), vk::DescriptorSetUpdateBuilder::Location::binding(0u), vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &inputBufferDescriptorInfo); updateBuilder.writeSingle(descriptorSet.get(), vk::DescriptorSetUpdateBuilder::Location::binding(1u), vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputBufferDescriptorInfo); updateBuilder.update(vkd, device); // Create compute pipeline. auto shaderModule = vk::createShaderModule(vkd, device, m_context.getBinaryCollection().get("comp"), 0u); auto pipelineLayout = vk::makePipelineLayout(vkd, device, descriptorSetLayout.get()); const vk::VkComputePipelineCreateInfo pipelineCreateInfo = { vk::VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, nullptr, 0u, // flags { // compute shader vk::VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, // VkStructureType sType; nullptr, // const void* pNext; 0u, // VkPipelineShaderStageCreateFlags flags; vk::VK_SHADER_STAGE_COMPUTE_BIT, // VkShaderStageFlagBits stage; shaderModule.get(), // VkShaderModule module; "main", // const char* pName; nullptr, // const VkSpecializationInfo* pSpecializationInfo; }, pipelineLayout.get(), // layout DE_NULL, // basePipelineHandle 0, // basePipelineIndex }; auto pipeline = vk::createComputePipeline(vkd, device, DE_NULL, &pipelineCreateInfo); // Synchronization barriers. 
auto inputBufferHostToDevBarrier = vk::makeBufferMemoryBarrier(vk::VK_ACCESS_HOST_WRITE_BIT, vk::VK_ACCESS_SHADER_READ_BIT, inputBuffer.get(), 0ull, VK_WHOLE_SIZE); auto outputBufferHostToDevBarrier = vk::makeBufferMemoryBarrier(vk::VK_ACCESS_HOST_WRITE_BIT, vk::VK_ACCESS_SHADER_WRITE_BIT, outputBuffer.get(), 0ull, VK_WHOLE_SIZE); auto outputBufferDevToHostBarrier = vk::makeBufferMemoryBarrier(vk::VK_ACCESS_SHADER_WRITE_BIT, vk::VK_ACCESS_HOST_READ_BIT, outputBuffer.get(), 0ull, VK_WHOLE_SIZE); // Command buffer. auto cmdPool = vk::makeCommandPool(vkd, device, queueIndex); auto cmdBufferPtr = vk::allocateCommandBuffer(vkd, device, cmdPool.get(), vk::VK_COMMAND_BUFFER_LEVEL_PRIMARY); auto cmdBuffer = cmdBufferPtr.get(); // Record and submit commands. vk::beginCommandBuffer(vkd, cmdBuffer); vkd.cmdBindPipeline(cmdBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, pipeline.get()); vkd.cmdBindDescriptorSets(cmdBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, pipelineLayout.get(), 0, 1u, &descriptorSet.get(), 0u, nullptr); vkd.cmdPipelineBarrier(cmdBuffer, vk::VK_PIPELINE_STAGE_HOST_BIT, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0u, 0u, nullptr, 1u, &inputBufferHostToDevBarrier, 0u, nullptr); vkd.cmdPipelineBarrier(cmdBuffer, vk::VK_PIPELINE_STAGE_HOST_BIT, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0u, 0u, nullptr, 1u, &outputBufferHostToDevBarrier, 0u, nullptr); vkd.cmdDispatch(cmdBuffer, kNumOperations, 1u, 1u); vkd.cmdPipelineBarrier(cmdBuffer, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, vk::VK_PIPELINE_STAGE_HOST_BIT, 0u, 0u, nullptr, 1u, &outputBufferDevToHostBarrier, 0u, nullptr); vk::endCommandBuffer(vkd, cmdBuffer); vk::submitCommandsAndWait(vkd, device, queue, cmdBuffer); // Verify output buffer contents. 
vk::invalidateAlloc(vkd, device, outputAlloc); const auto error = opMan.compareResults(referenceBufferPtr, outputBufferPtr, kNumOperations); if (!error) return tcu::TestStatus::pass("Pass"); std::ostringstream msg; msg << "Value mismatch at operation " << error.get().first << " in component " << error.get().second; return tcu::TestStatus::fail(msg.str()); } } // anonymous tcu::TestCaseGroup* createTrinaryMinMaxGroup (tcu::TestContext& testCtx) { deUint32 seed = 0xFEE768FCu; de::MovePtr group{new tcu::TestCaseGroup{testCtx, "amd_trinary_minmax", "Tests for VK_AMD_trinary_minmax operations"}}; static const std::vector> operationTypes = { { OperationType::MIN, "min3" }, { OperationType::MAX, "max3" }, { OperationType::MID, "mid3" }, }; static const std::vector> baseTypes = { { BaseType::TYPE_INT, "i" }, { BaseType::TYPE_UINT, "u" }, { BaseType::TYPE_FLOAT, "f" }, }; static const std::vector> typeSizes = { { TypeSize::SIZE_8BIT, "8" }, { TypeSize::SIZE_16BIT, "16" }, { TypeSize::SIZE_32BIT, "32" }, { TypeSize::SIZE_64BIT, "64" }, }; static const std::vector> aggregationTypes = { { AggregationType::SCALAR, "scalar" }, { AggregationType::VEC2, "vec2" }, { AggregationType::VEC3, "vec3" }, { AggregationType::VEC4, "vec4" }, }; for (const auto& opType : operationTypes) { const std::string opDesc = "Tests for " + opType.second + " operation"; de::MovePtr opGroup{new tcu::TestCaseGroup{testCtx, opType.second.c_str(), opDesc.c_str()}}; for (const auto& baseType : baseTypes) for (const auto& typeSize : typeSizes) { // There are no 8-bit floats. 
if (baseType.first == BaseType::TYPE_FLOAT && typeSize.first == TypeSize::SIZE_8BIT) continue; const std::string typeName = baseType.second + typeSize.second; const std::string typeDesc = "Tests using " + typeName + " data"; de::MovePtr typeGroup{new tcu::TestCaseGroup{testCtx, typeName.c_str(), typeDesc.c_str()}}; for (const auto& aggType : aggregationTypes) { const TestParams params = { opType.first, // OperationType operation; baseType.first, // BaseType baseType; typeSize.first, // TypeSize typeSize; aggType.first, // AggregationType aggregation; seed++, // deUint32 randomSeed; }; typeGroup->addChild(new TrinaryMinMaxCase{testCtx, aggType.second, "", params}); } opGroup->addChild(typeGroup.release()); } group->addChild(opGroup.release()); } return group.release(); } } // SpirVAssembly } // vkt