1/* 2 * Copyright © Microsoft Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#include "dxil_container.h" 25#include "dxil_module.h" 26 27#include "util/u_debug.h" 28 29#include <assert.h> 30 31const uint32_t DXIL_DXBC = DXIL_FOURCC('D', 'X', 'B', 'C'); 32 33void 34dxil_container_init(struct dxil_container *c) 35{ 36 blob_init(&c->parts); 37 c->num_parts = 0; 38} 39 40void 41dxil_container_finish(struct dxil_container *c) 42{ 43 blob_finish(&c->parts); 44} 45 46static bool 47add_part_header(struct dxil_container *c, 48 enum dxil_part_fourcc fourcc, 49 uint32_t part_size) 50{ 51 assert(c->parts.size < UINT_MAX); 52 unsigned offset = (unsigned)c->parts.size; 53 if (!blob_write_bytes(&c->parts, &fourcc, sizeof(fourcc)) || 54 !blob_write_bytes(&c->parts, &part_size, sizeof(part_size))) 55 return false; 56 57 assert(c->num_parts < DXIL_MAX_PARTS); 58 c->part_offsets[c->num_parts++] = offset; 59 return true; 60} 61 62static bool 63add_part(struct dxil_container *c, 64 enum dxil_part_fourcc fourcc, 65 const void *part_data, uint32_t part_size) 66{ 67 return add_part_header(c, fourcc, part_size) && 68 blob_write_bytes(&c->parts, part_data, part_size); 69} 70 71bool 72dxil_container_add_features(struct dxil_container *c, 73 const struct dxil_features *features) 74{ 75 /* DXIL feature info is a bitfield packed into a uint64_t. */ 76 static_assert(sizeof(struct dxil_features) <= sizeof(uint64_t), 77 "Expected dxil_features to fit into a uint64_t"); 78 uint64_t bits = 0; 79 memcpy(&bits, features, sizeof(struct dxil_features)); 80 return add_part(c, DXIL_SFI0, &bits, sizeof(uint64_t)); 81} 82 83typedef struct { 84 struct { 85 const char *name; 86 uint32_t offset; 87 } entries[DXIL_SHADER_MAX_IO_ROWS]; 88 uint32_t num_entries; 89} name_offset_cache_t; 90 91static uint32_t 92get_semantic_name_offset(name_offset_cache_t *cache, const char *name, 93 struct _mesa_string_buffer *buf, uint32_t buf_offset, 94 bool validator_7) 95{ 96 uint32_t offset = buf->length + buf_offset; 97 98 /* DXC doesn't de-duplicate arbitrary semantic names until validator 1.7, only SVs. */ 99 if (validator_7 || strncmp(name, "SV_", 3) == 0) { 100 /* consider replacing this with a binary search using rb_tree */ 101 for (unsigned i = 0; i < cache->num_entries; ++i) { 102 if (!strcmp(name, cache->entries[i].name)) 103 return cache->entries[i].offset; 104 } 105 106 cache->entries[cache->num_entries].name = name; 107 cache->entries[cache->num_entries].offset = offset; 108 ++cache->num_entries; 109 } 110 _mesa_string_buffer_append_len(buf, name, strlen(name) + 1); 111 112 return offset; 113} 114 115static uint32_t 116collect_semantic_names(unsigned num_records, 117 struct dxil_signature_record *io_data, 118 struct _mesa_string_buffer *buf, 119 uint32_t buf_offset, 120 bool validator_7) 121{ 122 name_offset_cache_t cache; 123 cache.num_entries = 0; 124 125 for (unsigned i = 0; i < num_records; ++i) { 126 struct dxil_signature_record *io = &io_data[i]; 127 uint32_t offset = get_semantic_name_offset(&cache, io->name, buf, buf_offset, validator_7); 128 for (unsigned j = 0; j < io->num_elements; ++j) 129 io->elements[j].semantic_name_offset = offset; 130 } 131 if (validator_7 && buf->length % sizeof(uint32_t) != 0) { 132 unsigned padding_to_add = sizeof(uint32_t) - (buf->length % sizeof(uint32_t)); 133 char padding[sizeof(uint32_t)] = { 0 }; 134 _mesa_string_buffer_append_len(buf, padding, padding_to_add); 135 } 136 return buf_offset + buf->length; 137} 138 139bool 140dxil_container_add_io_signature(struct dxil_container *c, 141 enum dxil_part_fourcc part, 142 unsigned num_records, 143 struct dxil_signature_record *io_data, 144 bool validator_7) 145{ 146 struct { 147 uint32_t param_count; 148 uint32_t param_offset; 149 } header; 150 header.param_count = 0; 151 uint32_t fixed_size = sizeof(header); 152 header.param_offset = fixed_size; 153 154 bool retval = true; 155 156 for (unsigned i = 0; i < num_records; ++i) { 157 /* TODO: 158 * - Here we need to check whether the value is actually part of the 159 * signature */ 160 fixed_size += sizeof(struct dxil_signature_element) * io_data[i].num_elements; 161 header.param_count += io_data[i].num_elements; 162 } 163 164 struct _mesa_string_buffer *names = 165 _mesa_string_buffer_create(NULL, 1024); 166 167 uint32_t last_offset = collect_semantic_names(num_records, io_data, 168 names, fixed_size, 169 validator_7); 170 171 172 if (!add_part_header(c, part, last_offset) || 173 !blob_write_bytes(&c->parts, &header, sizeof(header))) { 174 retval = false; 175 goto cleanup; 176 } 177 178 /* write all parts */ 179 for (unsigned i = 0; i < num_records; ++i) 180 for (unsigned j = 0; j < io_data[i].num_elements; ++j) { 181 if (!blob_write_bytes(&c->parts, &io_data[i].elements[j], 182 sizeof(io_data[i].elements[j]))) { 183 retval = false; 184 goto cleanup; 185 } 186 } 187 188 /* write all names */ 189 190 if (!blob_write_bytes(&c->parts, names->buf, names->length)) 191 retval = false; 192 193cleanup: 194 _mesa_string_buffer_destroy(names); 195 return retval; 196} 197 198static uint32_t 199compute_input_output_table_dwords(unsigned input_vectors, unsigned output_vectors) 200{ 201 return ((output_vectors + 7) >> 3) * input_vectors * 4; 202} 203 204bool 205dxil_container_add_state_validation(struct dxil_container *c, 206 const struct dxil_module *m, 207 struct dxil_validation_state *state) 208{ 209 uint32_t psv_size = m->minor_validator >= 6 ? 210 sizeof(struct dxil_psv_runtime_info_2) : 211 sizeof(struct dxil_psv_runtime_info_1); 212 uint32_t resource_bind_info_size = m->minor_validator >= 6 ? 213 sizeof(struct dxil_resource_v1) : sizeof(struct dxil_resource_v0); 214 uint32_t dxil_pvs_sig_size = sizeof(struct dxil_psv_signature_element); 215 uint32_t resource_count = state->num_resources; 216 217 uint32_t size = psv_size + 2 * sizeof(uint32_t); 218 if (resource_count > 0) { 219 size += sizeof (uint32_t) + 220 resource_bind_info_size * resource_count; 221 } 222 uint32_t string_table_size = (m->sem_string_table->length + 3) & ~3u; 223 size += sizeof(uint32_t) + string_table_size; 224 225 size += sizeof(uint32_t) + m->sem_index_table.size * sizeof(uint32_t); 226 227 if (m->num_sig_inputs || m->num_sig_outputs || m->num_sig_patch_consts) { 228 size += sizeof(uint32_t); 229 } 230 231 size += dxil_pvs_sig_size * m->num_sig_inputs; 232 size += dxil_pvs_sig_size * m->num_sig_outputs; 233 size += dxil_pvs_sig_size * m->num_sig_patch_consts; 234 235 state->state.psv1.sig_input_vectors = (uint8_t)m->num_psv_inputs; 236 237 for (unsigned i = 0; i < 4; ++i) 238 state->state.psv1.sig_output_vectors[i] = (uint8_t)m->num_psv_outputs[i]; 239 240 // TODO: Add viewID records size 241 242 uint32_t dependency_table_size = 0; 243 if (state->state.psv1.sig_input_vectors > 0) { 244 for (unsigned i = 0; i < 4; ++i) { 245 if (state->state.psv1.sig_output_vectors[i] > 0) 246 dependency_table_size += sizeof(uint32_t) * 247 compute_input_output_table_dwords(state->state.psv1.sig_input_vectors, 248 state->state.psv1.sig_output_vectors[i]); 249 } 250 if (state->state.psv1.shader_stage == DXIL_HULL_SHADER && state->state.psv1.sig_patch_const_or_prim_vectors) { 251 dependency_table_size += sizeof(uint32_t) * compute_input_output_table_dwords(state->state.psv1.sig_input_vectors, 252 state->state.psv1.sig_patch_const_or_prim_vectors); 253 } 254 } 255 if (state->state.psv1.shader_stage == DXIL_DOMAIN_SHADER && 256 state->state.psv1.sig_patch_const_or_prim_vectors && 257 state->state.psv1.sig_output_vectors[0]) { 258 dependency_table_size += sizeof(uint32_t) * compute_input_output_table_dwords( 259 state->state.psv1.sig_patch_const_or_prim_vectors, state->state.psv1.sig_output_vectors[0]); 260 } 261 size += dependency_table_size; 262 // TODO: Domain shader table goes here 263 264 if (!add_part_header(c, DXIL_PSV0, size)) 265 return false; 266 267 if (!blob_write_bytes(&c->parts, &psv_size, sizeof(psv_size))) 268 return false; 269 270 if (!blob_write_bytes(&c->parts, &state->state, psv_size)) 271 return false; 272 273 if (!blob_write_bytes(&c->parts, &resource_count, sizeof(resource_count))) 274 return false; 275 276 if (resource_count > 0) { 277 if (!blob_write_bytes(&c->parts, &resource_bind_info_size, sizeof(resource_bind_info_size)) || 278 !blob_write_bytes(&c->parts, state->resources.v0, resource_bind_info_size * state->num_resources)) 279 return false; 280 } 281 282 283 uint32_t fill = 0; 284 if (!blob_write_bytes(&c->parts, &string_table_size, sizeof(string_table_size)) || 285 !blob_write_bytes(&c->parts, m->sem_string_table->buf, m->sem_string_table->length) || 286 !blob_write_bytes(&c->parts, &fill, string_table_size - m->sem_string_table->length)) 287 return false; 288 289 if (!blob_write_bytes(&c->parts, &m->sem_index_table.size, sizeof(uint32_t))) 290 return false; 291 292 if (m->sem_index_table.size > 0) { 293 if (!blob_write_bytes(&c->parts, m->sem_index_table.data, 294 m->sem_index_table.size * sizeof(uint32_t))) 295 return false; 296 } 297 298 if (m->num_sig_inputs || m->num_sig_outputs || m->num_sig_patch_consts) { 299 if (!blob_write_bytes(&c->parts, &dxil_pvs_sig_size, sizeof(dxil_pvs_sig_size))) 300 return false; 301 302 if (!blob_write_bytes(&c->parts, &m->psv_inputs, dxil_pvs_sig_size * m->num_sig_inputs)) 303 return false; 304 305 if (!blob_write_bytes(&c->parts, &m->psv_outputs, dxil_pvs_sig_size * m->num_sig_outputs)) 306 return false; 307 308 if (!blob_write_bytes(&c->parts, &m->psv_patch_consts, dxil_pvs_sig_size * m->num_sig_patch_consts)) 309 return false; 310 } 311 312 // TODO: Handle case when ViewID is used 313 314 // TODO: Handle sig input output dependency table 315 316 for (uint32_t i = 0; i < dependency_table_size; ++i) 317 blob_write_uint8(&c->parts, 0); 318 319 return true; 320} 321 322bool 323dxil_container_add_module(struct dxil_container *c, 324 const struct dxil_module *m) 325{ 326 assert(m->buf.buf_bits == 0); // make sure the module is fully flushed 327 uint32_t version = (m->shader_kind << 16) | 328 (m->major_version << 4) | 329 m->minor_version; 330 uint32_t size = 6 * sizeof(uint32_t) + m->buf.blob.size; 331 assert(size % sizeof(uint32_t) == 0); 332 uint32_t uint32_size = size / sizeof(uint32_t); 333 uint32_t magic = 0x4C495844; 334 uint32_t dxil_version = 1 << 8; // I have no idea... 335 uint32_t bitcode_offset = 16; 336 uint32_t bitcode_size = m->buf.blob.size; 337 338 return add_part_header(c, DXIL_DXIL, size) && 339 blob_write_bytes(&c->parts, &version, sizeof(version)) && 340 blob_write_bytes(&c->parts, &uint32_size, sizeof(uint32_size)) && 341 blob_write_bytes(&c->parts, &magic, sizeof(magic)) && 342 blob_write_bytes(&c->parts, &dxil_version, sizeof(dxil_version)) && 343 blob_write_bytes(&c->parts, &bitcode_offset, sizeof(bitcode_offset)) && 344 blob_write_bytes(&c->parts, &bitcode_size, sizeof(bitcode_size)) && 345 blob_write_bytes(&c->parts, m->buf.blob.data, m->buf.blob.size); 346} 347 348bool 349dxil_container_write(struct dxil_container *c, struct blob *blob) 350{ 351 assert(blob->size == 0); 352 if (!blob_write_bytes(blob, &DXIL_DXBC, sizeof(DXIL_DXBC))) 353 return false; 354 355 const uint8_t unsigned_digest[16] = { 0 }; // null-digest means unsigned 356 if (!blob_write_bytes(blob, unsigned_digest, sizeof(unsigned_digest))) 357 return false; 358 359 uint16_t major_version = 1; 360 uint16_t minor_version = 0; 361 if (!blob_write_bytes(blob, &major_version, sizeof(major_version)) || 362 !blob_write_bytes(blob, &minor_version, sizeof(minor_version))) 363 return false; 364 365 size_t header_size = 32 + 4 * c->num_parts; 366 size_t size = header_size + c->parts.size; 367 assert(size <= UINT32_MAX); 368 uint32_t container_size = (uint32_t)size; 369 if (!blob_write_bytes(blob, &container_size, sizeof(container_size))) 370 return false; 371 372 uint32_t part_offsets[DXIL_MAX_PARTS]; 373 for (int i = 0; i < c->num_parts; ++i) { 374 size_t offset = header_size + c->part_offsets[i]; 375 assert(offset <= UINT32_MAX); 376 part_offsets[i] = (uint32_t)offset; 377 } 378 379 if (!blob_write_bytes(blob, &c->num_parts, sizeof(c->num_parts)) || 380 !blob_write_bytes(blob, part_offsets, sizeof(uint32_t) * c->num_parts) || 381 !blob_write_bytes(blob, c->parts.data, c->parts.size)) 382 return false; 383 384 return true; 385} 386