1/*
2 * Copyright © Microsoft Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#include "dxil_container.h"
25#include "dxil_module.h"
26
27#include "util/u_debug.h"
28
29#include <assert.h>
30
/* FourCC tag written as the very first field by dxil_container_write(). */
const uint32_t DXIL_DXBC = DXIL_FOURCC('D', 'X', 'B', 'C');
32
33void
34dxil_container_init(struct dxil_container *c)
35{
36   blob_init(&c->parts);
37   c->num_parts = 0;
38}
39
40void
41dxil_container_finish(struct dxil_container *c)
42{
43   blob_finish(&c->parts);
44}
45
46static bool
47add_part_header(struct dxil_container *c,
48                enum dxil_part_fourcc fourcc,
49                uint32_t part_size)
50{
51   assert(c->parts.size < UINT_MAX);
52   unsigned offset = (unsigned)c->parts.size;
53   if (!blob_write_bytes(&c->parts, &fourcc, sizeof(fourcc)) ||
54       !blob_write_bytes(&c->parts, &part_size, sizeof(part_size)))
55      return false;
56
57   assert(c->num_parts < DXIL_MAX_PARTS);
58   c->part_offsets[c->num_parts++] = offset;
59   return true;
60}
61
62static bool
63add_part(struct dxil_container *c,
64         enum dxil_part_fourcc fourcc,
65         const void *part_data, uint32_t part_size)
66{
67   return add_part_header(c, fourcc, part_size) &&
68          blob_write_bytes(&c->parts, part_data, part_size);
69}
70
71bool
72dxil_container_add_features(struct dxil_container *c,
73                            const struct dxil_features *features)
74{
75   /* DXIL feature info is a bitfield packed into a uint64_t. */
76   static_assert(sizeof(struct dxil_features) <= sizeof(uint64_t),
77                 "Expected dxil_features to fit into a uint64_t");
78   uint64_t bits = 0;
79   memcpy(&bits, features, sizeof(struct dxil_features));
80   return add_part(c, DXIL_SFI0, &bits, sizeof(uint64_t));
81}
82
/* Linear cache of semantic names that have already been emitted, together
 * with the offset each one was assigned. get_semantic_name_offset() consults
 * it to de-duplicate names.
 */
typedef struct {
   struct {
      const char *name;   /* borrowed pointer to the name; the string is not copied */
      uint32_t offset;    /* offset recorded when the name was first seen */
   } entries[DXIL_SHADER_MAX_IO_ROWS];
   uint32_t num_entries;  /* number of valid slots at the front of entries[] */
} name_offset_cache_t;
90
91static uint32_t
92get_semantic_name_offset(name_offset_cache_t *cache, const char *name,
93                         struct _mesa_string_buffer *buf, uint32_t buf_offset,
94                         bool validator_7)
95{
96   uint32_t offset = buf->length + buf_offset;
97
98   /* DXC doesn't de-duplicate arbitrary semantic names until validator 1.7, only SVs. */
99   if (validator_7 || strncmp(name, "SV_", 3) == 0) {
100      /* consider replacing this with a binary search using rb_tree */
101      for (unsigned i = 0; i < cache->num_entries; ++i) {
102         if (!strcmp(name, cache->entries[i].name))
103            return cache->entries[i].offset;
104      }
105
106      cache->entries[cache->num_entries].name = name;
107      cache->entries[cache->num_entries].offset = offset;
108      ++cache->num_entries;
109   }
110   _mesa_string_buffer_append_len(buf, name, strlen(name) + 1);
111
112   return offset;
113}
114
115static uint32_t
116collect_semantic_names(unsigned num_records,
117                       struct dxil_signature_record *io_data,
118                       struct _mesa_string_buffer *buf,
119                       uint32_t buf_offset,
120                       bool validator_7)
121{
122   name_offset_cache_t cache;
123   cache.num_entries = 0;
124
125   for (unsigned i = 0; i < num_records; ++i) {
126      struct dxil_signature_record *io = &io_data[i];
127      uint32_t offset = get_semantic_name_offset(&cache, io->name, buf, buf_offset, validator_7);
128      for (unsigned j = 0; j < io->num_elements; ++j)
129         io->elements[j].semantic_name_offset = offset;
130   }
131   if (validator_7 && buf->length % sizeof(uint32_t) != 0) {
132      unsigned padding_to_add = sizeof(uint32_t) - (buf->length % sizeof(uint32_t));
133      char padding[sizeof(uint32_t)] = { 0 };
134      _mesa_string_buffer_append_len(buf, padding, padding_to_add);
135   }
136   return buf_offset + buf->length;
137}
138
139bool
140dxil_container_add_io_signature(struct dxil_container *c,
141                                enum dxil_part_fourcc part,
142                                unsigned num_records,
143                                struct dxil_signature_record *io_data,
144                                bool validator_7)
145{
146   struct {
147      uint32_t param_count;
148      uint32_t param_offset;
149   } header;
150   header.param_count = 0;
151   uint32_t fixed_size = sizeof(header);
152   header.param_offset = fixed_size;
153
154   bool retval = true;
155
156   for (unsigned i = 0; i < num_records; ++i) {
157      /* TODO:
158       * - Here we need to check whether the value is actually part of the
159       * signature */
160      fixed_size += sizeof(struct dxil_signature_element) * io_data[i].num_elements;
161      header.param_count += io_data[i].num_elements;
162   }
163
164   struct _mesa_string_buffer *names =
165         _mesa_string_buffer_create(NULL, 1024);
166
167   uint32_t last_offset = collect_semantic_names(num_records, io_data,
168                                                 names, fixed_size,
169                                                 validator_7);
170
171
172   if (!add_part_header(c, part, last_offset) ||
173       !blob_write_bytes(&c->parts, &header, sizeof(header))) {
174      retval = false;
175      goto cleanup;
176   }
177
178   /* write all parts */
179   for (unsigned i = 0; i < num_records; ++i)
180      for (unsigned j = 0; j < io_data[i].num_elements; ++j) {
181         if (!blob_write_bytes(&c->parts, &io_data[i].elements[j],
182                              sizeof(io_data[i].elements[j]))) {
183            retval = false;
184            goto cleanup;
185         }
186      }
187
188   /* write all names */
189
190   if (!blob_write_bytes(&c->parts, names->buf, names->length))
191      retval = false;
192
193cleanup:
194   _mesa_string_buffer_destroy(names);
195   return retval;
196}
197
/**
 * Size, in 32-bit words, of one input-to-output dependency table.
 *
 * The output vector count is rounded up to a multiple of eight and divided by
 * eight; that figure is then multiplied by four for each input vector
 * (presumably one mask per input component -- confirm against the PSV spec).
 */
static uint32_t
compute_input_output_table_dwords(unsigned input_vectors, unsigned output_vectors)
{
   const unsigned mask_dwords = (output_vectors + 7) / 8;
   const unsigned per_input = mask_dwords * input_vectors;

   return per_input * 4;
}
203
204bool
205dxil_container_add_state_validation(struct dxil_container *c,
206                                    const struct dxil_module *m,
207                                    struct dxil_validation_state *state)
208{
209   uint32_t psv_size = m->minor_validator >= 6 ?
210      sizeof(struct dxil_psv_runtime_info_2) :
211      sizeof(struct dxil_psv_runtime_info_1);
212   uint32_t resource_bind_info_size = m->minor_validator >= 6 ?
213      sizeof(struct dxil_resource_v1) : sizeof(struct dxil_resource_v0);
214   uint32_t dxil_pvs_sig_size = sizeof(struct dxil_psv_signature_element);
215   uint32_t resource_count = state->num_resources;
216
217   uint32_t size = psv_size + 2 * sizeof(uint32_t);
218   if (resource_count > 0) {
219      size += sizeof (uint32_t) +
220              resource_bind_info_size * resource_count;
221   }
222   uint32_t string_table_size = (m->sem_string_table->length + 3) & ~3u;
223   size  += sizeof(uint32_t) + string_table_size;
224
225   size  += sizeof(uint32_t) + m->sem_index_table.size * sizeof(uint32_t);
226
227   if (m->num_sig_inputs || m->num_sig_outputs || m->num_sig_patch_consts) {
228      size  += sizeof(uint32_t);
229   }
230
231   size += dxil_pvs_sig_size * m->num_sig_inputs;
232   size += dxil_pvs_sig_size * m->num_sig_outputs;
233   size += dxil_pvs_sig_size * m->num_sig_patch_consts;
234
235   state->state.psv1.sig_input_vectors = (uint8_t)m->num_psv_inputs;
236
237   for (unsigned i = 0; i < 4; ++i)
238      state->state.psv1.sig_output_vectors[i] = (uint8_t)m->num_psv_outputs[i];
239
240   // TODO: Add viewID records size
241
242   uint32_t dependency_table_size = 0;
243   if (state->state.psv1.sig_input_vectors > 0) {
244      for (unsigned i = 0; i < 4; ++i) {
245         if (state->state.psv1.sig_output_vectors[i] > 0)
246            dependency_table_size += sizeof(uint32_t) *
247            compute_input_output_table_dwords(state->state.psv1.sig_input_vectors,
248               state->state.psv1.sig_output_vectors[i]);
249      }
250      if (state->state.psv1.shader_stage == DXIL_HULL_SHADER && state->state.psv1.sig_patch_const_or_prim_vectors) {
251         dependency_table_size += sizeof(uint32_t) * compute_input_output_table_dwords(state->state.psv1.sig_input_vectors,
252            state->state.psv1.sig_patch_const_or_prim_vectors);
253      }
254   }
255   if (state->state.psv1.shader_stage == DXIL_DOMAIN_SHADER &&
256       state->state.psv1.sig_patch_const_or_prim_vectors &&
257       state->state.psv1.sig_output_vectors[0]) {
258      dependency_table_size += sizeof(uint32_t) * compute_input_output_table_dwords(
259         state->state.psv1.sig_patch_const_or_prim_vectors, state->state.psv1.sig_output_vectors[0]);
260   }
261   size += dependency_table_size;
262   // TODO: Domain shader table goes here
263
264   if (!add_part_header(c, DXIL_PSV0, size))
265      return false;
266
267   if (!blob_write_bytes(&c->parts, &psv_size, sizeof(psv_size)))
268       return false;
269
270   if (!blob_write_bytes(&c->parts, &state->state, psv_size))
271      return false;
272
273   if (!blob_write_bytes(&c->parts, &resource_count, sizeof(resource_count)))
274      return false;
275
276   if (resource_count > 0) {
277      if (!blob_write_bytes(&c->parts, &resource_bind_info_size, sizeof(resource_bind_info_size)) ||
278          !blob_write_bytes(&c->parts, state->resources.v0, resource_bind_info_size * state->num_resources))
279         return false;
280   }
281
282
283   uint32_t fill = 0;
284   if (!blob_write_bytes(&c->parts, &string_table_size, sizeof(string_table_size)) ||
285       !blob_write_bytes(&c->parts, m->sem_string_table->buf, m->sem_string_table->length) ||
286       !blob_write_bytes(&c->parts, &fill, string_table_size - m->sem_string_table->length))
287      return false;
288
289   if (!blob_write_bytes(&c->parts, &m->sem_index_table.size, sizeof(uint32_t)))
290      return false;
291
292   if (m->sem_index_table.size > 0) {
293      if (!blob_write_bytes(&c->parts, m->sem_index_table.data,
294                            m->sem_index_table.size * sizeof(uint32_t)))
295         return false;
296   }
297
298   if (m->num_sig_inputs || m->num_sig_outputs || m->num_sig_patch_consts) {
299      if (!blob_write_bytes(&c->parts, &dxil_pvs_sig_size, sizeof(dxil_pvs_sig_size)))
300         return false;
301
302      if (!blob_write_bytes(&c->parts, &m->psv_inputs, dxil_pvs_sig_size * m->num_sig_inputs))
303         return false;
304
305      if (!blob_write_bytes(&c->parts, &m->psv_outputs, dxil_pvs_sig_size * m->num_sig_outputs))
306         return false;
307
308      if (!blob_write_bytes(&c->parts, &m->psv_patch_consts, dxil_pvs_sig_size * m->num_sig_patch_consts))
309         return false;
310   }
311
312   // TODO: Handle case when ViewID is used
313
314   // TODO: Handle sig input output dependency table
315
316   for (uint32_t i = 0; i < dependency_table_size; ++i)
317      blob_write_uint8(&c->parts, 0);
318
319   return true;
320}
321
322bool
323dxil_container_add_module(struct dxil_container *c,
324                          const struct dxil_module *m)
325{
326   assert(m->buf.buf_bits == 0); // make sure the module is fully flushed
327   uint32_t version = (m->shader_kind << 16) |
328                      (m->major_version << 4) |
329                      m->minor_version;
330   uint32_t size = 6 * sizeof(uint32_t) + m->buf.blob.size;
331   assert(size % sizeof(uint32_t) == 0);
332   uint32_t uint32_size = size / sizeof(uint32_t);
333   uint32_t magic = 0x4C495844;
334   uint32_t dxil_version = 1 << 8; // I have no idea...
335   uint32_t bitcode_offset = 16;
336   uint32_t bitcode_size = m->buf.blob.size;
337
338   return add_part_header(c, DXIL_DXIL, size) &&
339          blob_write_bytes(&c->parts, &version, sizeof(version)) &&
340          blob_write_bytes(&c->parts, &uint32_size, sizeof(uint32_size)) &&
341          blob_write_bytes(&c->parts, &magic, sizeof(magic)) &&
342          blob_write_bytes(&c->parts, &dxil_version, sizeof(dxil_version)) &&
343          blob_write_bytes(&c->parts, &bitcode_offset, sizeof(bitcode_offset)) &&
344          blob_write_bytes(&c->parts, &bitcode_size, sizeof(bitcode_size)) &&
345          blob_write_bytes(&c->parts, m->buf.blob.data, m->buf.blob.size);
346}
347
348bool
349dxil_container_write(struct dxil_container *c, struct blob *blob)
350{
351   assert(blob->size == 0);
352   if (!blob_write_bytes(blob, &DXIL_DXBC, sizeof(DXIL_DXBC)))
353      return false;
354
355   const uint8_t unsigned_digest[16] = { 0 }; // null-digest means unsigned
356   if (!blob_write_bytes(blob, unsigned_digest, sizeof(unsigned_digest)))
357      return false;
358
359   uint16_t major_version = 1;
360   uint16_t minor_version = 0;
361   if (!blob_write_bytes(blob, &major_version, sizeof(major_version)) ||
362       !blob_write_bytes(blob, &minor_version, sizeof(minor_version)))
363      return false;
364
365   size_t header_size = 32 + 4 * c->num_parts;
366   size_t size = header_size + c->parts.size;
367   assert(size <= UINT32_MAX);
368   uint32_t container_size = (uint32_t)size;
369   if (!blob_write_bytes(blob, &container_size, sizeof(container_size)))
370      return false;
371
372   uint32_t part_offsets[DXIL_MAX_PARTS];
373   for (int i = 0; i < c->num_parts; ++i) {
374      size_t offset = header_size + c->part_offsets[i];
375      assert(offset <= UINT32_MAX);
376      part_offsets[i] = (uint32_t)offset;
377   }
378
379   if (!blob_write_bytes(blob, &c->num_parts, sizeof(c->num_parts)) ||
380       !blob_write_bytes(blob, part_offsets, sizeof(uint32_t) * c->num_parts) ||
381       !blob_write_bytes(blob, c->parts.data, c->parts.size))
382      return false;
383
384   return true;
385}
386