1 /*
2 * Copyright © 2019 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 /** @file brw_fs_scoreboard.cpp
25 *
26 * Gfx12+ hardware lacks the register scoreboard logic that used to guarantee
27 * data coherency between register reads and writes in previous generations.
28 * This lowering pass runs after register allocation in order to make up for
29 * it.
30 *
31 * It works by performing global dataflow analysis in order to determine the
32 * set of potential dependencies of every instruction in the shader, and then
33 * inserts any required SWSB annotations and additional SYNC instructions in
34 * order to guarantee data coherency.
35 *
36 * WARNING - Access of the following (rarely used) ARF registers is not
37 * tracked here, and requires the RegDist SWSB annotation to be set
38 * to 1 by the generator in order to avoid data races:
39 *
40 * - sp stack pointer
41 * - sr0 state register
42 * - cr0 control register
43 * - ip instruction pointer
44 * - tm0 timestamp register
45 * - dbg0 debug register
46 * - acc2-9 special accumulator registers on TGL
47 * - mme0-7 math macro extended accumulator registers
48 *
49 * The following ARF registers don't need to be tracked here because data
50 * coherency is still provided transparently by the hardware:
51 *
52 * - f0-1 flag registers
53 * - n0 notification register
54 * - tdr0 thread dependency register
55 */
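
/*
 * Illustrative example (informal pseudo-assembly, hypothetical instruction
 * stream, not actual generated code): given something like
 *
 *    send r10 ...              out-of-order message writing r10
 *    add  r20 r12 r13          in-order ALU instruction writing r20
 *    mul  r30 r10 r20          reads both results
 *
 * this pass would allocate an SBID token for the SEND, mark the SEND's
 * SWSB annotation as setting that token, and annotate the MUL with both a
 * wait on the token's destination dependency and a RegDist of 1 covering
 * the in-order ADD, so the MUL cannot read r10 or r20 before they are
 * written.
 */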
56
57 #include "brw_fs.h"
58 #include "brw_cfg.h"
59
60 using namespace brw;
61
62 namespace {
63 /**
64 * In-order instruction accounting.
65 * @{
66 */
67
68 /**
69 * Return the RegDist pipeline the hardware will synchronize with if no
70 * pipeline information is provided in the SWSB annotation of an
71 * instruction (e.g. when TGL_PIPE_NONE is specified in tgl_swsb).
72 */
73 tgl_pipe
74 inferred_sync_pipe(const struct intel_device_info *devinfo, const fs_inst *inst)
75 {
76 if (devinfo->verx10 >= 125) {
77 bool has_int_src = false, has_long_src = false;
78
79 if (is_send(inst))
80 return TGL_PIPE_NONE;
81
82 for (unsigned i = 0; i < inst->sources; i++) {
83 if (inst->src[i].file != BAD_FILE &&
84 !inst->is_control_source(i)) {
85 const brw_reg_type t = inst->src[i].type;
86 has_int_src |= !brw_reg_type_is_floating_point(t);
87 has_long_src |= type_sz(t) >= 8;
88 }
89 }
90
91 return has_long_src ? TGL_PIPE_LONG :
92 has_int_src ? TGL_PIPE_INT :
93 TGL_PIPE_FLOAT;
94
95 } else {
96 return TGL_PIPE_FLOAT;
97 }
98 }
99
100 /**
101 * Return the RegDist pipeline that will execute an instruction, or
102 * TGL_PIPE_NONE if the instruction is out-of-order and doesn't use the
103 * RegDist synchronization mechanism.
104 */
105 tgl_pipe
106 inferred_exec_pipe(const struct intel_device_info *devinfo, const fs_inst *inst)
107 {
108 const brw_reg_type t = get_exec_type(inst);
109 const bool is_dword_multiply = !brw_reg_type_is_floating_point(t) &&
110 ((inst->opcode == BRW_OPCODE_MUL &&
111 MIN2(type_sz(inst->src[0].type), type_sz(inst->src[1].type)) >= 4) ||
112 (inst->opcode == BRW_OPCODE_MAD &&
113 MIN2(type_sz(inst->src[1].type), type_sz(inst->src[2].type)) >= 4));
114
115 if (is_unordered(inst))
116 return TGL_PIPE_NONE;
117 else if (devinfo->verx10 < 125)
118 return TGL_PIPE_FLOAT;
119 else if (inst->opcode == SHADER_OPCODE_MOV_INDIRECT &&
120 type_sz(t) >= 8)
121 return TGL_PIPE_INT;
122 else if (inst->opcode == SHADER_OPCODE_BROADCAST &&
123 !devinfo->has_64bit_float && type_sz(t) >= 8)
124 return TGL_PIPE_INT;
125 else if (inst->opcode == FS_OPCODE_PACK_HALF_2x16_SPLIT)
126 return TGL_PIPE_FLOAT;
127 else if (type_sz(inst->dst.type) >= 8 || type_sz(t) >= 8 ||
128 is_dword_multiply) {
129 assert(devinfo->has_64bit_float || devinfo->has_64bit_int ||
130 devinfo->has_integer_dword_mul);
131 return TGL_PIPE_LONG;
132 } else if (brw_reg_type_is_floating_point(inst->dst.type))
133 return TGL_PIPE_FLOAT;
134 else
135 return TGL_PIPE_INT;
136 }
137
138 /**
139 * Index of the \p p pipeline counter in the ordered_address vector defined
140 * below.
141 */
142 #define IDX(p) (p >= TGL_PIPE_FLOAT ? unsigned(p - TGL_PIPE_FLOAT) : \
143 (abort(), ~0u))
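
/*
 * For reference (illustrative, not used by the code): assuming the tgl_pipe
 * enumerators TGL_PIPE_FLOAT, TGL_PIPE_INT, TGL_PIPE_LONG and TGL_PIPE_ALL
 * are declared contiguously in that order (which the code below relies on,
 * e.g. when recovering a pipe as tgl_pipe(TGL_PIPE_FLOAT + q)), the mapping
 * works out to:
 *
 *    IDX(TGL_PIPE_FLOAT) == 0
 *    IDX(TGL_PIPE_INT)   == 1
 *    IDX(TGL_PIPE_LONG)  == 2
 *    IDX(TGL_PIPE_ALL)   == 3   (the number of tracked pipelines)
 */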
144
145 /**
146 * Number of in-order hardware instructions for pipeline index \p p contained
147 * in this IR instruction. This determines the increment applied to the
148 * RegDist counter calculated for any ordered dependency that crosses this
149 * instruction.
150 */
151 unsigned
152 ordered_unit(const struct intel_device_info *devinfo, const fs_inst *inst,
153 unsigned p)
154 {
155 switch (inst->opcode) {
156 case BRW_OPCODE_SYNC:
157 case BRW_OPCODE_DO:
158 case SHADER_OPCODE_UNDEF:
159 case SHADER_OPCODE_HALT_TARGET:
160 case FS_OPCODE_SCHEDULING_FENCE:
161 return 0;
162 default:
163 /* Note that the following is inaccurate for virtual instructions
164 * that expand to more in-order instructions than assumed here, but
165 * that can only lead to suboptimal execution ordering; data
166 * coherency won't be impacted. Providing exact RegDist counts for
167 * each virtual instruction would allow better ALU performance, but
168 * it would require keeping this switch statement in perfect sync
169 * with the generator in order to avoid data corruption. Lesson is
170 * (again) don't use virtual instructions if you want optimal
171 * scheduling.
172 */
173 if (!is_unordered(inst) && (p == IDX(inferred_exec_pipe(devinfo, inst)) ||
174 p == IDX(TGL_PIPE_ALL)))
175 return 1;
176 else
177 return 0;
178 }
179 }
180
181 /**
182 * Type for an instruction counter that increments for in-order
183 * instructions only, arbitrarily denoted 'jp' throughout this lowering
184 * pass in order to distinguish it from the regular instruction counter.
185 * This is represented as a vector with an independent counter for each
186 * asynchronous ALU pipeline in the EU.
187 */
188 struct ordered_address {
189 /**
190 * Construct the ordered address of a dependency known to execute on a
191 * single specified pipeline \p p (unless TGL_PIPE_NONE or TGL_PIPE_ALL
192 * is provided), in which case the vector counter will be initialized
193 * with all components equal to INT_MIN (always satisfied) except for
194 * component IDX(p).
195 */
196 ordered_address(tgl_pipe p = TGL_PIPE_NONE, int jp0 = INT_MIN) {
197 for (unsigned q = 0; q < IDX(TGL_PIPE_ALL); q++)
198 jp[q] = (p == TGL_PIPE_NONE || (IDX(p) != q && p != TGL_PIPE_ALL) ?
199 INT_MIN : jp0);
200 }
201
202 int jp[IDX(TGL_PIPE_ALL)];
203
204 friend bool
205 operator==(const ordered_address &jp0, const ordered_address &jp1)
206 {
207 for (unsigned p = 0; p < IDX(TGL_PIPE_ALL); p++) {
208 if (jp0.jp[p] != jp1.jp[p])
209 return false;
210 }
211
212 return true;
213 }
214 };
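
/*
 * Example (illustrative only): with the three tracked pipelines above,
 * ordered_address(TGL_PIPE_INT, 5) yields the counter vector
 * { INT_MIN, 5, INT_MIN }, i.e. a dependency trivially satisfied in the
 * FLOAT and LONG pipes and pending at jp == 5 in the INT pipe, while
 * ordered_address(TGL_PIPE_ALL, 5) yields { 5, 5, 5 }.
 */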
215
216 /**
217 * Return true if the specified ordered address is trivially satisfied for
218 * all pipelines except potentially for the specified pipeline \p p.
219 */
220 bool
221 is_single_pipe(const ordered_address &jp, tgl_pipe p)
222 {
223 for (unsigned q = 0; q < IDX(TGL_PIPE_ALL); q++) {
224 if ((p == TGL_PIPE_NONE || IDX(p) != q) && jp.jp[q] > INT_MIN)
225 return false;
226 }
227
228 return true;
229 }
230
231 /**
232 * Return the number of instructions in the program.
233 */
234 unsigned
235 num_instructions(const backend_shader *shader)
236 {
237 return shader->cfg->blocks[shader->cfg->num_blocks - 1]->end_ip + 1;
238 }
239
240 /**
241 * Calculate the local ordered_address instruction counter at every
242 * instruction of the shader for subsequent constant-time look-up.
243 */
244 ordered_address *
245 ordered_inst_addresses(const fs_visitor *shader)
246 {
247 ordered_address *jps = new ordered_address[num_instructions(shader)];
248 ordered_address jp(TGL_PIPE_ALL, 0);
249 unsigned ip = 0;
250
251 foreach_block_and_inst(block, fs_inst, inst, shader->cfg) {
252 jps[ip] = jp;
253 for (unsigned p = 0; p < IDX(TGL_PIPE_ALL); p++)
254 jp.jp[p] += ordered_unit(shader->devinfo, inst, p);
255 ip++;
256 }
257
258 return jps;
259 }
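
/*
 * Illustrative trace (hypothetical three-instruction shader on a Gfx12.5+
 * platform): for a floating-point ADD followed by an integer ADD and an
 * out-of-order SEND, the recorded addresses would be jps[0] == {0, 0, 0},
 * jps[1] == {1, 0, 0} and jps[2] == {1, 1, 0}, since each in-order
 * instruction bumps only the counter of its inferred execution pipeline
 * and SENDs contribute no ordered_unit() at all.
 */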
260
261 /**
262 * Synchronization mode required for data manipulated by in-order
263 * instructions.
264 *
265 * Similar to tgl_sbid_mode, but without SET mode. Defined as a separate
266 * enum for additional type safety. The hardware doesn't provide control
267 * over the synchronization mode for RegDist annotations; this is only used
268 * internally in this pass in order to optimize out redundant read
269 * dependencies where possible.
270 */
271 enum tgl_regdist_mode {
272 TGL_REGDIST_NULL = 0,
273 TGL_REGDIST_SRC = 1,
274 TGL_REGDIST_DST = 2
275 };
276
277 /**
278 * Allow bitwise arithmetic of tgl_regdist_mode enums.
279 */
280 tgl_regdist_mode
281 operator|(tgl_regdist_mode x, tgl_regdist_mode y)
282 {
283 return tgl_regdist_mode(unsigned(x) | unsigned(y));
284 }
285
286 tgl_regdist_mode
287 operator&(tgl_regdist_mode x, tgl_regdist_mode y)
288 {
289 return tgl_regdist_mode(unsigned(x) & unsigned(y));
290 }
291
292 tgl_regdist_mode &
293 operator|=(tgl_regdist_mode &x, tgl_regdist_mode y)
294 {
295 return x = x | y;
296 }
297
298 tgl_regdist_mode &
299 operator&=(tgl_regdist_mode &x, tgl_regdist_mode y)
300 {
301 return x = x & y;
302 }
303
304 /** @} */
305
306 /**
307 * Representation of an equivalence relation among the set of unsigned
308 * integers.
309 *
310 * Its initial state is the identity relation '~' such that i ~ j if and
311 * only if i == j for every pair of unsigned integers i and j.
312 */
313 struct equivalence_relation {
314 equivalence_relation(unsigned n) : is(new unsigned[n]), n(n)
315 {
316 for (unsigned i = 0; i < n; i++)
317 is[i] = i;
318 }
319
320 ~equivalence_relation()
321 {
322 delete[] is;
323 }
324
325 /**
326 * Return equivalence class index of the specified element. Effectively
327 * this is the numeric value of an arbitrary representative from the
328 * equivalence class.
329 *
330 * Allows the evaluation of the equivalence relation according to the
331 * rule that i ~ j if and only if lookup(i) == lookup(j).
332 */
333 unsigned
334 lookup(unsigned i) const
335 {
336 if (i < n && is[i] != i)
337 return lookup(is[i]);
338 else
339 return i;
340 }
341
342 /**
343 * Create an array with the results of the lookup() method for
344 * constant-time evaluation.
345 */
346 unsigned *
347 flatten() const
348 {
349 unsigned *ids = new unsigned[n];
350
351 for (unsigned i = 0; i < n; i++)
352 ids[i] = lookup(i);
353
354 return ids;
355 }
356
357 /**
358 * Mutate the existing equivalence relation minimally by imposing the
359 * additional requirement that i ~ j.
360 *
361 * The algorithm updates the internal representation recursively in
362 * order to guarantee transitivity while preserving the previously
363 * specified equivalence requirements.
364 */
365 unsigned
366 link(unsigned i, unsigned j)
367 {
368 const unsigned k = lookup(i);
369 assign(i, k);
370 assign(j, k);
371 return k;
372 }
373
374 private:
375 equivalence_relation(const equivalence_relation &);
376
377 equivalence_relation &
378 operator=(const equivalence_relation &);
379
380 /**
381 * Assign the representative of \p from to be equivalent to \p to.
382 *
383 * At the same time the data structure is partially flattened as much as
384 * possible without increasing the number of recursive calls.
385 */
386 void
387 assign(unsigned from, unsigned to)
388 {
389 if (from != to) {
390 assert(from < n);
391
392 if (is[from] != from)
393 assign(is[from], to);
394
395 is[from] = to;
396 }
397 }
398
399 unsigned *is;
400 unsigned n;
401 };
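
/*
 * Usage sketch (illustrative only):
 *
 *    equivalence_relation eq(8);
 *    eq.link(1, 3);
 *    eq.link(3, 5);
 *    assert(eq.lookup(1) == eq.lookup(3) && eq.lookup(3) == eq.lookup(5));
 *    assert(eq.lookup(2) == 2);
 *
 * In this pass the linked elements are instruction indices of out-of-order
 * instructions, and placing them in the same equivalence class is what
 * later causes a single hardware SBID to be assigned to both dependencies.
 */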
402
403 /**
404 * Representation of a data dependency between two instructions in the
405 * program.
406 * @{
407 */
408 struct dependency {
409 /**
410 * No dependency information.
411 */
412 dependency() : ordered(TGL_REGDIST_NULL), jp(),
413 unordered(TGL_SBID_NULL), id(0),
414 exec_all(false) {}
415
416 /**
417 * Construct a dependency on the in-order instruction with the provided
418 * ordered_address instruction counter.
419 */
420 dependency(tgl_regdist_mode mode, const ordered_address &jp,
421 bool exec_all) :
422 ordered(mode), jp(jp), unordered(TGL_SBID_NULL), id(0),
423 exec_all(exec_all) {}
424
425 /**
426 * Construct a dependency on the out-of-order instruction with the
427 * specified synchronization token.
428 */
429 dependency(tgl_sbid_mode mode, unsigned id, bool exec_all) :
430 ordered(TGL_REGDIST_NULL), jp(), unordered(mode), id(id),
431 exec_all(exec_all) {}
432
433 /**
434 * Synchronization mode of in-order dependency, or zero if no in-order
435 * dependency is present.
436 */
437 tgl_regdist_mode ordered;
438
439 /**
440 * Instruction counter of in-order dependency.
441 *
442 * For a dependency part of a different block in the program, this is
443 * relative to the specific control flow path taken between the
444 * dependency and the current block: It is the ordered_address such that
445 * the difference between it and the ordered_address of the first
446 * instruction of the current block is exactly the number of in-order
447 * instructions across that control flow path. It is not guaranteed to
448 * be equal to the local ordered_address of the generating instruction
449 * [as returned by ordered_inst_addresses()], except for block-local
450 * dependencies.
451 */
452 ordered_address jp;
453
454 /**
455 * Synchronization mode of unordered dependency, or zero if no unordered
456 * dependency is present.
457 */
458 tgl_sbid_mode unordered;
459
460 /** Synchronization token of out-of-order dependency. */
461 unsigned id;
462
463 /**
464 * Whether the dependency could be run with execution masking disabled,
465 * which might lead to the unwanted execution of the generating
466 * instruction in cases where a BB is executed with all channels
467 * disabled due to hardware bug Wa_1407528679.
468 */
469 bool exec_all;
470
471 /**
472 * Trivial in-order dependency that's always satisfied.
473 *
474 * Note that unlike a default-constructed dependency() which is also
475 * trivially satisfied, this is considered to provide dependency
476 * information and can be used to clear a previously pending dependency
477 * via shadow().
478 */
479 static const dependency done;
480
481 friend bool
482 operator==(const dependency &dep0, const dependency &dep1)
483 {
484 return dep0.ordered == dep1.ordered &&
485 dep0.jp == dep1.jp &&
486 dep0.unordered == dep1.unordered &&
487 dep0.id == dep1.id &&
488 dep0.exec_all == dep1.exec_all;
489 }
490
491 friend bool
492 operator!=(const dependency &dep0, const dependency &dep1)
493 {
494 return !(dep0 == dep1);
495 }
496 };
497
498 const dependency dependency::done =
499 dependency(TGL_REGDIST_DST, ordered_address(), false);
500
501 /**
502 * Return whether \p dep contains any dependency information.
503 */
504 bool
505 is_valid(const dependency &dep)
506 {
507 return dep.ordered || dep.unordered;
508 }
509
510 /**
511 * Combine \p dep0 and \p dep1 into a single dependency object that is only
512 * satisfied when both original dependencies are satisfied. This might
513 * involve updating the equivalence relation \p eq in order to make sure
514 * that both out-of-order dependencies are assigned the same hardware SBID
515 * as synchronization token.
516 */
517 dependency
518 merge(equivalence_relation &eq,
519 const dependency &dep0, const dependency &dep1)
520 {
521 dependency dep;
522
523 if (dep0.ordered || dep1.ordered) {
524 dep.ordered = dep0.ordered | dep1.ordered;
525 for (unsigned p = 0; p < IDX(TGL_PIPE_ALL); p++)
526 dep.jp.jp[p] = MAX2(dep0.jp.jp[p], dep1.jp.jp[p]);
527 }
528
529 if (dep0.unordered || dep1.unordered) {
530 dep.unordered = dep0.unordered | dep1.unordered;
531 dep.id = eq.link(dep0.unordered ? dep0.id : dep1.id,
532 dep1.unordered ? dep1.id : dep0.id);
533 }
534
535 dep.exec_all = dep0.exec_all || dep1.exec_all;
536
537 return dep;
538 }
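
/*
 * Example (illustrative only): merging an in-order destination dependency
 * pending at FLOAT jp == 4 with an out-of-order destination dependency
 * using token 7 yields a combined dependency with ordered ==
 * TGL_REGDIST_DST, jp == { 4, INT_MIN, INT_MIN }, unordered ==
 * TGL_SBID_DST and id == 7, so a consumer has to honor both
 * synchronization mechanisms.
 */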
539
540 /**
541 * Override dependency information of \p dep0 with that of \p dep1.
542 */
543 dependency
544 shadow(const dependency &dep0, const dependency &dep1)
545 {
546 if (dep0.ordered == TGL_REGDIST_SRC &&
547 is_valid(dep1) && !(dep1.unordered & TGL_SBID_DST) &&
548 !(dep1.ordered & TGL_REGDIST_DST)) {
549 /* As an optimization (see dependency_for_read()),
550 * instructions with a RaR dependency don't synchronize
551 * against a previous in-order read, so we need to pass
552 * through both ordered dependencies instead of simply
553 * dropping the first one. Otherwise we could encounter a
554 * WaR data hazard between OP0 and OP2 in cases like:
555 *
556 * OP0 r1:f r0:d
557 * OP1 r2:d r0:d
558 * OP2 r0:d r3:d
559 *
560 * since only the integer-pipeline r0 dependency from OP1
561 * would be visible to OP2, even though OP0 could technically
562 * execute after OP1 due to the floating-point and integer
563 * pipelines being asynchronous on Gfx12.5+ platforms, so
564 * synchronizing OP2 against OP1 would be insufficient.
565 */
566 dependency dep = dep1;
567
568 dep.ordered |= dep0.ordered;
569 for (unsigned p = 0; p < IDX(TGL_PIPE_ALL); p++)
570 dep.jp.jp[p] = MAX2(dep.jp.jp[p], dep0.jp.jp[p]);
571
572 return dep;
573 } else {
574 return is_valid(dep1) ? dep1 : dep0;
575 }
576 }
577
578 /**
579 * Translate dependency information across the program.
580 *
581 * This returns a dependency on the same instruction translated to the
582 * ordered_address space of a different block. The correct shift for
583 * transporting a dependency across an edge of the CFG is the difference
584 * between the local ordered_address of the first instruction of the target
585 * block and the local ordered_address of the instruction immediately after
586 * the end of the origin block.
587 */
588 dependency
589 transport(dependency dep, int delta[IDX(TGL_PIPE_ALL)])
590 {
591 if (dep.ordered) {
592 for (unsigned p = 0; p < IDX(TGL_PIPE_ALL); p++) {
593 if (dep.jp.jp[p] > INT_MIN)
594 dep.jp.jp[p] += delta[p];
595 }
596 }
597
598 return dep;
599 }
600
601 /**
602 * Return simplified dependency removing any synchronization modes not
603 * applicable to an instruction reading the same register location.
604 */
605 dependency
606 dependency_for_read(dependency dep)
607 {
608 dep.ordered &= TGL_REGDIST_DST;
609 return dep;
610 }
611
612 /**
613 * Return simplified dependency removing any synchronization modes not
614 * applicable to an instruction \p inst writing the same register location.
615 *
616 * This clears any WaR dependency for writes performed from the same
617 * pipeline as the read, since there is no possibility for a data hazard.
618 */
619 dependency
620 dependency_for_write(const struct intel_device_info *devinfo,
621 const fs_inst *inst, dependency dep)
622 {
623 if (!is_unordered(inst) &&
624 is_single_pipe(dep.jp, inferred_exec_pipe(devinfo, inst)))
625 dep.ordered &= TGL_REGDIST_DST;
626 return dep;
627 }
628
629 /** @} */
630
631 /**
632 * Scoreboard representation. This keeps track of the data dependencies of
633 * registers with GRF granularity.
634 */
635 class scoreboard {
636 public:
637 /**
638 * Look up the most current data dependency for register \p r.
639 */
640 dependency
641 get(const fs_reg &r) const
642 {
643 if (const dependency *p = const_cast<scoreboard *>(this)->dep(r))
644 return *p;
645 else
646 return dependency();
647 }
648
649 /**
650 * Specify the most current data dependency for register \p r.
651 */
652 void
653 set(const fs_reg &r, const dependency &d)
654 {
655 if (dependency *p = dep(r))
656 *p = d;
657 }
658
659 /**
660 * Component-wise merge() of corresponding dependencies from two
661 * scoreboard objects. \sa merge().
662 */
663 friend scoreboard
664 merge(equivalence_relation &eq,
665 const scoreboard &sb0, const scoreboard &sb1)
666 {
667 scoreboard sb;
668
669 for (unsigned i = 0; i < ARRAY_SIZE(sb.grf_deps); i++)
670 sb.grf_deps[i] = merge(eq, sb0.grf_deps[i], sb1.grf_deps[i]);
671
672 sb.addr_dep = merge(eq, sb0.addr_dep, sb1.addr_dep);
673 sb.accum_dep = merge(eq, sb0.accum_dep, sb1.accum_dep);
674
675 return sb;
676 }
677
678 /**
679 * Component-wise shadow() of corresponding dependencies from two
680 * scoreboard objects. \sa shadow().
681 */
682 friend scoreboard
683 shadow(const scoreboard &sb0, const scoreboard &sb1)
684 {
685 scoreboard sb;
686
687 for (unsigned i = 0; i < ARRAY_SIZE(sb.grf_deps); i++)
688 sb.grf_deps[i] = shadow(sb0.grf_deps[i], sb1.grf_deps[i]);
689
690 sb.addr_dep = shadow(sb0.addr_dep, sb1.addr_dep);
691 sb.accum_dep = shadow(sb0.accum_dep, sb1.accum_dep);
692
693 return sb;
694 }
695
696 /**
697 * Component-wise transport() of dependencies from a scoreboard
698 * object. \sa transport().
699 */
700 friend scoreboard
701 transport(const scoreboard &sb0, int delta[IDX(TGL_PIPE_ALL)])
702 {
703 scoreboard sb;
704
705 for (unsigned i = 0; i < ARRAY_SIZE(sb.grf_deps); i++)
706 sb.grf_deps[i] = transport(sb0.grf_deps[i], delta);
707
708 sb.addr_dep = transport(sb0.addr_dep, delta);
709 sb.accum_dep = transport(sb0.accum_dep, delta);
710
711 return sb;
712 }
713
714 friend bool
715 operator==(const scoreboard &sb0, const scoreboard &sb1)
716 {
717 for (unsigned i = 0; i < ARRAY_SIZE(sb0.grf_deps); i++) {
718 if (sb0.grf_deps[i] != sb1.grf_deps[i])
719 return false;
720 }
721
722 if (sb0.addr_dep != sb1.addr_dep)
723 return false;
724
725 if (sb0.accum_dep != sb1.accum_dep)
726 return false;
727
728 return true;
729 }
730
731 friend bool
732 operator!=(const scoreboard &sb0, const scoreboard &sb1)
733 {
734 return !(sb0 == sb1);
735 }
736
737 private:
738 dependency grf_deps[BRW_MAX_GRF];
739 dependency addr_dep;
740 dependency accum_dep;
741
742 dependency *
743 dep(const fs_reg &r)
744 {
745 const unsigned reg = (r.file == VGRF ? r.nr + r.offset / REG_SIZE :
746 reg_offset(r) / REG_SIZE);
747
748 return (r.file == VGRF || r.file == FIXED_GRF ? &grf_deps[reg] :
749 r.file == MRF ? &grf_deps[GFX7_MRF_HACK_START + reg] :
750 r.file == ARF && reg >= BRW_ARF_ADDRESS &&
751 reg < BRW_ARF_ACCUMULATOR ? &addr_dep :
752 r.file == ARF && reg >= BRW_ARF_ACCUMULATOR &&
753 reg < BRW_ARF_FLAG ? &accum_dep :
754 NULL);
755 }
756 };
757
758 /**
759 * Dependency list handling.
760 * @{
761 */
762 struct dependency_list {
763 dependency_list() : deps(NULL), n(0) {}
764
765 ~dependency_list()
766 {
767 free(deps);
768 }
769
770 void
771 push_back(const dependency &dep)
772 {
773 deps = (dependency *)realloc(deps, (n + 1) * sizeof(*deps));
774 deps[n++] = dep;
775 }
776
777 unsigned
778 size() const
779 {
780 return n;
781 }
782
783 const dependency &
784 operator[](unsigned i) const
785 {
786 assert(i < n);
787 return deps[i];
788 }
789
790 dependency &
791 operator[](unsigned i)
792 {
793 assert(i < n);
794 return deps[i];
795 }
796
797 private:
798 dependency_list(const dependency_list &);
799 dependency_list &
800 operator=(const dependency_list &);
801
802 dependency *deps;
803 unsigned n;
804 };
805
806 /**
807 * Add dependency \p dep to the list of dependencies of an instruction
808 * \p deps.
809 */
810 void
811 add_dependency(const unsigned *ids, dependency_list &deps, dependency dep)
812 {
813 if (is_valid(dep)) {
814 /* Translate the unordered dependency token first in order to keep
815 * the list minimally redundant.
816 */
817 if (dep.unordered)
818 dep.id = ids[dep.id];
819
820 /* Try to combine the specified dependency with any existing ones. */
821 for (unsigned i = 0; i < deps.size(); i++) {
822 /* Don't combine otherwise matching dependencies if there is an
823 * exec_all mismatch which would cause a SET dependency to gain an
824 * exec_all flag, since that would prevent it from being baked
825 * into the instruction we want to allocate an SBID for.
826 */
827 if (deps[i].exec_all != dep.exec_all &&
828 (!deps[i].exec_all || (dep.unordered & TGL_SBID_SET)) &&
829 (!dep.exec_all || (deps[i].unordered & TGL_SBID_SET)))
830 continue;
831
832 if (dep.ordered && deps[i].ordered) {
833 for (unsigned p = 0; p < IDX(TGL_PIPE_ALL); p++)
834 deps[i].jp.jp[p] = MAX2(deps[i].jp.jp[p], dep.jp.jp[p]);
835
836 deps[i].ordered |= dep.ordered;
837 deps[i].exec_all |= dep.exec_all;
838 dep.ordered = TGL_REGDIST_NULL;
839 }
840
841 if (dep.unordered && deps[i].unordered && deps[i].id == dep.id) {
842 deps[i].unordered |= dep.unordered;
843 deps[i].exec_all |= dep.exec_all;
844 dep.unordered = TGL_SBID_NULL;
845 }
846 }
847
848 /* Add it to the end of the list if necessary. */
849 if (is_valid(dep))
850 deps.push_back(dep);
851 }
852 }
853
854 /**
855 * Construct a tgl_swsb annotation encoding any ordered dependencies from
856 * the dependency list \p deps of an instruction with ordered_address \p
857 * jp. If \p exec_all is false only dependencies known to be executed with
858 * channel masking applied will be considered in the calculation.
859 */
860 tgl_swsb
861 ordered_dependency_swsb(const dependency_list &deps,
862 const ordered_address &jp,
863 bool exec_all)
864 {
865 tgl_pipe p = TGL_PIPE_NONE;
866 unsigned min_dist = ~0u;
867
868 for (unsigned i = 0; i < deps.size(); i++) {
869 if (deps[i].ordered && exec_all >= deps[i].exec_all) {
870 for (unsigned q = 0; q < IDX(TGL_PIPE_ALL); q++) {
871 const unsigned dist = jp.jp[q] - int64_t(deps[i].jp.jp[q]);
872 const unsigned max_dist = (q == IDX(TGL_PIPE_LONG) ? 14 : 10);
873 assert(jp.jp[q] > deps[i].jp.jp[q]);
874 if (dist <= max_dist) {
875 p = (p && IDX(p) != q ? TGL_PIPE_ALL :
876 tgl_pipe(TGL_PIPE_FLOAT + q));
877 min_dist = MIN3(min_dist, dist, 7);
878 }
879 }
880 }
881 }
882
883 return { p ? min_dist : 0, p };
884 }
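
/*
 * Worked example (illustrative only): for an instruction at
 * ordered_address { 7, 9, 0 } depending on a FLOAT-pipe write at
 * { 5, INT_MIN, INT_MIN }, only the FLOAT distance 7 - 5 == 2 is in
 * range, so the result is RegDist 2 on TGL_PIPE_FLOAT.  If a second
 * dependency at { INT_MIN, 8, INT_MIN } is also on the list, the INT
 * distance of 1 is in range too, so the pipe degrades to TGL_PIPE_ALL
 * and the RegDist becomes MIN2(2, 1) == 1.
 */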
885
886 /**
887 * Return whether the dependency list \p deps of an instruction with
888 * ordered_address \p jp has any non-trivial ordered dependencies. If \p
889 * exec_all is false only dependencies known to be executed with channel
890 * masking applied will be considered in the calculation.
891 */
892 bool
893 find_ordered_dependency(const dependency_list &deps,
894 const ordered_address &jp,
895 bool exec_all)
896 {
897 return ordered_dependency_swsb(deps, jp, exec_all).regdist;
898 }
899
900 /**
901 * Return the full tgl_sbid_mode bitset for the first unordered dependency
902 * on the list \p deps that matches the specified tgl_sbid_mode, or zero if
903 * no such dependency is present. If \p exec_all is false only
904 * dependencies known to be executed with channel masking applied will be
905 * considered in the calculation.
906 */
907 tgl_sbid_mode
908 find_unordered_dependency(const dependency_list &deps,
909 tgl_sbid_mode unordered,
910 bool exec_all)
911 {
912 if (unordered) {
913 for (unsigned i = 0; i < deps.size(); i++) {
914 if ((unordered & deps[i].unordered) &&
915 exec_all >= deps[i].exec_all)
916 return deps[i].unordered;
917 }
918 }
919
920 return TGL_SBID_NULL;
921 }
922
923 /**
924 * Return the tgl_sbid_mode bitset of an unordered dependency from the list
925 * \p deps that can be represented directly in the SWSB annotation of the
926 * instruction without additional SYNC instructions, or zero if no such
927 * dependency is present.
928 */
929 tgl_sbid_mode
930 baked_unordered_dependency_mode(const struct intel_device_info *devinfo,
931 const fs_inst *inst,
932 const dependency_list &deps,
933 const ordered_address &jp)
934 {
935 const bool exec_all = inst->force_writemask_all;
936 const bool has_ordered = find_ordered_dependency(deps, jp, exec_all);
937 const tgl_pipe ordered_pipe = ordered_dependency_swsb(deps, jp,
938 exec_all).pipe;
939
940 if (find_unordered_dependency(deps, TGL_SBID_SET, exec_all))
941 return find_unordered_dependency(deps, TGL_SBID_SET, exec_all);
942 else if (has_ordered && is_unordered(inst))
943 return TGL_SBID_NULL;
944 else if (find_unordered_dependency(deps, TGL_SBID_DST, exec_all) &&
945 (!has_ordered || ordered_pipe == inferred_sync_pipe(devinfo, inst)))
946 return find_unordered_dependency(deps, TGL_SBID_DST, exec_all);
947 else if (!has_ordered)
948 return find_unordered_dependency(deps, TGL_SBID_SRC, exec_all);
949 else
950 return TGL_SBID_NULL;
951 }
952
953 /**
954 * Return whether an ordered dependency from the list \p deps can be
955 * represented directly in the SWSB annotation of the instruction without
956 * additional SYNC instructions.
957 */
958 bool
959 baked_ordered_dependency_mode(const struct intel_device_info *devinfo,
960 const fs_inst *inst,
961 const dependency_list &deps,
962 const ordered_address &jp)
963 {
964 const bool exec_all = inst->force_writemask_all;
965 const bool has_ordered = find_ordered_dependency(deps, jp, exec_all);
966 const tgl_pipe ordered_pipe = ordered_dependency_swsb(deps, jp,
967 exec_all).pipe;
968 const tgl_sbid_mode unordered_mode =
969 baked_unordered_dependency_mode(devinfo, inst, deps, jp);
970
971 if (!has_ordered)
972 return false;
973 else if (!unordered_mode)
974 return true;
975 else
976 return ordered_pipe == inferred_sync_pipe(devinfo, inst) &&
977 unordered_mode == (is_unordered(inst) ? TGL_SBID_SET :
978 TGL_SBID_DST);
979 }
980
981 /** @} */
982
983 /**
984 * Shader instruction dependency calculation.
985 * @{
986 */
987
988 /**
989 * Update scoreboard object \p sb to account for the execution of
990 * instruction \p inst.
991 */
992 void
993 update_inst_scoreboard(const fs_visitor *shader, const ordered_address *jps,
994 const fs_inst *inst, unsigned ip, scoreboard &sb)
995 {
996 const bool exec_all = inst->force_writemask_all;
997 const struct intel_device_info *devinfo = shader->devinfo;
998 const tgl_pipe p = inferred_exec_pipe(devinfo, inst);
999 const ordered_address jp = p ? ordered_address(p, jps[ip].jp[IDX(p)]) :
1000 ordered_address();
1001 const bool is_ordered = ordered_unit(devinfo, inst, IDX(TGL_PIPE_ALL));
1002
1003 /* Track any source registers that may be fetched asynchronously by this
1004 * instruction; otherwise clear the dependency in order to avoid
1005 * subsequent redundant synchronization.
1006 */
1007 for (unsigned i = 0; i < inst->sources; i++) {
1008 const dependency rd_dep =
1009 (inst->is_payload(i) ||
1010 inst->is_math()) ? dependency(TGL_SBID_SRC, ip, exec_all) :
1011 is_ordered ? dependency(TGL_REGDIST_SRC, jp, exec_all) :
1012 dependency::done;
1013
1014 for (unsigned j = 0; j < regs_read(inst, i); j++) {
1015 const fs_reg r = byte_offset(inst->src[i], REG_SIZE * j);
1016 sb.set(r, shadow(sb.get(r), rd_dep));
1017 }
1018 }
1019
1020 if (inst->reads_accumulator_implicitly())
1021 sb.set(brw_acc_reg(8), dependency(TGL_REGDIST_SRC, jp, exec_all));
1022
1023 if (is_send(inst) && inst->base_mrf != -1) {
1024 const dependency rd_dep = dependency(TGL_SBID_SRC, ip, exec_all);
1025
1026 for (unsigned j = 0; j < inst->mlen; j++)
1027 sb.set(brw_uvec_mrf(8, inst->base_mrf + j, 0), rd_dep);
1028 }
1029
1030 /* Track any destination registers of this instruction. */
1031 const dependency wr_dep =
1032 is_unordered(inst) ? dependency(TGL_SBID_DST, ip, exec_all) :
1033 is_ordered ? dependency(TGL_REGDIST_DST, jp, exec_all) :
1034 dependency();
1035
1036 if (inst->writes_accumulator_implicitly(devinfo))
1037 sb.set(brw_acc_reg(8), wr_dep);
1038
1039 if (is_valid(wr_dep) && inst->dst.file != BAD_FILE &&
1040 !inst->dst.is_null()) {
1041 for (unsigned j = 0; j < regs_written(inst); j++)
1042 sb.set(byte_offset(inst->dst, REG_SIZE * j), wr_dep);
1043 }
1044 }
1045
1046 /**
1047 * Calculate scoreboard objects locally that represent any pending (and
1048 * unconditionally resolved) dependencies at the end of each block of the
1049 * program.
1050 */
1051 scoreboard *
1052 gather_block_scoreboards(const fs_visitor *shader,
1053 const ordered_address *jps)
1054 {
1055 scoreboard *sbs = new scoreboard[shader->cfg->num_blocks];
1056 unsigned ip = 0;
1057
1058 foreach_block_and_inst(block, fs_inst, inst, shader->cfg)
1059 update_inst_scoreboard(shader, jps, inst, ip++, sbs[block->num]);
1060
1061 return sbs;
1062 }
1063
1064 /**
1065 * Propagate data dependencies globally through the control flow graph
1066 * until a fixed point is reached.
1067 *
1068 * Calculates the set of dependencies potentially pending at the beginning
1069 * of each block, and returns it as an array of scoreboard objects.
1070 */
1071 scoreboard *
1072 propagate_block_scoreboards(const fs_visitor *shader,
1073 const ordered_address *jps,
1074 equivalence_relation &eq)
1075 {
1076 const scoreboard *delta_sbs = gather_block_scoreboards(shader, jps);
1077 scoreboard *in_sbs = new scoreboard[shader->cfg->num_blocks];
1078 scoreboard *out_sbs = new scoreboard[shader->cfg->num_blocks];
1079
1080 for (bool progress = true; progress;) {
1081 progress = false;
1082
1083 foreach_block(block, shader->cfg) {
1084 const scoreboard sb = shadow(in_sbs[block->num],
1085 delta_sbs[block->num]);
1086
1087 if (sb != out_sbs[block->num]) {
1088 foreach_list_typed(bblock_link, child_link, link,
1089 &block->children) {
1090 scoreboard &in_sb = in_sbs[child_link->block->num];
1091 int delta[IDX(TGL_PIPE_ALL)];
1092
1093 for (unsigned p = 0; p < IDX(TGL_PIPE_ALL); p++)
1094 delta[p] = jps[child_link->block->start_ip].jp[p]
1095 - jps[block->end_ip].jp[p]
1096 - ordered_unit(shader->devinfo,
1097 static_cast<const fs_inst *>(block->end()), p);
1098
1099 in_sb = merge(eq, in_sb, transport(sb, delta));
1100 }
1101
1102 out_sbs[block->num] = sb;
1103 progress = true;
1104 }
1105 }
1106 }
1107
1108 delete[] delta_sbs;
1109 delete[] out_sbs;
1110
1111 return in_sbs;
1112 }
1113
1114 /**
1115 * Return the list of potential dependencies of each instruction in the
1116 * shader based on the result of global dependency analysis.
1117 */
1118 dependency_list *
1119 gather_inst_dependencies(const fs_visitor *shader,
1120 const ordered_address *jps)
1121 {
1122 const struct intel_device_info *devinfo = shader->devinfo;
1123 equivalence_relation eq(num_instructions(shader));
1124 scoreboard *sbs = propagate_block_scoreboards(shader, jps, eq);
1125 const unsigned *ids = eq.flatten();
1126 dependency_list *deps = new dependency_list[num_instructions(shader)];
1127 unsigned ip = 0;
1128
1129 foreach_block_and_inst(block, fs_inst, inst, shader->cfg) {
1130 const bool exec_all = inst->force_writemask_all;
1131 const tgl_pipe p = inferred_exec_pipe(devinfo, inst);
1132 scoreboard &sb = sbs[block->num];
1133
1134 for (unsigned i = 0; i < inst->sources; i++) {
1135 for (unsigned j = 0; j < regs_read(inst, i); j++)
1136 add_dependency(ids, deps[ip], dependency_for_read(
1137 sb.get(byte_offset(inst->src[i], REG_SIZE * j))));
1138 }
1139
1140 if (inst->reads_accumulator_implicitly()) {
1141 /* Wa_22012725308:
1142 *
1143 * "When the accumulator registers are used as source and/or
1144 * destination, hardware does not ensure prevention of write
1145 * after read hazard across execution pipes."
1146 */
1147 const dependency dep = sb.get(brw_acc_reg(8));
1148 if (dep.ordered && !is_single_pipe(dep.jp, p))
1149 add_dependency(ids, deps[ip], dep);
1150 }
1151
1152 if (is_send(inst) && inst->base_mrf != -1) {
1153 for (unsigned j = 0; j < inst->mlen; j++)
1154 add_dependency(ids, deps[ip], dependency_for_read(
1155 sb.get(brw_uvec_mrf(8, inst->base_mrf + j, 0))));
1156 }
1157
1158 if (is_unordered(inst) && !inst->eot)
1159 add_dependency(ids, deps[ip],
1160 dependency(TGL_SBID_SET, ip, exec_all));
1161
1162 if (!inst->no_dd_check) {
1163 if (inst->dst.file != BAD_FILE && !inst->dst.is_null() &&
1164 !inst->dst.is_accumulator()) {
1165 for (unsigned j = 0; j < regs_written(inst); j++) {
1166 add_dependency(ids, deps[ip], dependency_for_write(devinfo, inst,
1167 sb.get(byte_offset(inst->dst, REG_SIZE * j))));
1168 }
1169 }
1170
1171 if (inst->writes_accumulator_implicitly(devinfo) ||
1172 inst->dst.is_accumulator()) {
1173 /* Wa_22012725308:
1174 *
1175 * "When the accumulator registers are used as source and/or
1176 * destination, hardware does not ensure prevention of write
1177 * after read hazard across execution pipes."
1178 */
1179 const dependency dep = sb.get(brw_acc_reg(8));
1180 if (dep.ordered && !is_single_pipe(dep.jp, p))
1181 add_dependency(ids, deps[ip], dep);
1182 }
1183
1184 if (is_send(inst) && inst->base_mrf != -1) {
1185 for (unsigned j = 0; j < inst->implied_mrf_writes(); j++)
1186 add_dependency(ids, deps[ip], dependency_for_write(devinfo, inst,
1187 sb.get(brw_uvec_mrf(8, inst->base_mrf + j, 0))));
1188 }
1189 }
1190
1191 update_inst_scoreboard(shader, jps, inst, ip, sb);
1192 ip++;
1193 }
1194
1195 delete[] sbs;
1196 delete[] ids;
1197
1198 return deps;
1199 }
1200
1201 /** @} */
1202
1203 /**
1204 * Allocate SBID tokens to track the execution of every out-of-order
1205 * instruction of the shader.
1206 */
1207 dependency_list *
1208 allocate_inst_dependencies(const fs_visitor *shader,
1209 const dependency_list *deps0)
1210 {
1211 /* XXX - Use bin-packing algorithm to assign hardware SBIDs optimally in
1212 * shaders with a large number of SEND messages.
1213 */
1214
1215 /* Allocate an unordered dependency ID to hardware SBID translation
1216 * table with as many entries as there are instructions in the shader,
1217 * which is the maximum number of unordered IDs we can find in the
1218 * program.
1219 */
1220 unsigned *ids = new unsigned[num_instructions(shader)];
1221 for (unsigned ip = 0; ip < num_instructions(shader); ip++)
1222 ids[ip] = ~0u;
1223
1224 dependency_list *deps1 = new dependency_list[num_instructions(shader)];
1225 unsigned next_id = 0;
1226
1227 for (unsigned ip = 0; ip < num_instructions(shader); ip++) {
1228 for (unsigned i = 0; i < deps0[ip].size(); i++) {
1229 const dependency &dep = deps0[ip][i];
1230
1231 if (dep.unordered && ids[dep.id] == ~0u)
1232 ids[dep.id] = (next_id++) & 0xf;
1233
1234 add_dependency(ids, deps1[ip], dep);
1235 }
1236 }
1237
1238 delete[] ids;
1239
1240 return deps1;
1241 }
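
/*
 * Note (illustrative): because of the "& 0xf" masking above, SBIDs are
 * handed out round-robin over the 16 hardware tokens, so e.g. the 17th
 * distinct unordered dependency ID encountered in the program would reuse
 * SBID 0; the XXX note above refers to assigning these tokens more
 * carefully in SEND-heavy shaders.
 */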
1242
1243 /**
1244 * Emit dependency information provided by \p deps into the shader,
1245 * inserting additional SYNC instructions for dependencies that can't be
1246 * represented directly by annotating existing instructions.
1247 */
1248 void
1249 emit_inst_dependencies(fs_visitor *shader,
1250 const ordered_address *jps,
1251 const dependency_list *deps)
1252 {
1253 const struct intel_device_info *devinfo = shader->devinfo;
1254 unsigned ip = 0;
1255
1256 foreach_block_and_inst_safe(block, fs_inst, inst, shader->cfg) {
1257 const bool exec_all = inst->force_writemask_all;
1258 const bool ordered_mode =
1259 baked_ordered_dependency_mode(devinfo, inst, deps[ip], jps[ip]);
1260 const tgl_sbid_mode unordered_mode =
1261 baked_unordered_dependency_mode(devinfo, inst, deps[ip], jps[ip]);
1262 tgl_swsb swsb = !ordered_mode ? tgl_swsb() :
1263 ordered_dependency_swsb(deps[ip], jps[ip], exec_all);
1264
1265 for (unsigned i = 0; i < deps[ip].size(); i++) {
1266 const dependency &dep = deps[ip][i];
1267
1268 if (dep.unordered) {
1269 if (unordered_mode == dep.unordered &&
1270 exec_all >= dep.exec_all && !swsb.mode) {
1271 /* Bake unordered dependency into the instruction's SWSB if
1272 * possible, except in cases where the current instruction
1273 * isn't marked NoMask but the dependency is, since that
1274 * might lead to data coherency issues due to
1275 * Wa_1407528679.
1276 */
1277 swsb.sbid = dep.id;
1278 swsb.mode = dep.unordered;
1279 } else {
1280 /* Emit dependency into the SWSB of an extra SYNC
1281 * instruction.
1282 */
1283 const fs_builder ibld = fs_builder(shader, block, inst)
1284 .exec_all().group(1, 0);
1285 fs_inst *sync = ibld.emit(BRW_OPCODE_SYNC, ibld.null_reg_ud(),
1286 brw_imm_ud(TGL_SYNC_NOP));
1287 sync->sched.sbid = dep.id;
1288 sync->sched.mode = dep.unordered;
1289 assert(!(sync->sched.mode & TGL_SBID_SET));
1290 }
1291 }
1292 }
1293
1294 for (unsigned i = 0; i < deps[ip].size(); i++) {
1295 const dependency &dep = deps[ip][i];
1296
1297 if (dep.ordered &&
1298 find_ordered_dependency(deps[ip], jps[ip], true) &&
1299 (!ordered_mode || dep.exec_all > exec_all)) {
1300 /* If the current instruction is not marked NoMask but an
1301 * ordered dependency is, perform the synchronization as a
1302 * separate NoMask SYNC instruction in order to avoid data
1303 * coherency issues due to Wa_1407528679. The similar
1304 * scenario with unordered dependencies should have been
1305 * handled above.
1306 */
1307 const fs_builder ibld = fs_builder(shader, block, inst)
1308 .exec_all().group(1, 0);
1309 fs_inst *sync = ibld.emit(BRW_OPCODE_SYNC, ibld.null_reg_ud(),
1310 brw_imm_ud(TGL_SYNC_NOP));
1311 sync->sched = ordered_dependency_swsb(deps[ip], jps[ip], true);
1312 break;
1313 }
1314 }
1315
1316 /* Update the IR. */
1317 inst->sched = swsb;
1318 inst->no_dd_check = inst->no_dd_clear = false;
1319 ip++;
1320 }
1321 }
1322 }
1323
1324 bool
1325 fs_visitor::lower_scoreboard()
1326 {
1327 if (devinfo->ver >= 12) {
1328 const ordered_address *jps = ordered_inst_addresses(this);
1329 const dependency_list *deps0 = gather_inst_dependencies(this, jps);
1330 const dependency_list *deps1 = allocate_inst_dependencies(this, deps0);
1331 emit_inst_dependencies(this, jps, deps1);
1332 delete[] deps1;
1333 delete[] deps0;
1334 delete[] jps;
1335 }
1336
1337 return true;
1338 }
1339