Lines Matching refs:cpu
31 v3d_load_utile(void *cpu, uint32_t cpu_stride,
41 /* Store each 8-byte line to cpu-side destination,
44 "vst1.8 d0, [%[cpu]], %[cpu_stride]\n"
45 "vst1.8 d1, [%[cpu]], %[cpu_stride]\n"
46 "vst1.8 d2, [%[cpu]], %[cpu_stride]\n"
47 "vst1.8 d3, [%[cpu]], %[cpu_stride]\n"
48 "vst1.8 d4, [%[cpu]], %[cpu_stride]\n"
49 "vst1.8 d5, [%[cpu]], %[cpu_stride]\n"
50 "vst1.8 d6, [%[cpu]], %[cpu_stride]\n"
51 "vst1.8 d7, [%[cpu]]\n"
52 : [cpu] "+r"(cpu)
58 void *cpu2 = cpu + 8;
64 /* Store each 16-byte line in 2 parts to the cpu-side
68 "vst1.8 d0, [%[cpu]], %[cpu_stride]\n"
70 "vst1.8 d2, [%[cpu]], %[cpu_stride]\n"
72 "vst1.8 d4, [%[cpu]], %[cpu_stride]\n"
74 "vst1.8 d6, [%[cpu]]\n"
76 : [cpu] "+r"(cpu),
90 /* Store each 8-byte line to cpu-side destination,
93 "st1 {v0.D}[0], [%[cpu]], %[cpu_stride]\n"
94 "st1 {v0.D}[1], [%[cpu]], %[cpu_stride]\n"
95 "st1 {v1.D}[0], [%[cpu]], %[cpu_stride]\n"
96 "st1 {v1.D}[1], [%[cpu]], %[cpu_stride]\n"
97 "st1 {v2.D}[0], [%[cpu]], %[cpu_stride]\n"
98 "st1 {v2.D}[1], [%[cpu]], %[cpu_stride]\n"
99 "st1 {v3.D}[0], [%[cpu]], %[cpu_stride]\n"
100 "st1 {v3.D}[1], [%[cpu]]\n"
101 : [cpu] "+r"(cpu)
107 void *cpu2 = cpu + 8;
113 /* Store each 16-byte line in 2 parts to the cpu-side
117 "st1 {v0.D}[0], [%[cpu]], %[cpu_stride]\n"
119 "st1 {v1.D}[0], [%[cpu]], %[cpu_stride]\n"
121 "st1 {v2.D}[0], [%[cpu]], %[cpu_stride]\n"
123 "st1 {v3.D}[0], [%[cpu]]\n"
125 : [cpu] "+r"(cpu),
135 memcpy(cpu, gpu + gpu_offset, gpu_stride);
136 cpu += cpu_stride;
142 void *cpu, uint32_t cpu_stride)
147 /* Load each 8-byte line from cpu-side source,
150 "vld1.8 d0, [%[cpu]], %[cpu_stride]\n"
151 "vld1.8 d1, [%[cpu]], %[cpu_stride]\n"
152 "vld1.8 d2, [%[cpu]], %[cpu_stride]\n"
153 "vld1.8 d3, [%[cpu]], %[cpu_stride]\n"
154 "vld1.8 d4, [%[cpu]], %[cpu_stride]\n"
155 "vld1.8 d5, [%[cpu]], %[cpu_stride]\n"
156 "vld1.8 d6, [%[cpu]], %[cpu_stride]\n"
157 "vld1.8 d7, [%[cpu]]\n"
162 : [cpu] "+r"(cpu)
168 void *cpu2 = cpu + 8;
170 /* Load each 16-byte line in 2 parts from the cpu-side
174 "vld1.8 d0, [%[cpu]], %[cpu_stride]\n"
176 "vld1.8 d2, [%[cpu]], %[cpu_stride]\n"
178 "vld1.8 d4, [%[cpu]], %[cpu_stride]\n"
180 "vld1.8 d6, [%[cpu]]\n"
184 : [cpu] "+r"(cpu),
194 /* Load each 8-byte line from cpu-side source,
197 "ld1 {v0.D}[0], [%[cpu]], %[cpu_stride]\n"
198 "ld1 {v0.D}[1], [%[cpu]], %[cpu_stride]\n"
199 "ld1 {v1.D}[0], [%[cpu]], %[cpu_stride]\n"
200 "ld1 {v1.D}[1], [%[cpu]], %[cpu_stride]\n"
201 "ld1 {v2.D}[0], [%[cpu]], %[cpu_stride]\n"
202 "ld1 {v2.D}[1], [%[cpu]], %[cpu_stride]\n"
203 "ld1 {v3.D}[0], [%[cpu]], %[cpu_stride]\n"
204 "ld1 {v3.D}[1], [%[cpu]]\n"
207 : [cpu] "+r"(cpu)
213 void *cpu2 = cpu + 8;
215 /* Load each 16-byte line in 2 parts from the cpu-side
219 "ld1 {v0.D}[0], [%[cpu]], %[cpu_stride]\n"
221 "ld1 {v1.D}[0], [%[cpu]], %[cpu_stride]\n"
223 "ld1 {v2.D}[0], [%[cpu]], %[cpu_stride]\n"
225 "ld1 {v3.D}[0], [%[cpu]]\n"
229 : [cpu] "+r"(cpu),
239 memcpy(gpu + gpu_offset, cpu, gpu_stride);
240 cpu += cpu_stride;