1/*
2 * Copyright 2011 Joakim Sindholt <opensource@zhasha.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
22
23#include "vertexdeclaration9.h"
24#include "vertexbuffer9.h"
25#include "device9.h"
26#include "nine_helpers.h"
27#include "nine_shader.h"
28
29#include "pipe/p_format.h"
30#include "pipe/p_context.h"
31#include "util/u_math.h"
32#include "util/format/u_format.h"
33#include "translate/translate.h"
34
35#define DBG_CHANNEL DBG_VERTEXDECLARATION
36
37static inline enum pipe_format decltype_format(BYTE type)
38{
39    switch (type) {
40    case D3DDECLTYPE_FLOAT1:    return PIPE_FORMAT_R32_FLOAT;
41    case D3DDECLTYPE_FLOAT2:    return PIPE_FORMAT_R32G32_FLOAT;
42    case D3DDECLTYPE_FLOAT3:    return PIPE_FORMAT_R32G32B32_FLOAT;
43    case D3DDECLTYPE_FLOAT4:    return PIPE_FORMAT_R32G32B32A32_FLOAT;
44    case D3DDECLTYPE_D3DCOLOR:  return PIPE_FORMAT_B8G8R8A8_UNORM;
45    case D3DDECLTYPE_UBYTE4:    return PIPE_FORMAT_R8G8B8A8_USCALED;
46    case D3DDECLTYPE_SHORT2:    return PIPE_FORMAT_R16G16_SSCALED;
47    case D3DDECLTYPE_SHORT4:    return PIPE_FORMAT_R16G16B16A16_SSCALED;
48    case D3DDECLTYPE_UBYTE4N:   return PIPE_FORMAT_R8G8B8A8_UNORM;
49    case D3DDECLTYPE_SHORT2N:   return PIPE_FORMAT_R16G16_SNORM;
50    case D3DDECLTYPE_SHORT4N:   return PIPE_FORMAT_R16G16B16A16_SNORM;
51    case D3DDECLTYPE_USHORT2N:  return PIPE_FORMAT_R16G16_UNORM;
52    case D3DDECLTYPE_USHORT4N:  return PIPE_FORMAT_R16G16B16A16_UNORM;
53    case D3DDECLTYPE_UDEC3:     return PIPE_FORMAT_R10G10B10X2_USCALED;
54    case D3DDECLTYPE_DEC3N:     return PIPE_FORMAT_R10G10B10X2_SNORM;
55    case D3DDECLTYPE_FLOAT16_2: return PIPE_FORMAT_R16G16_FLOAT;
56    case D3DDECLTYPE_FLOAT16_4: return PIPE_FORMAT_R16G16B16A16_FLOAT;
57    default:
58        assert(!"Implementation error !");
59    }
60    return PIPE_FORMAT_NONE;
61}
62
63static inline unsigned decltype_size(BYTE type)
64{
65    switch (type) {
66    case D3DDECLTYPE_FLOAT1: return 1 * sizeof(float);
67    case D3DDECLTYPE_FLOAT2: return 2 * sizeof(float);
68    case D3DDECLTYPE_FLOAT3: return 3 * sizeof(float);
69    case D3DDECLTYPE_FLOAT4: return 4 * sizeof(float);
70    case D3DDECLTYPE_D3DCOLOR: return 1 * sizeof(DWORD);
71    case D3DDECLTYPE_UBYTE4: return 4 * sizeof(BYTE);
72    case D3DDECLTYPE_SHORT2: return 2 * sizeof(short);
73    case D3DDECLTYPE_SHORT4: return 4 * sizeof(short);
74    case D3DDECLTYPE_UBYTE4N: return 4 * sizeof(BYTE);
75    case D3DDECLTYPE_SHORT2N: return 2 * sizeof(short);
76    case D3DDECLTYPE_SHORT4N: return 4 * sizeof(short);
77    case D3DDECLTYPE_USHORT2N: return 2 * sizeof(short);
78    case D3DDECLTYPE_USHORT4N: return 4 * sizeof(short);
79    case D3DDECLTYPE_UDEC3: return 4;
80    case D3DDECLTYPE_DEC3N: return 4;
81    case D3DDECLTYPE_FLOAT16_2: return 2 * 2;
82    case D3DDECLTYPE_FLOAT16_4: return 4 * 2;
83    default:
84        assert(!"Implementation error !");
85    }
86    return 0;
87}
88
89/* Actually, arbitrary usage index values are permitted, but a
90 * simple lookup table won't work in that case. Let's just wait
91 * with making this more generic until we need it.
92 */
93static inline boolean
94nine_d3ddeclusage_check(unsigned usage, unsigned usage_idx)
95{
96    switch (usage) {
97    case D3DDECLUSAGE_POSITIONT:
98    case D3DDECLUSAGE_TESSFACTOR:
99    case D3DDECLUSAGE_DEPTH:
100    case D3DDECLUSAGE_NORMAL:
101    case D3DDECLUSAGE_TANGENT:
102    case D3DDECLUSAGE_BINORMAL:
103    case D3DDECLUSAGE_POSITION:
104    case D3DDECLUSAGE_BLENDWEIGHT:
105    case D3DDECLUSAGE_BLENDINDICES:
106    case D3DDECLUSAGE_COLOR:
107        return TRUE;
108    case D3DDECLUSAGE_PSIZE:
109    case D3DDECLUSAGE_FOG:
110    case D3DDECLUSAGE_SAMPLE:
111        return usage_idx <= 0;
112    case D3DDECLUSAGE_TEXCOORD:
113        return usage_idx <= 15;
114    default:
115        return FALSE;
116    }
117}
118
119#define NINE_DECLUSAGE_CASE0(n) case D3DDECLUSAGE_##n: return NINE_DECLUSAGE_##n
120#define NINE_DECLUSAGE_CASEi(n) case D3DDECLUSAGE_##n: return NINE_DECLUSAGE_i(n, usage_idx)
121uint16_t
122nine_d3d9_to_nine_declusage(unsigned usage, unsigned usage_idx)
123{
124    if (!nine_d3ddeclusage_check(usage, usage_idx))
125        ERR("D3DDECLUSAGE_%u[%u]\n",usage,usage_idx);
126    assert(nine_d3ddeclusage_check(usage, usage_idx));
127    switch (usage) {
128    NINE_DECLUSAGE_CASEi(POSITION);
129    NINE_DECLUSAGE_CASEi(BLENDWEIGHT);
130    NINE_DECLUSAGE_CASEi(BLENDINDICES);
131    NINE_DECLUSAGE_CASEi(NORMAL);
132    NINE_DECLUSAGE_CASE0(PSIZE);
133    NINE_DECLUSAGE_CASEi(TEXCOORD);
134    NINE_DECLUSAGE_CASEi(TANGENT);
135    NINE_DECLUSAGE_CASEi(BINORMAL);
136    NINE_DECLUSAGE_CASE0(TESSFACTOR);
137    NINE_DECLUSAGE_CASEi(POSITIONT);
138    NINE_DECLUSAGE_CASEi(COLOR);
139    NINE_DECLUSAGE_CASE0(DEPTH);
140    NINE_DECLUSAGE_CASE0(FOG);
141    NINE_DECLUSAGE_CASE0(SAMPLE);
142    default:
143        assert(!"Invalid DECLUSAGE.");
144        return NINE_DECLUSAGE_NONE;
145    }
146}
147
148static const char *nine_declusage_names[] =
149{
150    [NINE_DECLUSAGE_POSITION]        = "POSITION",
151    [NINE_DECLUSAGE_BLENDWEIGHT]     = "BLENDWEIGHT",
152    [NINE_DECLUSAGE_BLENDINDICES]    = "BLENDINDICES",
153    [NINE_DECLUSAGE_NORMAL]          = "NORMAL",
154    [NINE_DECLUSAGE_PSIZE]           = "PSIZE",
155    [NINE_DECLUSAGE_TEXCOORD]        = "TEXCOORD",
156    [NINE_DECLUSAGE_TANGENT]         = "TANGENT",
157    [NINE_DECLUSAGE_BINORMAL]        = "BINORMAL",
158    [NINE_DECLUSAGE_TESSFACTOR]      = "TESSFACTOR",
159    [NINE_DECLUSAGE_POSITIONT]       = "POSITIONT",
160    [NINE_DECLUSAGE_COLOR]           = "DIFFUSE",
161    [NINE_DECLUSAGE_DEPTH]           = "DEPTH",
162    [NINE_DECLUSAGE_FOG]             = "FOG",
163    [NINE_DECLUSAGE_NONE]            = "(NONE)",
164};
165static inline const char *
166nine_declusage_name(unsigned ndcl)
167{
168    return nine_declusage_names[ndcl % NINE_DECLUSAGE_COUNT];
169}
170
171HRESULT
172NineVertexDeclaration9_ctor( struct NineVertexDeclaration9 *This,
173                             struct NineUnknownParams *pParams,
174                             const D3DVERTEXELEMENT9 *pElements )
175{
176    const D3DCAPS9 *caps;
177    unsigned i, nelems;
178    DBG("This=%p pParams=%p pElements=%p\n", This, pParams, pElements);
179
180    /* wine */
181    for (nelems = 0;
182         pElements[nelems].Stream != 0xFF;
183         ++nelems) {
184        user_assert(pElements[nelems].Type != D3DDECLTYPE_UNUSED, E_FAIL);
185        user_assert(!(pElements[nelems].Offset & 3), E_FAIL);
186    }
187
188    caps = NineDevice9_GetCaps(pParams->device);
189    user_assert(nelems <= caps->MaxStreams, D3DERR_INVALIDCALL);
190
191    HRESULT hr = NineUnknown_ctor(&This->base, pParams);
192    if (FAILED(hr)) { return hr; }
193
194    This->nelems = nelems;
195    This->decls = CALLOC(This->nelems+1, sizeof(D3DVERTEXELEMENT9));
196    This->elems = CALLOC(This->nelems, sizeof(struct pipe_vertex_element));
197    This->usage_map = CALLOC(This->nelems, sizeof(uint16_t));
198    if (!This->decls || !This->elems || !This->usage_map) { return E_OUTOFMEMORY; }
199    memcpy(This->decls, pElements, sizeof(D3DVERTEXELEMENT9)*(This->nelems+1));
200
201    for (i = 0; i < This->nelems; ++i) {
202        uint16_t usage = nine_d3d9_to_nine_declusage(This->decls[i].Usage,
203                                                     This->decls[i].UsageIndex);
204        This->usage_map[i] = usage;
205
206        if (This->decls[i].Usage == D3DDECLUSAGE_POSITIONT)
207            This->position_t = TRUE;
208
209        This->elems[i].src_offset = This->decls[i].Offset;
210        This->elems[i].instance_divisor = 0;
211        This->elems[i].vertex_buffer_index = This->decls[i].Stream;
212        This->elems[i].src_format = decltype_format(This->decls[i].Type);
213        This->elems[i].dual_slot = false;
214        /* XXX Remember Method (tesselation), Usage, UsageIndex */
215
216        DBG("VERTEXELEMENT[%u]: Stream=%u Offset=%u Type=%s DeclUsage=%s%d\n", i,
217            This->decls[i].Stream,
218            This->decls[i].Offset,
219            util_format_name(This->elems[i].src_format),
220            nine_declusage_name(usage),
221            usage / NINE_DECLUSAGE_COUNT);
222    }
223
224    return D3D_OK;
225}
226
227void
228NineVertexDeclaration9_dtor( struct NineVertexDeclaration9 *This )
229{
230    DBG("This=%p\n", This);
231
232    FREE(This->decls);
233    FREE(This->elems);
234    FREE(This->usage_map);
235
236    NineUnknown_dtor(&This->base);
237}
238
239HRESULT NINE_WINAPI
240NineVertexDeclaration9_GetDeclaration( struct NineVertexDeclaration9 *This,
241                                       D3DVERTEXELEMENT9 *pElement,
242                                       UINT *pNumElements )
243{
244    if (!pElement) {
245        user_assert(pNumElements, D3DERR_INVALIDCALL);
246        *pNumElements = This->nelems+1;
247        return D3D_OK;
248    }
249    if (pNumElements) { *pNumElements = This->nelems+1; }
250    memcpy(pElement, This->decls, sizeof(D3DVERTEXELEMENT9)*(This->nelems+1));
251    return D3D_OK;
252}
253
254IDirect3DVertexDeclaration9Vtbl NineVertexDeclaration9_vtable = {
255    (void *)NineUnknown_QueryInterface,
256    (void *)NineUnknown_AddRef,
257    (void *)NineUnknown_Release,
258    (void *)NineUnknown_GetDevice, /* actually part of VertexDecl9 iface */
259    (void *)NineVertexDeclaration9_GetDeclaration
260};
261
262static const GUID *NineVertexDeclaration9_IIDs[] = {
263    &IID_IDirect3DVertexDeclaration9,
264    &IID_IUnknown,
265    NULL
266};
267
268HRESULT
269NineVertexDeclaration9_new( struct NineDevice9 *pDevice,
270                            const D3DVERTEXELEMENT9 *pElements,
271                            struct NineVertexDeclaration9 **ppOut )
272{
273    NINE_DEVICE_CHILD_NEW(VertexDeclaration9, ppOut, /* args */ pDevice, pElements);
274}
275
276HRESULT
277NineVertexDeclaration9_new_from_fvf( struct NineDevice9 *pDevice,
278                                     DWORD FVF,
279                                     struct NineVertexDeclaration9 **ppOut )
280{
281    D3DVERTEXELEMENT9 elems[16], decl_end = D3DDECL_END();
282    unsigned texcount, i, betas, nelems = 0;
283    BYTE beta_index = 0xFF;
284
285    switch (FVF & D3DFVF_POSITION_MASK) {
286        case D3DFVF_XYZ: /* simple XYZ */
287        case D3DFVF_XYZB1:
288        case D3DFVF_XYZB2:
289        case D3DFVF_XYZB3:
290        case D3DFVF_XYZB4:
291        case D3DFVF_XYZB5: /* XYZ with beta values */
292            elems[nelems].Type = D3DDECLTYPE_FLOAT3;
293            elems[nelems].Usage = D3DDECLUSAGE_POSITION;
294            elems[nelems].UsageIndex = 0;
295            ++nelems;
296            /* simple XYZ has no beta values. break. */
297            if ((FVF & D3DFVF_POSITION_MASK) == D3DFVF_XYZ) { break; }
298
299            betas = (((FVF & D3DFVF_XYZB5)-D3DFVF_XYZB1)>>1)+1;
300            if (FVF & D3DFVF_LASTBETA_D3DCOLOR) {
301                beta_index = D3DDECLTYPE_D3DCOLOR;
302            } else if (FVF & D3DFVF_LASTBETA_UBYTE4) {
303                beta_index = D3DDECLTYPE_UBYTE4;
304            } else if ((FVF & D3DFVF_XYZB5) == D3DFVF_XYZB5) {
305                beta_index = D3DDECLTYPE_FLOAT1;
306            }
307            if (beta_index != 0xFF) { --betas; }
308
309            if (betas > 0) {
310                switch (betas) {
311                    case 1: elems[nelems].Type = D3DDECLTYPE_FLOAT1; break;
312                    case 2: elems[nelems].Type = D3DDECLTYPE_FLOAT2; break;
313                    case 3: elems[nelems].Type = D3DDECLTYPE_FLOAT3; break;
314                    case 4: elems[nelems].Type = D3DDECLTYPE_FLOAT4; break;
315                    default:
316                        assert(!"Implementation error!");
317                }
318                elems[nelems].Usage = D3DDECLUSAGE_BLENDWEIGHT;
319                elems[nelems].UsageIndex = 0;
320                ++nelems;
321            }
322
323            if (beta_index != 0xFF) {
324                elems[nelems].Type = beta_index;
325                elems[nelems].Usage = D3DDECLUSAGE_BLENDINDICES;
326                elems[nelems].UsageIndex = 0;
327                ++nelems;
328            }
329            break;
330
331        case D3DFVF_XYZW: /* simple XYZW */
332        case D3DFVF_XYZRHW: /* pretransformed XYZW */
333            elems[nelems].Type = D3DDECLTYPE_FLOAT4;
334            elems[nelems].Usage =
335                ((FVF & D3DFVF_POSITION_MASK) == D3DFVF_XYZW) ?
336                D3DDECLUSAGE_POSITION : D3DDECLUSAGE_POSITIONT;
337            elems[nelems].UsageIndex = 0;
338            ++nelems;
339            break;
340
341        default:
342            (void)user_error(!"Position doesn't match any known combination");
343    }
344
345    /* normals, psize and colors */
346    if (FVF & D3DFVF_NORMAL) {
347        elems[nelems].Type = D3DDECLTYPE_FLOAT3;
348        elems[nelems].Usage = D3DDECLUSAGE_NORMAL;
349        elems[nelems].UsageIndex = 0;
350        ++nelems;
351    }
352    if (FVF & D3DFVF_PSIZE) {
353        elems[nelems].Type = D3DDECLTYPE_FLOAT1;
354        elems[nelems].Usage = D3DDECLUSAGE_PSIZE;
355        elems[nelems].UsageIndex = 0;
356        ++nelems;
357    }
358    if (FVF & D3DFVF_DIFFUSE) {
359        elems[nelems].Type = D3DDECLTYPE_D3DCOLOR;
360        elems[nelems].Usage = D3DDECLUSAGE_COLOR;
361        elems[nelems].UsageIndex = 0;
362        ++nelems;
363    }
364    if (FVF & D3DFVF_SPECULAR) {
365        elems[nelems].Type = D3DDECLTYPE_D3DCOLOR;
366        elems[nelems].Usage = D3DDECLUSAGE_COLOR;
367        elems[nelems].UsageIndex = 1;
368        ++nelems;
369    }
370
371    /* textures */
372    texcount = (FVF & D3DFVF_TEXCOUNT_MASK) >> D3DFVF_TEXCOUNT_SHIFT;
373    if (user_error(texcount <= 8)) { texcount = 8; }
374
375    for (i = 0; i < texcount; ++i) {
376        switch ((FVF >> (16+i*2)) & 0x3) {
377            case D3DFVF_TEXTUREFORMAT1:
378                elems[nelems].Type = D3DDECLTYPE_FLOAT1;
379                break;
380
381            case D3DFVF_TEXTUREFORMAT2:
382                elems[nelems].Type = D3DDECLTYPE_FLOAT2;
383                break;
384
385            case D3DFVF_TEXTUREFORMAT3:
386                elems[nelems].Type = D3DDECLTYPE_FLOAT3;
387                break;
388
389            case D3DFVF_TEXTUREFORMAT4:
390                elems[nelems].Type = D3DDECLTYPE_FLOAT4;
391                break;
392
393            default:
394                assert(!"Implementation error!");
395        }
396        elems[nelems].Usage = D3DDECLUSAGE_TEXCOORD;
397        elems[nelems].UsageIndex = i;
398        ++nelems;
399    }
400
401    /* fill out remaining data */
402    for (i = 0; i < nelems; ++i) {
403        elems[i].Stream = 0;
404        elems[i].Offset = (i == 0) ? 0 : (elems[i-1].Offset +
405                                          decltype_size(elems[i-1].Type));
406        elems[i].Method = D3DDECLMETHOD_DEFAULT;
407    }
408    elems[nelems++] = decl_end;
409
410    NINE_DEVICE_CHILD_NEW(VertexDeclaration9, ppOut, /* args */ pDevice, elems);
411}
412
413void
414NineVertexDeclaration9_FillStreamOutputInfo(
415    struct NineVertexDeclaration9 *This,
416    struct nine_vs_output_info *ShaderOutputsInfo,
417    unsigned numOutputs,
418    struct pipe_stream_output_info *so )
419{
420    unsigned so_outputs = 0;
421    int i, j;
422
423    memset(so, 0, sizeof(struct pipe_stream_output_info));
424
425    for (i = 0; i < numOutputs; i++) {
426        BYTE output_semantic = ShaderOutputsInfo[i].output_semantic;
427        unsigned output_semantic_index = ShaderOutputsInfo[i].output_semantic_index;
428
429        for (j = 0; j < This->nelems; j++) {
430            if ((This->decls[j].Usage == output_semantic ||
431                 (output_semantic == D3DDECLUSAGE_POSITION &&
432                  This->decls[j].Usage == D3DDECLUSAGE_POSITIONT)) &&
433                This->decls[j].UsageIndex == output_semantic_index) {
434                DBG("Matching %s %d: o%d -> %d\n",
435                    nine_declusage_name(nine_d3d9_to_nine_declusage(This->decls[j].Usage, 0)),
436                    This->decls[j].UsageIndex, i, j);
437                so->output[so_outputs].register_index = ShaderOutputsInfo[i].output_index;
438                so->output[so_outputs].start_component = 0;
439                if (ShaderOutputsInfo[i].mask & 8)
440                    so->output[so_outputs].num_components = 4;
441                else if (ShaderOutputsInfo[i].mask & 4)
442                    so->output[so_outputs].num_components = 3;
443                else if (ShaderOutputsInfo[i].mask & 2)
444                    so->output[so_outputs].num_components = 2;
445                else
446                    so->output[so_outputs].num_components = 1;
447                so->output[so_outputs].output_buffer = 0;
448                so->output[so_outputs].dst_offset = so_outputs * sizeof(float[4])/4;
449                so->output[so_outputs].stream = 0;
450                so_outputs++;
451                break;
452            }
453        }
454    }
455
456    so->num_outputs = so_outputs;
457    so->stride[0] = so_outputs * sizeof(float[4])/4;
458}
459
460/* ProcessVertices runs stream output into a temporary buffer to capture
461 * all outputs.
462 * Now we have to convert them to the format and order set by the vertex
463 * declaration, for which we use u_translate.
464 * This is necessary if the vertex declaration contains elements using a
465 * non float32 format, because stream output only supports f32/u32/s32.
466 */
467HRESULT
468NineVertexDeclaration9_ConvertStreamOutput(
469    struct NineVertexDeclaration9 *This,
470    struct NineVertexBuffer9 *pDstBuf,
471    UINT DestIndex,
472    UINT VertexCount,
473    void *pSrcBuf,
474    const struct pipe_stream_output_info *so )
475{
476    struct translate *translate;
477    struct translate_key transkey;
478    HRESULT hr;
479    unsigned i;
480    void *dst_map;
481
482    DBG("This=%p pDstBuf=%p DestIndex=%u VertexCount=%u pSrcBuf=%p so=%p\n",
483        This, pDstBuf, DestIndex, VertexCount, pSrcBuf, so);
484
485    transkey.output_stride = 0;
486    for (i = 0; i < This->nelems; ++i) {
487        enum pipe_format format;
488
489        switch (so->output[i].num_components) {
490        case 1: format = PIPE_FORMAT_R32_FLOAT; break;
491        case 2: format = PIPE_FORMAT_R32G32_FLOAT; break;
492        case 3: format = PIPE_FORMAT_R32G32B32_FLOAT; break;
493        default:
494            assert(so->output[i].num_components == 4);
495            format = PIPE_FORMAT_R32G32B32A32_FLOAT;
496            break;
497        }
498        transkey.element[i].type = TRANSLATE_ELEMENT_NORMAL;
499        transkey.element[i].input_format = format;
500        transkey.element[i].input_buffer = 0;
501        transkey.element[i].input_offset = so->output[i].dst_offset * 4;
502        transkey.element[i].instance_divisor = 0;
503
504        transkey.element[i].output_format = This->elems[i].src_format;
505        transkey.element[i].output_offset = This->elems[i].src_offset;
506        transkey.output_stride +=
507            util_format_get_blocksize(This->elems[i].src_format);
508
509        assert(!(transkey.output_stride & 3));
510    }
511    transkey.nr_elements = This->nelems;
512
513    translate = translate_create(&transkey);
514    if (!translate)
515        return E_OUTOFMEMORY;
516
517    hr = NineVertexBuffer9_Lock(pDstBuf,
518                                transkey.output_stride * DestIndex,
519                                transkey.output_stride * VertexCount,
520                                &dst_map, D3DLOCK_DISCARD);
521    if (FAILED(hr))
522        goto out;
523
524    translate->set_buffer(translate, 0, pSrcBuf, so->stride[0] * 4, ~0);
525
526    translate->run(translate, 0, VertexCount, 0, 0, dst_map);
527
528    NineVertexBuffer9_Unlock(pDstBuf);
529out:
530    translate->release(translate); /* TODO: cache these */
531    return hr;
532}
533