1/*
2 * libtxc_dxtn
3 * Version:  1.0
4 *
5 * Copyright (C) 2004  Roland Scheidegger   All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25#ifndef TEXCOMPRESS_S3TC_TMP_H
26#define TEXCOMPRESS_S3TC_TMP_H
27
28#ifdef __APPLE__
29#include <OpenGL/gl.h>
30#else
31#include <GL/gl.h>
32#endif
33
/* Channel type of a decoded texel component: one unsigned byte. */
typedef GLubyte GLchan;
/* Convert an unsigned byte to a GLchan; the identity, since GLchan is GLubyte. */
#define UBYTE_TO_CHAN(b)  (b)
/* Largest value a channel can hold (used as fully opaque alpha by the decoders). */
#define CHAN_MAX 255
/* Indices of the red, green, blue and alpha components inside a decoded texel. */
#define RCOMP 0
#define GCOMP 1
#define BCOMP 2
#define ACOMP 3
41
/* Expand the 5-bit red field (bits 11..15) of a packed RGB565 value to 8 bits;
   the low three result bits replicate the field's three top bits. */
#define EXP5TO8R(packedcol)					\
   ((((packedcol) & 0xf800) >> 8) | (((packedcol) & 0xe000) >> 13))

/* Expand the 6-bit green field (bits 5..10) of a packed RGB565 value to 8 bits;
   the low two result bits replicate the field's two top bits. */
#define EXP6TO8G(packedcol)					\
   ((((packedcol) & 0x07e0) >> 3) | (((packedcol) & 0x0600) >> 9))

/* Expand the 5-bit blue field (bits 0..4) of a packed RGB565 value to 8 bits;
   the low three result bits replicate the field's three top bits. */
#define EXP5TO8B(packedcol)					\
   ((((packedcol) & 0x001f) << 3) | (((packedcol) & 0x001c) >> 2))

/* Expand a 4-bit value to 8 bits by duplicating the nibble. */
#define EXP4TO8(col)						\
   (((col) << 4) | (col))
53
/* Inefficient: each call decodes a single texel. To be efficient, it would be
   necessary to decode all 16 pixels of a block at once. */
55
56static void dxt135_decode_imageblock ( const GLubyte *img_block_src,
57                         GLint i, GLint j, GLuint dxt_type, GLvoid *texel ) {
58   GLchan *rgba = (GLchan *) texel;
59   const GLushort color0 = img_block_src[0] | (img_block_src[1] << 8);
60   const GLushort color1 = img_block_src[2] | (img_block_src[3] << 8);
61   const GLuint bits = img_block_src[4] | (img_block_src[5] << 8) |
62      (img_block_src[6] << 16) | ((GLuint)img_block_src[7] << 24);
63   /* What about big/little endian? */
64   GLubyte bit_pos = 2 * (j * 4 + i) ;
65   GLubyte code = (GLubyte) ((bits >> bit_pos) & 3);
66
67   rgba[ACOMP] = CHAN_MAX;
68   switch (code) {
69   case 0:
70      rgba[RCOMP] = UBYTE_TO_CHAN( EXP5TO8R(color0) );
71      rgba[GCOMP] = UBYTE_TO_CHAN( EXP6TO8G(color0) );
72      rgba[BCOMP] = UBYTE_TO_CHAN( EXP5TO8B(color0) );
73      break;
74   case 1:
75      rgba[RCOMP] = UBYTE_TO_CHAN( EXP5TO8R(color1) );
76      rgba[GCOMP] = UBYTE_TO_CHAN( EXP6TO8G(color1) );
77      rgba[BCOMP] = UBYTE_TO_CHAN( EXP5TO8B(color1) );
78      break;
79   case 2:
80      if ((dxt_type > 1) || (color0 > color1)) {
81         rgba[RCOMP] = UBYTE_TO_CHAN( ((EXP5TO8R(color0) * 2 + EXP5TO8R(color1)) / 3) );
82         rgba[GCOMP] = UBYTE_TO_CHAN( ((EXP6TO8G(color0) * 2 + EXP6TO8G(color1)) / 3) );
83         rgba[BCOMP] = UBYTE_TO_CHAN( ((EXP5TO8B(color0) * 2 + EXP5TO8B(color1)) / 3) );
84      }
85      else {
86         rgba[RCOMP] = UBYTE_TO_CHAN( ((EXP5TO8R(color0) + EXP5TO8R(color1)) / 2) );
87         rgba[GCOMP] = UBYTE_TO_CHAN( ((EXP6TO8G(color0) + EXP6TO8G(color1)) / 2) );
88         rgba[BCOMP] = UBYTE_TO_CHAN( ((EXP5TO8B(color0) + EXP5TO8B(color1)) / 2) );
89      }
90      break;
91   case 3:
92      if ((dxt_type > 1) || (color0 > color1)) {
93         rgba[RCOMP] = UBYTE_TO_CHAN( ((EXP5TO8R(color0) + EXP5TO8R(color1) * 2) / 3) );
94         rgba[GCOMP] = UBYTE_TO_CHAN( ((EXP6TO8G(color0) + EXP6TO8G(color1) * 2) / 3) );
95         rgba[BCOMP] = UBYTE_TO_CHAN( ((EXP5TO8B(color0) + EXP5TO8B(color1) * 2) / 3) );
96      }
97      else {
98         rgba[RCOMP] = 0;
99         rgba[GCOMP] = 0;
100         rgba[BCOMP] = 0;
101         if (dxt_type == 1) rgba[ACOMP] = UBYTE_TO_CHAN(0);
102      }
103      break;
104   default:
105   /* CANNOT happen (I hope) */
106      break;
107   }
108}
109
110
111static void fetch_2d_texel_rgb_dxt1(GLint srcRowStride, const GLubyte *pixdata,
112                         GLint i, GLint j, GLvoid *texel)
113{
114   /* Extract the (i,j) pixel from pixdata and return it
115    * in texel[RCOMP], texel[GCOMP], texel[BCOMP], texel[ACOMP].
116    */
117
118   const GLubyte *blksrc = (pixdata + ((srcRowStride + 3) / 4 * (j / 4) + (i / 4)) * 8);
119   dxt135_decode_imageblock(blksrc, (i&3), (j&3), 0, texel);
120}
121
122
123static void fetch_2d_texel_rgba_dxt1(GLint srcRowStride, const GLubyte *pixdata,
124                         GLint i, GLint j, GLvoid *texel)
125{
126   /* Extract the (i,j) pixel from pixdata and return it
127    * in texel[RCOMP], texel[GCOMP], texel[BCOMP], texel[ACOMP].
128    */
129
130   const GLubyte *blksrc = (pixdata + ((srcRowStride + 3) / 4 * (j / 4) + (i / 4)) * 8);
131   dxt135_decode_imageblock(blksrc, (i&3), (j&3), 1, texel);
132}
133
134static void fetch_2d_texel_rgba_dxt3(GLint srcRowStride, const GLubyte *pixdata,
135                         GLint i, GLint j, GLvoid *texel) {
136
137   /* Extract the (i,j) pixel from pixdata and return it
138    * in texel[RCOMP], texel[GCOMP], texel[BCOMP], texel[ACOMP].
139    */
140
141   GLchan *rgba = (GLchan *) texel;
142   const GLubyte *blksrc = (pixdata + ((srcRowStride + 3) / 4 * (j / 4) + (i / 4)) * 16);
143   const GLubyte anibble = (blksrc[((j&3) * 4 + (i&3)) / 2] >> (4 * (i&1))) & 0xf;
144   dxt135_decode_imageblock(blksrc + 8, (i&3), (j&3), 2, texel);
145   rgba[ACOMP] = UBYTE_TO_CHAN( (GLubyte)(EXP4TO8(anibble)) );
146}
147
148static void fetch_2d_texel_rgba_dxt5(GLint srcRowStride, const GLubyte *pixdata,
149                         GLint i, GLint j, GLvoid *texel) {
150
151   /* Extract the (i,j) pixel from pixdata and return it
152    * in texel[RCOMP], texel[GCOMP], texel[BCOMP], texel[ACOMP].
153    */
154
155   GLchan *rgba = (GLchan *) texel;
156   const GLubyte *blksrc = (pixdata + ((srcRowStride + 3) / 4 * (j / 4) + (i / 4)) * 16);
157   const GLubyte alpha0 = blksrc[0];
158   const GLubyte alpha1 = blksrc[1];
159   const GLubyte bit_pos = ((j&3) * 4 + (i&3)) * 3;
160   const GLubyte acodelow = blksrc[2 + bit_pos / 8];
161   const GLubyte acodehigh = blksrc[3 + bit_pos / 8];
162   const GLubyte code = (acodelow >> (bit_pos & 0x7) |
163      (acodehigh  << (8 - (bit_pos & 0x7)))) & 0x7;
164   dxt135_decode_imageblock(blksrc + 8, (i&3), (j&3), 2, texel);
165   if (code == 0)
166      rgba[ACOMP] = UBYTE_TO_CHAN( alpha0 );
167   else if (code == 1)
168      rgba[ACOMP] = UBYTE_TO_CHAN( alpha1 );
169   else if (alpha0 > alpha1)
170      rgba[ACOMP] = UBYTE_TO_CHAN( ((alpha0 * (8 - code) + (alpha1 * (code - 1))) / 7) );
171   else if (code < 6)
172      rgba[ACOMP] = UBYTE_TO_CHAN( ((alpha0 * (6 - code) + (alpha1 * (code - 1))) / 5) );
173   else if (code == 6)
174      rgba[ACOMP] = 0;
175   else
176      rgba[ACOMP] = CHAN_MAX;
177}
178
179
/* Per-channel weights applied to the squared colour differences in the error
   functions. They are the squares of 2/4/1, chosen according to the
   rgb->luminance conversion; the original author was not sure whether this
   really reflects visual perception. */
#define REDWEIGHT 4
#define GREENWEIGHT 16
#define BLUEWEIGHT 1

/* Alpha threshold used for RGBA DXT1: the encoder treats source texels whose
   alpha is <= ALPHACUT as transparent. */
#define ALPHACUT 127
187
188static void fancybasecolorsearch( UNUSED GLubyte *blkaddr, GLubyte srccolors[4][4][4], GLubyte *bestcolor[2],
189                           GLint numxpixels, GLint numypixels, UNUSED GLint type, UNUSED GLboolean haveAlpha)
190{
191   /* use same luminance-weighted distance metric to determine encoding as for finding the base colors */
192
193   /* TODO could also try to find a better encoding for the 3-color-encoding type, this really should be done
194      if it's rgba_dxt1 and we have alpha in the block, currently even values which will be mapped to black
195      due to their alpha value will influence the result */
196   GLint i, j, colors, z;
197   GLuint pixerror, pixerrorred, pixerrorgreen, pixerrorblue, pixerrorbest;
198   GLint colordist, blockerrlin[2][3];
199   GLubyte nrcolor[2];
200   GLint pixerrorcolorbest[3] = {0};
201   GLubyte enc = 0;
202   GLubyte cv[4][4];
203   GLubyte testcolor[2][3];
204
205/*   fprintf(stderr, "color begin 0 r/g/b %d/%d/%d, 1 r/g/b %d/%d/%d\n",
206      bestcolor[0][0], bestcolor[0][1], bestcolor[0][2], bestcolor[1][0], bestcolor[1][1], bestcolor[1][2]);*/
207   if (((bestcolor[0][0] & 0xf8) << 8 | (bestcolor[0][1] & 0xfc) << 3 | bestcolor[0][2] >> 3) <
208      ((bestcolor[1][0] & 0xf8) << 8 | (bestcolor[1][1] & 0xfc) << 3 | bestcolor[1][2] >> 3)) {
209      testcolor[0][0] = bestcolor[0][0];
210      testcolor[0][1] = bestcolor[0][1];
211      testcolor[0][2] = bestcolor[0][2];
212      testcolor[1][0] = bestcolor[1][0];
213      testcolor[1][1] = bestcolor[1][1];
214      testcolor[1][2] = bestcolor[1][2];
215   }
216   else {
217      testcolor[1][0] = bestcolor[0][0];
218      testcolor[1][1] = bestcolor[0][1];
219      testcolor[1][2] = bestcolor[0][2];
220      testcolor[0][0] = bestcolor[1][0];
221      testcolor[0][1] = bestcolor[1][1];
222      testcolor[0][2] = bestcolor[1][2];
223   }
224
225   for (i = 0; i < 3; i ++) {
226      cv[0][i] = testcolor[0][i];
227      cv[1][i] = testcolor[1][i];
228      cv[2][i] = (testcolor[0][i] * 2 + testcolor[1][i]) / 3;
229      cv[3][i] = (testcolor[0][i] + testcolor[1][i] * 2) / 3;
230   }
231
232   blockerrlin[0][0] = 0;
233   blockerrlin[0][1] = 0;
234   blockerrlin[0][2] = 0;
235   blockerrlin[1][0] = 0;
236   blockerrlin[1][1] = 0;
237   blockerrlin[1][2] = 0;
238
239   nrcolor[0] = 0;
240   nrcolor[1] = 0;
241
242   for (j = 0; j < numypixels; j++) {
243      for (i = 0; i < numxpixels; i++) {
244         pixerrorbest = 0xffffffff;
245         for (colors = 0; colors < 4; colors++) {
246            colordist = srccolors[j][i][0] - (cv[colors][0]);
247            pixerror = colordist * colordist * REDWEIGHT;
248            pixerrorred = colordist;
249            colordist = srccolors[j][i][1] - (cv[colors][1]);
250            pixerror += colordist * colordist * GREENWEIGHT;
251            pixerrorgreen = colordist;
252            colordist = srccolors[j][i][2] - (cv[colors][2]);
253            pixerror += colordist * colordist * BLUEWEIGHT;
254            pixerrorblue = colordist;
255            if (pixerror < pixerrorbest) {
256               enc = colors;
257               pixerrorbest = pixerror;
258               pixerrorcolorbest[0] = pixerrorred;
259               pixerrorcolorbest[1] = pixerrorgreen;
260               pixerrorcolorbest[2] = pixerrorblue;
261            }
262         }
263         if (enc == 0) {
264            for (z = 0; z < 3; z++) {
265               blockerrlin[0][z] += 3 * pixerrorcolorbest[z];
266            }
267            nrcolor[0] += 3;
268         }
269         else if (enc == 2) {
270            for (z = 0; z < 3; z++) {
271               blockerrlin[0][z] += 2 * pixerrorcolorbest[z];
272            }
273            nrcolor[0] += 2;
274            for (z = 0; z < 3; z++) {
275               blockerrlin[1][z] += 1 * pixerrorcolorbest[z];
276            }
277            nrcolor[1] += 1;
278         }
279         else if (enc == 3) {
280            for (z = 0; z < 3; z++) {
281               blockerrlin[0][z] += 1 * pixerrorcolorbest[z];
282            }
283            nrcolor[0] += 1;
284            for (z = 0; z < 3; z++) {
285               blockerrlin[1][z] += 2 * pixerrorcolorbest[z];
286            }
287            nrcolor[1] += 2;
288         }
289         else if (enc == 1) {
290            for (z = 0; z < 3; z++) {
291               blockerrlin[1][z] += 3 * pixerrorcolorbest[z];
292            }
293            nrcolor[1] += 3;
294         }
295      }
296   }
297   if (nrcolor[0] == 0) nrcolor[0] = 1;
298   if (nrcolor[1] == 0) nrcolor[1] = 1;
299   for (j = 0; j < 2; j++) {
300      for (i = 0; i < 3; i++) {
301	 GLint newvalue = testcolor[j][i] + blockerrlin[j][i] / nrcolor[j];
302	 if (newvalue <= 0)
303	    testcolor[j][i] = 0;
304	 else if (newvalue >= 255)
305	    testcolor[j][i] = 255;
306	 else testcolor[j][i] = newvalue;
307      }
308   }
309
310   if ((abs(testcolor[0][0] - testcolor[1][0]) < 8) &&
311       (abs(testcolor[0][1] - testcolor[1][1]) < 4) &&
312       (abs(testcolor[0][2] - testcolor[1][2]) < 8)) {
313       /* both colors are so close they might get encoded as the same 16bit values */
314      GLubyte coldiffred, coldiffgreen, coldiffblue, coldiffmax, factor, ind0, ind1;
315
316      coldiffred = abs(testcolor[0][0] - testcolor[1][0]);
317      coldiffgreen = 2 * abs(testcolor[0][1] - testcolor[1][1]);
318      coldiffblue = abs(testcolor[0][2] - testcolor[1][2]);
319      coldiffmax = coldiffred;
320      if (coldiffmax < coldiffgreen) coldiffmax = coldiffgreen;
321      if (coldiffmax < coldiffblue) coldiffmax = coldiffblue;
322      if (coldiffmax > 0) {
323         if (coldiffmax > 4) factor = 2;
324         else if (coldiffmax > 2) factor = 3;
325         else factor = 4;
326         /* Won't do much if the color value is near 255... */
327         /* argh so many ifs */
328         if (testcolor[1][1] >= testcolor[0][1]) {
329            ind1 = 1; ind0 = 0;
330         }
331         else {
332            ind1 = 0; ind0 = 1;
333         }
334         if ((testcolor[ind1][1] + factor * coldiffgreen) <= 255)
335            testcolor[ind1][1] += factor * coldiffgreen;
336         else testcolor[ind1][1] = 255;
337         if ((testcolor[ind1][0] - testcolor[ind0][1]) > 0) {
338            if ((testcolor[ind1][0] + factor * coldiffred) <= 255)
339               testcolor[ind1][0] += factor * coldiffred;
340            else testcolor[ind1][0] = 255;
341         }
342         else {
343            if ((testcolor[ind0][0] + factor * coldiffred) <= 255)
344               testcolor[ind0][0] += factor * coldiffred;
345            else testcolor[ind0][0] = 255;
346         }
347         if ((testcolor[ind1][2] - testcolor[ind0][2]) > 0) {
348            if ((testcolor[ind1][2] + factor * coldiffblue) <= 255)
349               testcolor[ind1][2] += factor * coldiffblue;
350            else testcolor[ind1][2] = 255;
351         }
352         else {
353            if ((testcolor[ind0][2] + factor * coldiffblue) <= 255)
354               testcolor[ind0][2] += factor * coldiffblue;
355            else testcolor[ind0][2] = 255;
356         }
357      }
358   }
359
360   if (((testcolor[0][0] & 0xf8) << 8 | (testcolor[0][1] & 0xfc) << 3 | testcolor[0][2] >> 3) <
361      ((testcolor[1][0] & 0xf8) << 8 | (testcolor[1][1] & 0xfc) << 3 | testcolor[1][2]) >> 3) {
362      for (i = 0; i < 3; i++) {
363         bestcolor[0][i] = testcolor[0][i];
364         bestcolor[1][i] = testcolor[1][i];
365      }
366   }
367   else {
368      for (i = 0; i < 3; i++) {
369         bestcolor[0][i] = testcolor[1][i];
370         bestcolor[1][i] = testcolor[0][i];
371      }
372   }
373
374/*     fprintf(stderr, "color end 0 r/g/b %d/%d/%d, 1 r/g/b %d/%d/%d\n",
375     bestcolor[0][0], bestcolor[0][1], bestcolor[0][2], bestcolor[1][0], bestcolor[1][1], bestcolor[1][2]);*/
376}
377
378
379
380static void storedxtencodedblock( GLubyte *blkaddr, GLubyte srccolors[4][4][4], GLubyte *bestcolor[2],
381                           GLint numxpixels, GLint numypixels, GLuint type, GLboolean haveAlpha)
382{
383   /* use same luminance-weighted distance metric to determine encoding as for finding the base colors */
384
385   GLint i, j, colors;
386   GLuint testerror, testerror2, pixerror, pixerrorbest;
387   GLint colordist;
388   GLushort color0, color1, tempcolor;
389   GLuint bits = 0, bits2 = 0;
390   GLubyte *colorptr;
391   GLubyte enc = 0;
392   GLubyte cv[4][4];
393
394   bestcolor[0][0] = bestcolor[0][0] & 0xf8;
395   bestcolor[0][1] = bestcolor[0][1] & 0xfc;
396   bestcolor[0][2] = bestcolor[0][2] & 0xf8;
397   bestcolor[1][0] = bestcolor[1][0] & 0xf8;
398   bestcolor[1][1] = bestcolor[1][1] & 0xfc;
399   bestcolor[1][2] = bestcolor[1][2] & 0xf8;
400
401   color0 = bestcolor[0][0] << 8 | bestcolor[0][1] << 3 | bestcolor[0][2] >> 3;
402   color1 = bestcolor[1][0] << 8 | bestcolor[1][1] << 3 | bestcolor[1][2] >> 3;
403   if (color0 < color1) {
404      tempcolor = color0; color0 = color1; color1 = tempcolor;
405      colorptr = bestcolor[0]; bestcolor[0] = bestcolor[1]; bestcolor[1] = colorptr;
406   }
407
408
409   for (i = 0; i < 3; i++) {
410      cv[0][i] = bestcolor[0][i];
411      cv[1][i] = bestcolor[1][i];
412      cv[2][i] = (bestcolor[0][i] * 2 + bestcolor[1][i]) / 3;
413      cv[3][i] = (bestcolor[0][i] + bestcolor[1][i] * 2) / 3;
414   }
415
416   testerror = 0;
417   for (j = 0; j < numypixels; j++) {
418      for (i = 0; i < numxpixels; i++) {
419         pixerrorbest = 0xffffffff;
420         for (colors = 0; colors < 4; colors++) {
421            colordist = srccolors[j][i][0] - cv[colors][0];
422            pixerror = colordist * colordist * REDWEIGHT;
423            colordist = srccolors[j][i][1] - cv[colors][1];
424            pixerror += colordist * colordist * GREENWEIGHT;
425            colordist = srccolors[j][i][2] - cv[colors][2];
426            pixerror += colordist * colordist * BLUEWEIGHT;
427            if (pixerror < pixerrorbest) {
428               pixerrorbest = pixerror;
429               enc = colors;
430            }
431         }
432         testerror += pixerrorbest;
433         bits |= (uint32_t)enc << (2 * (j * 4 + i));
434      }
435   }
436   /* some hw might disagree but actually decoding should always use 4-color encoding
437      for non-dxt1 formats */
438   if (type == GL_COMPRESSED_RGB_S3TC_DXT1_EXT || type == GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) {
439      for (i = 0; i < 3; i++) {
440         cv[2][i] = (bestcolor[0][i] + bestcolor[1][i]) / 2;
441         /* this isn't used. Looks like the black color constant can only be used
442            with RGB_DXT1 if I read the spec correctly (note though that the radeon gpu disagrees,
443            it will decode 3 to black even with DXT3/5), and due to how the color searching works
444            it won't get used even then */
445         cv[3][i] = 0;
446      }
447      testerror2 = 0;
448      for (j = 0; j < numypixels; j++) {
449         for (i = 0; i < numxpixels; i++) {
450            pixerrorbest = 0xffffffff;
451            if ((type == GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) && (srccolors[j][i][3] <= ALPHACUT)) {
452               enc = 3;
453               pixerrorbest = 0; /* don't calculate error */
454            }
455            else {
456               /* we're calculating the same what we have done already for colors 0-1 above... */
457               for (colors = 0; colors < 3; colors++) {
458                  colordist = srccolors[j][i][0] - cv[colors][0];
459                  pixerror = colordist * colordist * REDWEIGHT;
460                  colordist = srccolors[j][i][1] - cv[colors][1];
461                  pixerror += colordist * colordist * GREENWEIGHT;
462                  colordist = srccolors[j][i][2] - cv[colors][2];
463                  pixerror += colordist * colordist * BLUEWEIGHT;
464                  if (pixerror < pixerrorbest) {
465                     pixerrorbest = pixerror;
466                     /* need to exchange colors later */
467                     if (colors > 1) enc = colors;
468                     else enc = colors ^ 1;
469                  }
470               }
471            }
472            testerror2 += pixerrorbest;
473            bits2 |= (uint32_t)enc << (2 * (j * 4 + i));
474         }
475      }
476   } else {
477      testerror2 = 0xffffffff;
478   }
479
480   /* finally we're finished, write back colors and bits */
481   if ((testerror > testerror2) || (haveAlpha)) {
482      *blkaddr++ = color1 & 0xff;
483      *blkaddr++ = color1 >> 8;
484      *blkaddr++ = color0 & 0xff;
485      *blkaddr++ = color0 >> 8;
486      *blkaddr++ = bits2 & 0xff;
487      *blkaddr++ = ( bits2 >> 8) & 0xff;
488      *blkaddr++ = ( bits2 >> 16) & 0xff;
489      *blkaddr = bits2 >> 24;
490   }
491   else {
492      *blkaddr++ = color0 & 0xff;
493      *blkaddr++ = color0 >> 8;
494      *blkaddr++ = color1 & 0xff;
495      *blkaddr++ = color1 >> 8;
496      *blkaddr++ = bits & 0xff;
497      *blkaddr++ = ( bits >> 8) & 0xff;
498      *blkaddr++ = ( bits >> 16) & 0xff;
499      *blkaddr = bits >> 24;
500   }
501}
502
503static void encodedxtcolorblockfaster( GLubyte *blkaddr, GLubyte srccolors[4][4][4],
504                         GLint numxpixels, GLint numypixels, GLuint type )
505{
506/* simplistic approach. We need two base colors, simply use the "highest" and the "lowest" color
507   present in the picture as base colors */
508
509   /* define lowest and highest color as shortest and longest vector to 0/0/0, though the
510      vectors are weighted similar to their importance in rgb-luminance conversion
511      doesn't work too well though...
512      This seems to be a rather difficult problem */
513
514   GLubyte *bestcolor[2];
515   GLubyte basecolors[2][3];
516   GLubyte i, j;
517   GLuint lowcv, highcv, testcv;
518   GLboolean haveAlpha = GL_FALSE;
519
520   lowcv = highcv = srccolors[0][0][0] * srccolors[0][0][0] * REDWEIGHT +
521                          srccolors[0][0][1] * srccolors[0][0][1] * GREENWEIGHT +
522                          srccolors[0][0][2] * srccolors[0][0][2] * BLUEWEIGHT;
523   bestcolor[0] = bestcolor[1] = srccolors[0][0];
524   for (j = 0; j < numypixels; j++) {
525      for (i = 0; i < numxpixels; i++) {
526         /* don't use this as a base color if the pixel will get black/transparent anyway */
527         if ((type != GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) || (srccolors[j][i][3] > ALPHACUT)) {
528            testcv = srccolors[j][i][0] * srccolors[j][i][0] * REDWEIGHT +
529                     srccolors[j][i][1] * srccolors[j][i][1] * GREENWEIGHT +
530                     srccolors[j][i][2] * srccolors[j][i][2] * BLUEWEIGHT;
531            if (testcv > highcv) {
532               highcv = testcv;
533               bestcolor[1] = srccolors[j][i];
534            }
535            else if (testcv < lowcv) {
536               lowcv = testcv;
537               bestcolor[0] = srccolors[j][i];
538            }
539         }
540         else haveAlpha = GL_TRUE;
541      }
542   }
543   /* make sure the original color values won't get touched... */
544   for (j = 0; j < 2; j++) {
545      for (i = 0; i < 3; i++) {
546         basecolors[j][i] = bestcolor[j][i];
547      }
548   }
549   bestcolor[0] = basecolors[0];
550   bestcolor[1] = basecolors[1];
551
552   /* try to find better base colors */
553   fancybasecolorsearch(blkaddr, srccolors, bestcolor, numxpixels, numypixels, type, haveAlpha);
554   /* find the best encoding for these colors, and store the result */
555   storedxtencodedblock(blkaddr, srccolors, bestcolor, numxpixels, numypixels, type, haveAlpha);
556}
557
558static void writedxt5encodedalphablock( GLubyte *blkaddr, GLubyte alphabase1, GLubyte alphabase2,
559                         GLubyte alphaenc[16])
560{
561   *blkaddr++ = alphabase1;
562   *blkaddr++ = alphabase2;
563   *blkaddr++ = alphaenc[0] | (alphaenc[1] << 3) | ((alphaenc[2] & 3) << 6);
564   *blkaddr++ = (alphaenc[2] >> 2) | (alphaenc[3] << 1) | (alphaenc[4] << 4) | ((alphaenc[5] & 1) << 7);
565   *blkaddr++ = (alphaenc[5] >> 1) | (alphaenc[6] << 2) | (alphaenc[7] << 5);
566   *blkaddr++ = alphaenc[8] | (alphaenc[9] << 3) | ((alphaenc[10] & 3) << 6);
567   *blkaddr++ = (alphaenc[10] >> 2) | (alphaenc[11] << 1) | (alphaenc[12] << 4) | ((alphaenc[13] & 1) << 7);
568   *blkaddr++ = (alphaenc[13] >> 1) | (alphaenc[14] << 2) | (alphaenc[15] << 5);
569}
570
571static void encodedxt5alpha(GLubyte *blkaddr, GLubyte srccolors[4][4][4],
572                            GLint numxpixels, GLint numypixels)
573{
574   GLubyte alphabase[2], alphause[2];
575   GLshort alphatest[2];
576   GLuint alphablockerror1, alphablockerror2, alphablockerror3;
577   GLubyte i, j, aindex, acutValues[7];
578   GLubyte alphaenc1[16], alphaenc2[16], alphaenc3[16];
579   GLboolean alphaabsmin = GL_FALSE;
580   GLboolean alphaabsmax = GL_FALSE;
581   GLshort alphadist;
582
583   /* find lowest and highest alpha value in block, alphabase[0] lowest, alphabase[1] highest */
584   alphabase[0] = 0xff; alphabase[1] = 0x0;
585   for (j = 0; j < numypixels; j++) {
586      for (i = 0; i < numxpixels; i++) {
587         if (srccolors[j][i][3] == 0)
588            alphaabsmin = GL_TRUE;
589         else if (srccolors[j][i][3] == 255)
590            alphaabsmax = GL_TRUE;
591         else {
592            if (srccolors[j][i][3] > alphabase[1])
593               alphabase[1] = srccolors[j][i][3];
594            if (srccolors[j][i][3] < alphabase[0])
595               alphabase[0] = srccolors[j][i][3];
596         }
597      }
598   }
599
600
601   if ((alphabase[0] > alphabase[1]) && !(alphaabsmin && alphaabsmax)) { /* one color, either max or min */
602      /* shortcut here since it is a very common case (and also avoids later problems) */
603      /* || (alphabase[0] == alphabase[1] && !alphaabsmin && !alphaabsmax) */
604      /* could also thest for alpha0 == alpha1 (and not min/max), but probably not common, so don't bother */
605
606      *blkaddr++ = srccolors[0][0][3];
607      blkaddr++;
608      *blkaddr++ = 0;
609      *blkaddr++ = 0;
610      *blkaddr++ = 0;
611      *blkaddr++ = 0;
612      *blkaddr++ = 0;
613      *blkaddr++ = 0;
614/*      fprintf(stderr, "enc0 used\n");*/
615      return;
616   }
617
618   /* find best encoding for alpha0 > alpha1 */
619   /* it's possible this encoding is better even if both alphaabsmin and alphaabsmax are true */
620   alphablockerror1 = 0x0;
621   alphablockerror2 = 0xffffffff;
622   alphablockerror3 = 0xffffffff;
623   if (alphaabsmin) alphause[0] = 0;
624   else alphause[0] = alphabase[0];
625   if (alphaabsmax) alphause[1] = 255;
626   else alphause[1] = alphabase[1];
627   /* calculate the 7 cut values, just the middle between 2 of the computed alpha values */
628   for (aindex = 0; aindex < 7; aindex++) {
629      /* don't forget here is always rounded down */
630      acutValues[aindex] = (alphause[0] * (2*aindex + 1) + alphause[1] * (14 - (2*aindex + 1))) / 14;
631   }
632
633   for (j = 0; j < numypixels; j++) {
634      for (i = 0; i < numxpixels; i++) {
635         /* maybe it's overkill to have the most complicated calculation just for the error
636            calculation which we only need to figure out if encoding1 or encoding2 is better... */
637         if (srccolors[j][i][3] > acutValues[0]) {
638            alphaenc1[4*j + i] = 0;
639            alphadist = srccolors[j][i][3] - alphause[1];
640         }
641         else if (srccolors[j][i][3] > acutValues[1]) {
642            alphaenc1[4*j + i] = 2;
643            alphadist = srccolors[j][i][3] - (alphause[1] * 6 + alphause[0] * 1) / 7;
644         }
645         else if (srccolors[j][i][3] > acutValues[2]) {
646            alphaenc1[4*j + i] = 3;
647            alphadist = srccolors[j][i][3] - (alphause[1] * 5 + alphause[0] * 2) / 7;
648         }
649         else if (srccolors[j][i][3] > acutValues[3]) {
650            alphaenc1[4*j + i] = 4;
651            alphadist = srccolors[j][i][3] - (alphause[1] * 4 + alphause[0] * 3) / 7;
652         }
653         else if (srccolors[j][i][3] > acutValues[4]) {
654            alphaenc1[4*j + i] = 5;
655            alphadist = srccolors[j][i][3] - (alphause[1] * 3 + alphause[0] * 4) / 7;
656         }
657         else if (srccolors[j][i][3] > acutValues[5]) {
658            alphaenc1[4*j + i] = 6;
659            alphadist = srccolors[j][i][3] - (alphause[1] * 2 + alphause[0] * 5) / 7;
660         }
661         else if (srccolors[j][i][3] > acutValues[6]) {
662            alphaenc1[4*j + i] = 7;
663            alphadist = srccolors[j][i][3] - (alphause[1] * 1 + alphause[0] * 6) / 7;
664         }
665         else {
666            alphaenc1[4*j + i] = 1;
667            alphadist = srccolors[j][i][3] - alphause[0];
668         }
669         alphablockerror1 += alphadist * alphadist;
670      }
671   }
672/*      for (i = 0; i < 16; i++) {
673         fprintf(stderr, "%d ", alphaenc1[i]);
674      }
675      fprintf(stderr, "cutVals ");
676      for (i = 0; i < 8; i++) {
677         fprintf(stderr, "%d ", acutValues[i]);
678      }
679      fprintf(stderr, "srcVals ");
680      for (j = 0; j < numypixels; j++)
681         for (i = 0; i < numxpixels; i++) {
682            fprintf(stderr, "%d ", srccolors[j][i][3]);
683         }
684
685      fprintf(stderr, "\n");
686   }*/
687   /* it's not very likely this encoding is better if both alphaabsmin and alphaabsmax
688      are false but try it anyway */
689   if (alphablockerror1 >= 32) {
690
691      /* don't bother if encoding is already very good, this condition should also imply
692      we have valid alphabase colors which we absolutely need (alphabase[0] <= alphabase[1]) */
693      alphablockerror2 = 0;
694      for (aindex = 0; aindex < 5; aindex++) {
695         /* don't forget here is always rounded down */
696         acutValues[aindex] = (alphabase[0] * (10 - (2*aindex + 1)) + alphabase[1] * (2*aindex + 1)) / 10;
697      }
698      for (j = 0; j < numypixels; j++) {
699         for (i = 0; i < numxpixels; i++) {
700             /* maybe it's overkill to have the most complicated calculation just for the error
701               calculation which we only need to figure out if encoding1 or encoding2 is better... */
702            if (srccolors[j][i][3] == 0) {
703               alphaenc2[4*j + i] = 6;
704               alphadist = 0;
705            }
706            else if (srccolors[j][i][3] == 255) {
707               alphaenc2[4*j + i] = 7;
708               alphadist = 0;
709            }
710            else if (srccolors[j][i][3] <= acutValues[0]) {
711               alphaenc2[4*j + i] = 0;
712               alphadist = srccolors[j][i][3] - alphabase[0];
713            }
714            else if (srccolors[j][i][3] <= acutValues[1]) {
715               alphaenc2[4*j + i] = 2;
716               alphadist = srccolors[j][i][3] - (alphabase[0] * 4 + alphabase[1] * 1) / 5;
717            }
718            else if (srccolors[j][i][3] <= acutValues[2]) {
719               alphaenc2[4*j + i] = 3;
720               alphadist = srccolors[j][i][3] - (alphabase[0] * 3 + alphabase[1] * 2) / 5;
721            }
722            else if (srccolors[j][i][3] <= acutValues[3]) {
723               alphaenc2[4*j + i] = 4;
724               alphadist = srccolors[j][i][3] - (alphabase[0] * 2 + alphabase[1] * 3) / 5;
725            }
726            else if (srccolors[j][i][3] <= acutValues[4]) {
727               alphaenc2[4*j + i] = 5;
728               alphadist = srccolors[j][i][3] - (alphabase[0] * 1 + alphabase[1] * 4) / 5;
729            }
730            else {
731               alphaenc2[4*j + i] = 1;
732               alphadist = srccolors[j][i][3] - alphabase[1];
733            }
734            alphablockerror2 += alphadist * alphadist;
735         }
736      }
737
738
739      /* skip this if the error is already very small
740         this encoding is MUCH better on average than #2 though, but expensive! */
741      if ((alphablockerror2 > 96) && (alphablockerror1 > 96)) {
742         GLshort blockerrlin1 = 0;
743         GLshort blockerrlin2 = 0;
744         GLubyte nralphainrangelow = 0;
745         GLubyte nralphainrangehigh = 0;
746         alphatest[0] = 0xff;
747         alphatest[1] = 0x0;
748         /* if we have large range it's likely there are values close to 0/255, try to map them to 0/255 */
749         for (j = 0; j < numypixels; j++) {
750            for (i = 0; i < numxpixels; i++) {
751               if ((srccolors[j][i][3] > alphatest[1]) && (srccolors[j][i][3] < (255 -(alphabase[1] - alphabase[0]) / 28)))
752                  alphatest[1] = srccolors[j][i][3];
753               if ((srccolors[j][i][3] < alphatest[0]) && (srccolors[j][i][3] > (alphabase[1] - alphabase[0]) / 28))
754                  alphatest[0] = srccolors[j][i][3];
755            }
756         }
757          /* shouldn't happen too often, don't really care about those degenerated cases */
758          if (alphatest[1] <= alphatest[0]) {
759             alphatest[0] = 1;
760             alphatest[1] = 254;
761/*             fprintf(stderr, "only 1 or 0 colors for encoding!\n");*/
762         }
763         for (aindex = 0; aindex < 5; aindex++) {
764         /* don't forget here is always rounded down */
765            acutValues[aindex] = (alphatest[0] * (10 - (2*aindex + 1)) + alphatest[1] * (2*aindex + 1)) / 10;
766         }
767
768         /* find the "average" difference between the alpha values and the next encoded value.
769            This is then used to calculate new base values.
770            Should there be some weighting, i.e. those values closer to alphatest[x] have more weight,
771            since they will see more improvement, and also because the values in the middle are somewhat
772            likely to get no improvement at all (because the base values might move in different directions)?
773            OTOH it would mean the values in the middle are even less likely to get an improvement
774         */
775         for (j = 0; j < numypixels; j++) {
776            for (i = 0; i < numxpixels; i++) {
777               if (srccolors[j][i][3] <= alphatest[0] / 2) {
778               }
779               else if (srccolors[j][i][3] > ((255 + alphatest[1]) / 2)) {
780               }
781               else if (srccolors[j][i][3] <= acutValues[0]) {
782                  blockerrlin1 += (srccolors[j][i][3] - alphatest[0]);
783                  nralphainrangelow += 1;
784               }
785               else if (srccolors[j][i][3] <= acutValues[1]) {
786                  blockerrlin1 += (srccolors[j][i][3] - (alphatest[0] * 4 + alphatest[1] * 1) / 5);
787                  blockerrlin2 += (srccolors[j][i][3] - (alphatest[0] * 4 + alphatest[1] * 1) / 5);
788                  nralphainrangelow += 1;
789                  nralphainrangehigh += 1;
790               }
791               else if (srccolors[j][i][3] <= acutValues[2]) {
792                  blockerrlin1 += (srccolors[j][i][3] - (alphatest[0] * 3 + alphatest[1] * 2) / 5);
793                  blockerrlin2 += (srccolors[j][i][3] - (alphatest[0] * 3 + alphatest[1] * 2) / 5);
794                  nralphainrangelow += 1;
795                  nralphainrangehigh += 1;
796               }
797               else if (srccolors[j][i][3] <= acutValues[3]) {
798                  blockerrlin1 += (srccolors[j][i][3] - (alphatest[0] * 2 + alphatest[1] * 3) / 5);
799                  blockerrlin2 += (srccolors[j][i][3] - (alphatest[0] * 2 + alphatest[1] * 3) / 5);
800                  nralphainrangelow += 1;
801                  nralphainrangehigh += 1;
802               }
803               else if (srccolors[j][i][3] <= acutValues[4]) {
804                  blockerrlin1 += (srccolors[j][i][3] - (alphatest[0] * 1 + alphatest[1] * 4) / 5);
805                  blockerrlin2 += (srccolors[j][i][3] - (alphatest[0] * 1 + alphatest[1] * 4) / 5);
806                  nralphainrangelow += 1;
807                  nralphainrangehigh += 1;
808                  }
809               else {
810                  blockerrlin2 += (srccolors[j][i][3] - alphatest[1]);
811                  nralphainrangehigh += 1;
812               }
813            }
814         }
815         /* shouldn't happen often, needed to avoid div by zero */
816         if (nralphainrangelow == 0) nralphainrangelow = 1;
817         if (nralphainrangehigh == 0) nralphainrangehigh = 1;
818         alphatest[0] = alphatest[0] + (blockerrlin1 / nralphainrangelow);
819/*         fprintf(stderr, "block err lin low %d, nr %d\n", blockerrlin1, nralphainrangelow);
820         fprintf(stderr, "block err lin high %d, nr %d\n", blockerrlin2, nralphainrangehigh);*/
821         /* again shouldn't really happen often... */
822         if (alphatest[0] < 0) {
823            alphatest[0] = 0;
824/*            fprintf(stderr, "adj alpha base val to 0\n");*/
825         }
826         alphatest[1] = alphatest[1] + (blockerrlin2 / nralphainrangehigh);
827         if (alphatest[1] > 255) {
828            alphatest[1] = 255;
829/*            fprintf(stderr, "adj alpha base val to 255\n");*/
830         }
831
832         alphablockerror3 = 0;
833         for (aindex = 0; aindex < 5; aindex++) {
834         /* don't forget here is always rounded down */
835            acutValues[aindex] = (alphatest[0] * (10 - (2*aindex + 1)) + alphatest[1] * (2*aindex + 1)) / 10;
836         }
837         for (j = 0; j < numypixels; j++) {
838            for (i = 0; i < numxpixels; i++) {
839                /* maybe it's overkill to have the most complicated calculation just for the error
840                  calculation which we only need to figure out if encoding1 or encoding2 is better... */
841               if (srccolors[j][i][3] <= alphatest[0] / 2) {
842                  alphaenc3[4*j + i] = 6;
843                  alphadist = srccolors[j][i][3];
844               }
845               else if (srccolors[j][i][3] > ((255 + alphatest[1]) / 2)) {
846                  alphaenc3[4*j + i] = 7;
847                  alphadist = 255 - srccolors[j][i][3];
848               }
849               else if (srccolors[j][i][3] <= acutValues[0]) {
850                  alphaenc3[4*j + i] = 0;
851                  alphadist = srccolors[j][i][3] - alphatest[0];
852               }
853               else if (srccolors[j][i][3] <= acutValues[1]) {
854                 alphaenc3[4*j + i] = 2;
855                 alphadist = srccolors[j][i][3] - (alphatest[0] * 4 + alphatest[1] * 1) / 5;
856               }
857               else if (srccolors[j][i][3] <= acutValues[2]) {
858                  alphaenc3[4*j + i] = 3;
859                  alphadist = srccolors[j][i][3] - (alphatest[0] * 3 + alphatest[1] * 2) / 5;
860               }
861               else if (srccolors[j][i][3] <= acutValues[3]) {
862                  alphaenc3[4*j + i] = 4;
863                  alphadist = srccolors[j][i][3] - (alphatest[0] * 2 + alphatest[1] * 3) / 5;
864               }
865               else if (srccolors[j][i][3] <= acutValues[4]) {
866                  alphaenc3[4*j + i] = 5;
867                  alphadist = srccolors[j][i][3] - (alphatest[0] * 1 + alphatest[1] * 4) / 5;
868               }
869               else {
870                  alphaenc3[4*j + i] = 1;
871                  alphadist = srccolors[j][i][3] - alphatest[1];
872               }
873               alphablockerror3 += alphadist * alphadist;
874            }
875         }
876      }
877   }
878  /* write the alpha values and encoding back. */
879   if ((alphablockerror1 <= alphablockerror2) && (alphablockerror1 <= alphablockerror3)) {
880/*      if (alphablockerror1 > 96) fprintf(stderr, "enc1 used, error %d\n", alphablockerror1);*/
881      writedxt5encodedalphablock( blkaddr, alphause[1], alphause[0], alphaenc1 );
882   }
883   else if (alphablockerror2 <= alphablockerror3) {
884/*      if (alphablockerror2 > 96) fprintf(stderr, "enc2 used, error %d\n", alphablockerror2);*/
885      writedxt5encodedalphablock( blkaddr, alphabase[0], alphabase[1], alphaenc2 );
886   }
887   else {
888/*      fprintf(stderr, "enc3 used, error %d\n", alphablockerror3);*/
889      writedxt5encodedalphablock( blkaddr, (GLubyte)alphatest[0], (GLubyte)alphatest[1], alphaenc3 );
890   }
891}
892
893static void extractsrccolors( GLubyte srcpixels[4][4][4], const GLchan *srcaddr,
894                         GLint srcRowStride, GLint numxpixels, GLint numypixels, GLint comps)
895{
896   GLubyte i, j, c;
897   const GLchan *curaddr;
898   for (j = 0; j < numypixels; j++) {
899      curaddr = srcaddr + j * srcRowStride * comps;
900      for (i = 0; i < numxpixels; i++) {
901         for (c = 0; c < comps; c++) {
902            srcpixels[j][i][c] = *curaddr++ / (CHAN_MAX / 255);
903         }
904      }
905   }
906}
907
908
909static void
910tx_compress_dxt1(int srccomps, int width, int height,
911                 const GLubyte *srcPixData, GLubyte *dest, int dstRowStride,
912                 unsigned dstComps)
913{
914   GLenum destFormat = dstComps == 3 ? GL_COMPRESSED_RGB_S3TC_DXT1_EXT
915                                     : GL_COMPRESSED_RGBA_S3TC_DXT1_EXT;
916   GLubyte *blkaddr = dest;
917   GLubyte srcpixels[4][4][4];
918   const GLchan *srcaddr = srcPixData;
919   int numxpixels, numypixels;
920
921   /* hmm we used to get called without dstRowStride... */
922   int dstRowDiff = dstRowStride >= (width * 2) ?
923                    dstRowStride - (((width + 3) & ~3) * 2) : 0;
924   /* fprintf(stderr, "dxt1 tex width %d tex height %d dstRowStride %d\n",
925              width, height, dstRowStride); */
926   for (int j = 0; j < height; j += 4) {
927      if (height > j + 3) numypixels = 4;
928      else numypixels = height - j;
929      srcaddr = srcPixData + j * width * srccomps;
930      for (int i = 0; i < width; i += 4) {
931         if (width > i + 3) numxpixels = 4;
932         else numxpixels = width - i;
933         extractsrccolors(srcpixels, srcaddr, width, numxpixels, numypixels, srccomps);
934         encodedxtcolorblockfaster(blkaddr, srcpixels, numxpixels, numypixels, destFormat);
935         srcaddr += srccomps * numxpixels;
936         blkaddr += 8;
937      }
938      blkaddr += dstRowDiff;
939   }
940}
941
942static void
943tx_compress_dxt3(int srccomps, int width, int height,
944                 const GLubyte *srcPixData, GLubyte *dest, int dstRowStride)
945{
946   GLenum destFormat = GL_COMPRESSED_RGBA_S3TC_DXT3_EXT;
947   GLubyte *blkaddr = dest;
948   GLubyte srcpixels[4][4][4];
949   const GLchan *srcaddr = srcPixData;
950   int numxpixels, numypixels;
951
952   int dstRowDiff = dstRowStride >= (width * 4) ?
953                    dstRowStride - (((width + 3) & ~3) * 4) : 0;
954   /* fprintf(stderr, "dxt3 tex width %d tex height %d dstRowStride %d\n",
955              width, height, dstRowStride); */
956   for (int j = 0; j < height; j += 4) {
957      if (height > j + 3) numypixels = 4;
958      else numypixels = height - j;
959      srcaddr = srcPixData + j * width * srccomps;
960      for (int i = 0; i < width; i += 4) {
961         if (width > i + 3) numxpixels = 4;
962         else numxpixels = width - i;
963         extractsrccolors(srcpixels, srcaddr, width, numxpixels, numypixels, srccomps);
964         *blkaddr++ = (srcpixels[0][0][3] >> 4) | (srcpixels[0][1][3] & 0xf0);
965         *blkaddr++ = (srcpixels[0][2][3] >> 4) | (srcpixels[0][3][3] & 0xf0);
966         *blkaddr++ = (srcpixels[1][0][3] >> 4) | (srcpixels[1][1][3] & 0xf0);
967         *blkaddr++ = (srcpixels[1][2][3] >> 4) | (srcpixels[1][3][3] & 0xf0);
968         *blkaddr++ = (srcpixels[2][0][3] >> 4) | (srcpixels[2][1][3] & 0xf0);
969         *blkaddr++ = (srcpixels[2][2][3] >> 4) | (srcpixels[2][3][3] & 0xf0);
970         *blkaddr++ = (srcpixels[3][0][3] >> 4) | (srcpixels[3][1][3] & 0xf0);
971         *blkaddr++ = (srcpixels[3][2][3] >> 4) | (srcpixels[3][3][3] & 0xf0);
972         encodedxtcolorblockfaster(blkaddr, srcpixels, numxpixels, numypixels, destFormat);
973         srcaddr += srccomps * numxpixels;
974         blkaddr += 8;
975      }
976      blkaddr += dstRowDiff;
977   }
978}
979
980static void
981tx_compress_dxt5(int srccomps, int width, int height,
982                 const GLubyte *srcPixData, GLubyte *dest, int dstRowStride)
983{
984   GLenum destFormat = GL_COMPRESSED_RGBA_S3TC_DXT5_EXT;
985   GLubyte *blkaddr = dest;
986   GLubyte srcpixels[4][4][4];
987   const GLchan *srcaddr = srcPixData;
988   int numxpixels, numypixels;
989
990   int dstRowDiff = dstRowStride >= (width * 4) ?
991                    dstRowStride - (((width + 3) & ~3) * 4) : 0;
992   /* fprintf(stderr, "dxt5 tex width %d tex height %d dstRowStride %d\n",
993              width, height, dstRowStride); */
994   for (int j = 0; j < height; j += 4) {
995      if (height > j + 3) numypixels = 4;
996      else numypixels = height - j;
997      srcaddr = srcPixData + j * width * srccomps;
998      for (int i = 0; i < width; i += 4) {
999         if (width > i + 3) numxpixels = 4;
1000         else numxpixels = width - i;
1001         extractsrccolors(srcpixels, srcaddr, width, numxpixels, numypixels, srccomps);
1002         encodedxt5alpha(blkaddr, srcpixels, numxpixels, numypixels);
1003         encodedxtcolorblockfaster(blkaddr + 8, srcpixels, numxpixels, numypixels, destFormat);
1004         srcaddr += srccomps * numxpixels;
1005         blkaddr += 16;
1006      }
1007      blkaddr += dstRowDiff;
1008   }
1009}
1010
1011static void
1012tx_compress_dxtn(GLint srccomps, GLint width, GLint height,
1013                 const GLubyte *srcPixData, GLenum destFormat,
1014                 GLubyte *dest, GLint dstRowStride)
1015{
1016   switch (destFormat) {
1017   case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
1018      tx_compress_dxt1(srccomps, width, height, srcPixData,
1019                       dest, dstRowStride, 3);
1020      break;
1021   case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
1022      tx_compress_dxt1(srccomps, width, height, srcPixData,
1023                       dest, dstRowStride, 4);
1024      break;
1025   case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
1026      tx_compress_dxt3(srccomps, width, height, srcPixData,
1027                       dest, dstRowStride);
1028      break;
1029   case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
1030      tx_compress_dxt5(srccomps, width, height, srcPixData,
1031                       dest, dstRowStride);
1032      break;
1033   default:
1034      unreachable("unknown DXTn format");
1035   }
1036}
1037
1038#endif
1039