Initial Contribution
diff --git a/opengl/libagl/dxt.cpp b/opengl/libagl/dxt.cpp
new file mode 100644
index 0000000..238c81f
--- /dev/null
+++ b/opengl/libagl/dxt.cpp
@@ -0,0 +1,636 @@
+/* libs/opengles/dxt.cpp
+**
+** Copyright 2007, The Android Open Source Project
+**
+** Licensed under the Apache License, Version 2.0 (the "License"); 
+** you may not use this file except in compliance with the License. 
+** You may obtain a copy of the License at 
+**
+**     http://www.apache.org/licenses/LICENSE-2.0 
+**
+** Unless required by applicable law or agreed to in writing, software 
+** distributed under the License is distributed on an "AS IS" BASIS, 
+** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+** See the License for the specific language governing permissions and 
+** limitations under the License.
+*/
+
+#define TIMING 0
+
+#if TIMING
+#include <sys/time.h> // for optimization timing
+#include <stdio.h>
+#include <stdlib.h>
+#endif
+
+#include <GLES/gl.h>
+#include <utils/Endian.h>
+
+#include "context.h"
+
+#define TIMING 0
+
+namespace android {
+
+static uint8_t avg23tab[64*64];
+static volatile int tables_initialized = 0;
+
+// Definitions below are equivalent to these over the valid range of arguments
+//  #define div5(x) ((x)/5)
+//  #define div7(x) ((x)/7)
+
+// Use fixed-point to divide by 5 and 7
+// 3277 = 2^14/5 + 1
+// 2341 = 2^14/7 + 1
+#define div5(x) (((x)*3277) >> 14)
+#define div7(x) (((x)*2341) >> 14)
+
+// Table with entry [a << 6 | b] = (2*a + b)/3 for 0 <= a,b < 64
+#define avg23(x0,x1) avg23tab[((x0) << 6) | (x1)]
+
+// Extract 5/6/5 RGB
+#define red(x)   (((x) >> 11) & 0x1f)
+#define green(x) (((x) >>  5) & 0x3f)
+#define blue(x)  ( (x)        & 0x1f)
+
+/*
+ * Convert 5/6/5 RGB (as 3 ints) to 8/8/8
+ *
+ * Operation count: 8 <<, 0 &, 5 |
+ */
+inline static int rgb565SepTo888(int r, int g, int b)
+
+{
+    return ((((r << 3) | (r >> 2)) << 16) |
+            (((g << 2) | (g >> 4)) <<  8) |
+             ((b << 3) | (b >> 2)));
+}
+
+/*
+ * Convert 5/6/5 RGB (as a single 16-bit word) to 8/8/8
+ *
+ *                   r4r3r2r1 r0g5g4g3 g2g1g0b4 b3b2b1b0   rgb
+ *            r4r3r2 r1r0g5g4 g3g2g1g0 b4b3b2b1 b0 0 0 0   rgb << 3
+ * r4r3r2r1 r0r4r3r2 g5g4g3g2 g1g0g5g4 b4b3b2b1 b0b4b3b2   desired result
+ *
+ * Construct the 24-bit RGB word as:
+ *
+ * r4r3r2r1 r0------ -------- -------- -------- --------  (rgb << 8) & 0xf80000
+ *            r4r3r2 -------- -------- -------- --------  (rgb << 3) & 0x070000
+ *                   g5g4g3g2 g1g0---- -------- --------  (rgb << 5) & 0x00fc00
+ *                                g5g4 -------- --------  (rgb >> 1) & 0x000300
+ *                                     b4b3b2b1 b0------  (rgb << 3) & 0x0000f8
+ *                                                b4b3b2  (rgb >> 2) & 0x000007
+ *
+ * Operation count: 5 <<, 6 &, 5 | (n.b. rgb >> 3 is used twice)
+ */
+inline static int rgb565To888(int rgb)
+
+{
+    int rgb3 = rgb >> 3;
+    return (((rgb << 8) & 0xf80000) |
+            ( rgb3      & 0x070000) |
+            ((rgb << 5) & 0x00fc00) |
+            ((rgb >> 1) & 0x000300) |
+            ( rgb3      & 0x0000f8) |
+            ((rgb >> 2) & 0x000007));
+}
+
+#if __BYTE_ORDER == __BIG_ENDIAN
+static uint32_t swap(uint32_t x) {
+    int b0 = (x >> 24) & 0xff;
+    int b1 = (x >> 16) & 0xff;
+    int b2 = (x >>  8) & 0xff;
+    int b3 = (x      ) & 0xff;
+    
+    return (uint32_t)((b3 << 24) | (b2 << 16) | (b1 << 8) | b0);
+}
+#endif
+
+static void
+init_tables()
+{
+    if (tables_initialized) {
+        return;
+    }
+
+    for (int i = 0; i < 64; i++) {
+        for (int j = 0; j < 64; j++) {
+            int avg = (2*i + j)/3;
+            avg23tab[(i << 6) | j] = avg;
+        }
+    }
+
+    asm volatile ("" : : : "memory");
+    tables_initialized = 1;
+}
+
+/*
+ * Utility to scan a DXT1 compressed texture to determine whether it
+ * contains a transparent pixel (color0 < color1, code == 3).  This
+ * may be useful if the application lacks information as to whether
+ * the true format is GL_COMPRESSED_RGB_S3TC_DXT1_EXT or
+ * GL_COMPRESSED_RGBA_S3TC_DXT1_EXT.
+ */
+bool
+DXT1HasAlpha(const GLvoid *data, int width, int height) {    
+#if TIMING
+    struct timeval start_t, end_t;
+    struct timezone tz;
+    
+    gettimeofday(&start_t, &tz);
+#endif
+
+    bool hasAlpha = false;
+
+    int xblocks = (width + 3)/4;
+    int yblocks = (height + 3)/4;
+    int numblocks = xblocks*yblocks;
+
+    uint32_t const *d32 = (uint32_t *)data;
+    for (int b = 0; b < numblocks; b++) {
+        uint32_t colors = *d32++;
+        
+#if __BYTE_ORDER == __BIG_ENDIAN
+        colors = swap(colors);
+#endif
+        
+        uint16_t color0 = colors & 0xffff;
+        uint16_t color1 = colors >> 16;
+        
+        if (color0 < color1) {
+            // There's no need to endian-swap within 'bits'
+            // since we don't care which pixel is the transparent one
+            uint32_t bits = *d32++;
+            
+            // Detect if any (odd, even) pair of bits are '11'
+            //      bits: b31 b30 b29 ... b3 b2 b1 b0
+            // bits >> 1: b31 b31 b30 ... b4 b3 b2 b1
+            //         &: b31 (b31 & b30) (b29 & b28) ... (b2 & b1) (b1 & b0)
+            //  & 0x55..:   0 (b31 & b30)       0     ...     0     (b1 & b0)
+            if (((bits & (bits >> 1)) & 0x55555555) != 0) {
+                hasAlpha = true;
+                goto done;
+            }
+        } else {
+            // Skip 4 bytes
+            ++d32;
+        }
+    }
+    
+ done:
+#if TIMING
+    gettimeofday(&end_t, &tz);
+    long usec = (end_t.tv_sec - start_t.tv_sec)*1000000 +
+        (end_t.tv_usec - start_t.tv_usec);
+    
+    printf("Scanned w=%d h=%d in %ld usec\n", width, height, usec);
+#endif
+    
+    return hasAlpha;
+}
+
+static void
+decodeDXT1(const GLvoid *data, int width, int height,
+           void *surface, int stride,
+           bool hasAlpha)
+    
+{
+    init_tables();
+    
+    uint32_t const *d32 = (uint32_t *)data;
+    
+    // Color table for the current block
+    uint16_t c[4];
+    c[0] = c[1] = c[2] = c[3] = 0;
+    
+    // Specified colors from the previous block
+    uint16_t prev_color0 = 0x0000;
+    uint16_t prev_color1 = 0x0000;
+    
+    uint16_t* rowPtr = (uint16_t*)surface;
+    for (int base_y = 0; base_y < height; base_y += 4, rowPtr += 4*stride) {
+        uint16_t *blockPtr = rowPtr;
+        for (int base_x = 0; base_x < width; base_x += 4, blockPtr += 4) {
+            uint32_t colors = *d32++;
+            uint32_t bits = *d32++;
+            
+#if __BYTE_ORDER == __BIG_ENDIAN
+            colors = swap(colors);
+            bits = swap(bits);
+#endif
+            
+            // Raw colors
+            uint16_t color0 = colors & 0xffff;
+            uint16_t color1 = colors >> 16;
+            
+            // If the new block has the same base colors as the
+            // previous one, we don't need to recompute the color
+            // table c[]
+            if (color0 != prev_color0 || color1 != prev_color1) {
+                // Store raw colors for comparison with next block
+                prev_color0 = color0;
+                prev_color1 = color1;
+                
+                int r0 =   red(color0);
+                int g0 = green(color0);
+                int b0 =  blue(color0);
+
+                int r1 =   red(color1);
+                int g1 = green(color1);
+                int b1 =  blue(color1);                
+                
+                if (hasAlpha) {
+                    c[0] = (r0 << 11) | ((g0 >> 1) << 6) | (b0 << 1) | 0x1;
+                    c[1] = (r1 << 11) | ((g1 >> 1) << 6) | (b1 << 1) | 0x1;
+                } else {
+                    c[0] = color0;
+                    c[1] = color1;
+                }
+                
+                int r2, g2, b2, r3, g3, b3, a3;
+                
+                int bbits = bits >> 1;
+                bool has2 = ((bbits & ~bits) & 0x55555555) != 0;
+                bool has3 = ((bbits &  bits) & 0x55555555) != 0;
+                
+                if (has2 || has3) {
+                    if (color0 > color1) {
+                        r2 = avg23(r0, r1);
+                        g2 = avg23(g0, g1);
+                        b2 = avg23(b0, b1);
+                        
+                        r3 = avg23(r1, r0);
+                        g3 = avg23(g1, g0);
+                        b3 = avg23(b1, b0);
+                        a3 = 1;
+                    } else {
+                        r2 = (r0 + r1) >> 1;
+                        g2 = (g0 + g1) >> 1;
+                        b2 = (b0 + b1) >> 1;
+                        
+                        r3 = g3 = b3 = a3 = 0;
+                    }
+                    if (hasAlpha) {
+                        c[2] = (r2 << 11) | ((g2 >> 1) << 6) |
+                            (b2 << 1) | 0x1;
+                        c[3] = (r3 << 11) | ((g3 >> 1) << 6) |
+                            (b3 << 1) | a3;
+                    } else {
+                        c[2] = (r2 << 11) | (g2 << 5) | b2;
+                        c[3] = (r3 << 11) | (g3 << 5) | b3;
+                    }
+                }
+            }
+            
+            uint16_t* blockRowPtr = blockPtr;
+            for (int y = 0; y < 4; y++, blockRowPtr += stride) {
+                // Don't process rows past the botom
+                if (base_y + y >= height) {
+                    break;
+                }
+                
+                int w = min(width - base_x, 4);
+                for (int x = 0; x < w; x++) {
+                    int code = bits & 0x3;
+                    bits >>= 2;
+                    
+                    blockRowPtr[x] = c[code];
+                }
+            }
+        }
+    }
+}
+    
+// Output data as internalformat=GL_RGBA, type=GL_UNSIGNED_BYTE
+static void
+decodeDXT3(const GLvoid *data, int width, int height,
+           void *surface, int stride)
+
+{
+    init_tables();
+    
+    uint32_t const *d32 = (uint32_t *)data;
+    
+    // Specified colors from the previous block
+    uint16_t prev_color0 = 0x0000;
+    uint16_t prev_color1 = 0x0000;
+
+    // Color table for the current block
+    uint32_t c[4];
+    c[0] = c[1] = c[2] = c[3] = 0;
+
+    uint32_t* rowPtr = (uint32_t*)surface;
+    for (int base_y = 0; base_y < height; base_y += 4, rowPtr += 4*stride) {
+        uint32_t *blockPtr = rowPtr;
+        for (int base_x = 0; base_x < width; base_x += 4, blockPtr += 4) {
+            
+#if __BYTE_ORDER == __BIG_ENDIAN
+            uint32_t alphahi = *d32++;
+            uint32_t alphalo = *d32++;
+            alphahi = swap(alphahi);
+            alphalo = swap(alphalo);
+#else
+            uint32_t alphalo = *d32++;
+            uint32_t alphahi = *d32++;
+#endif
+
+            uint32_t colors = *d32++;
+            uint32_t bits = *d32++;
+            
+#if __BYTE_ORDER == __BIG_ENDIAN
+            colors = swap(colors);
+            bits = swap(bits);
+#endif
+            
+            uint64_t alpha = ((uint64_t)alphahi << 32) | alphalo;
+
+            // Raw colors
+            uint16_t color0 = colors & 0xffff;
+            uint16_t color1 = colors >> 16;
+
+            // If the new block has the same base colors as the
+            // previous one, we don't need to recompute the color
+            // table c[]
+            if (color0 != prev_color0 || color1 != prev_color1) {
+                // Store raw colors for comparison with next block
+                prev_color0 = color0;
+                prev_color1 = color1;
+                
+                int bbits = bits >> 1;
+                bool has2 = ((bbits & ~bits) & 0x55555555) != 0;
+                bool has3 = ((bbits &  bits) & 0x55555555) != 0;
+                
+                if (has2 || has3) {
+                    int r0 =   red(color0);
+                    int g0 = green(color0);
+                    int b0 =  blue(color0);
+                    
+                    int r1 =   red(color1);
+                    int g1 = green(color1);
+                    int b1 =  blue(color1);
+                    
+                    int r2 = avg23(r0, r1);
+                    int g2 = avg23(g0, g1);
+                    int b2 = avg23(b0, b1);
+                    
+                    int r3 = avg23(r1, r0);
+                    int g3 = avg23(g1, g0);
+                    int b3 = avg23(b1, b0);
+
+                    c[0] = rgb565SepTo888(r0, g0, b0);
+                    c[1] = rgb565SepTo888(r1, g1, b1);
+                    c[2] = rgb565SepTo888(r2, g2, b2);
+                    c[3] = rgb565SepTo888(r3, g3, b3);
+                } else {
+                    // Convert to 8 bits
+                    c[0] = rgb565To888(color0);
+                    c[1] = rgb565To888(color1);
+                }
+            }
+
+            uint32_t* blockRowPtr = blockPtr;
+            for (int y = 0; y < 4; y++, blockRowPtr += stride) {
+                // Don't process rows past the botom
+                if (base_y + y >= height) {
+                    break;
+                }
+                
+                int w = min(width - base_x, 4);
+                for (int x = 0; x < w; x++) {
+                    int a = alpha & 0xf;
+                    alpha >>= 4;
+
+                    int code = bits & 0x3;
+                    bits >>= 2;
+
+                    blockRowPtr[x] = c[code] | (a << 28) | (a << 24);
+                }
+            }
+        }
+    }
+}
+
+// Output data as internalformat=GL_RGBA, type=GL_UNSIGNED_BYTE
+static void
+decodeDXT5(const GLvoid *data, int width, int height,
+           void *surface, int stride)
+
+{
+    init_tables();
+    
+    uint32_t const *d32 = (uint32_t *)data;
+    
+    // Specified alphas from the previous block
+    uint8_t prev_alpha0 = 0x00;
+    uint8_t prev_alpha1 = 0x00;
+
+    // Specified colors from the previous block
+    uint16_t prev_color0 = 0x0000;
+     uint16_t prev_color1 = 0x0000;
+
+    // Alpha table for the current block
+    uint8_t a[8];
+    a[0] = a[1] = a[2] = a[3] = a[4] = a[5] = a[6] = a[7] = 0;
+
+    // Color table for the current block
+    uint32_t c[4];
+    c[0] = c[1] = c[2] = c[3] = 0;
+
+    int good_a5 = 0;
+    int bad_a5 = 0;
+    int good_a6 = 0;
+    int bad_a6 = 0;
+    int good_a7 = 0;
+    int bad_a7 = 0;
+
+    uint32_t* rowPtr = (uint32_t*)surface;
+    for (int base_y = 0; base_y < height; base_y += 4, rowPtr += 4*stride) {
+        uint32_t *blockPtr = rowPtr;
+        for (int base_x = 0; base_x < width; base_x += 4, blockPtr += 4) {
+            
+#if __BYTE_ORDER == __BIG_ENDIAN
+            uint32_t alphahi = *d32++;
+            uint32_t alphalo = *d32++;
+            alphahi = swap(alphahi);
+            alphalo = swap(alphalo);
+#else
+             uint32_t alphalo = *d32++;
+             uint32_t alphahi = *d32++;
+#endif
+
+            uint32_t colors = *d32++;
+            uint32_t bits = *d32++;
+            
+#if __BYTE_ORDER == __BIG_ENDIANx
+            colors = swap(colors);
+            bits = swap(bits);
+#endif
+            
+            uint64_t alpha = ((uint64_t)alphahi << 32) | alphalo;
+            uint64_t alpha0 = alpha & 0xff;
+            alpha >>= 8;
+            uint64_t alpha1 = alpha & 0xff;
+            alpha >>= 8;
+
+            if (alpha0 != prev_alpha0 || alpha1 != prev_alpha1) {
+                prev_alpha0 = alpha0;
+                prev_alpha1 = alpha1;
+                
+                a[0] = alpha0;
+                a[1] = alpha1;
+                int a01 = alpha0 + alpha1 - 1;
+                if (alpha0 > alpha1) {
+                    a[2] = div7(6*alpha0 +   alpha1);
+                    a[4] = div7(4*alpha0 + 3*alpha1);
+                    a[6] = div7(2*alpha0 + 5*alpha1);
+
+                    // Use symmetry to derive half of the values
+                    // A few values will be off by 1 (~.5%)
+                    // Alternate which values are computed directly
+                    // and which are derived to try to reduce bias
+                    a[3] = a01 - a[6];
+                    a[5] = a01 - a[4];
+                    a[7] = a01 - a[2];
+                } else {
+                    a[2] = div5(4*alpha0 +   alpha1);
+                    a[4] = div5(2*alpha0 + 3*alpha1);
+                    a[3] = a01 - a[4];
+                    a[5] = a01 - a[2];
+                    a[6] = 0x00;
+                    a[7] = 0xff;
+                }
+            }
+
+            // Raw colors
+            uint16_t color0 = colors & 0xffff;
+            uint16_t color1 = colors >> 16;
+
+            // If the new block has the same base colors as the
+            // previous one, we don't need to recompute the color
+            // table c[]
+            if (color0 != prev_color0 || color1 != prev_color1) {
+                // Store raw colors for comparison with next block
+                prev_color0 = color0;
+                prev_color1 = color1;
+                
+                int bbits = bits >> 1;
+                bool has2 = ((bbits & ~bits) & 0x55555555) != 0;
+                bool has3 = ((bbits &  bits) & 0x55555555) != 0;
+                
+                if (has2 || has3) {
+                    int r0 =   red(color0);
+                    int g0 = green(color0);
+                    int b0 =  blue(color0);
+                    
+                    int r1 =   red(color1);
+                    int g1 = green(color1);
+                    int b1 =  blue(color1);
+                
+                    int r2 = avg23(r0, r1);
+                    int g2 = avg23(g0, g1);
+                    int b2 = avg23(b0, b1);
+                    
+                    int r3 = avg23(r1, r0);
+                    int g3 = avg23(g1, g0);
+                    int b3 = avg23(b1, b0);
+
+                    c[0] = rgb565SepTo888(r0, g0, b0);
+                    c[1] = rgb565SepTo888(r1, g1, b1);
+                    c[2] = rgb565SepTo888(r2, g2, b2);
+                    c[3] = rgb565SepTo888(r3, g3, b3);
+                } else {
+                    // Convert to 8 bits
+                    c[0] = rgb565To888(color0);
+                    c[1] = rgb565To888(color1);
+                }                
+            }
+
+            uint32_t* blockRowPtr = blockPtr;
+            for (int y = 0; y < 4; y++, blockRowPtr += stride) {
+                // Don't process rows past the botom
+                if (base_y + y >= height) {
+                    break;
+                }
+                
+                int w = min(width - base_x, 4);
+                for (int x = 0; x < w; x++) {
+                    int acode = alpha & 0x7;
+                    alpha >>= 3;
+
+                    int code = bits & 0x3;
+                    bits >>= 2;
+
+                    blockRowPtr[x] = c[code] | (a[acode] << 24);
+                }
+            }
+        }
+    }
+}
+   
+/*
+ * Decode a DXT-compressed texture into memory.  DXT textures consist of
+ * a series of 4x4 pixel blocks in left-to-right, top-down order.
+ * The number of blocks is given by ceil(width/4)*ceil(height/4).
+ *
+ * 'data' points to the texture data. 'width' and 'height' indicate the
+ * dimensions of the texture.  We assume width and height are >= 0 but
+ * do not require them to be powers of 2 or divisible by any factor.
+ *
+ * The output is written to 'surface' with each scanline separated by
+ * 'stride' 2- or 4-byte words.
+ *
+ * 'format' indicates the type of compression and must be one of the following:
+ *
+ *   GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
+ *      The output is written as 5/6/5 opaque RGB (16 bit words).
+ *      8 bytes are read from 'data' for each block.
+ *
+ *   GL_COMPRESSED_RGBA_S3TC_DXT1_EXT
+ *      The output is written as 5/5/5/1 RGBA (16 bit words)
+ *      8 bytes are read from 'data' for each block.
+ *
+ *   GL_COMPRESSED_RGBA_S3TC_DXT3_EXT
+ *   GL_COMPRESSED_RGBA_S3TC_DXT5_EXT
+ *      The output is written as 8/8/8/8 ARGB (32 bit words)
+ *      16 bytes are read from 'data' for each block.
+ */
+void
+decodeDXT(const GLvoid *data, int width, int height,
+          void *surface, int stride, int format)
+{
+#if TIMING
+    struct timeval start_t, end_t;
+    struct timezone tz;
+    
+    gettimeofday(&start_t, &tz);
+#endif
+
+    switch (format) {
+    case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
+        decodeDXT1(data, width, height, surface, stride, false);
+        break;
+        
+    case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
+        decodeDXT1(data, width, height, surface, stride, true);
+        break;
+        
+    case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
+        decodeDXT3(data, width, height, surface, stride);
+        break;
+        
+    case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
+        decodeDXT5(data, width, height, surface, stride);
+        break;
+    }
+    
+#if TIMING
+    gettimeofday(&end_t, &tz);
+    long usec = (end_t.tv_sec - start_t.tv_sec)*1000000 +
+        (end_t.tv_usec - start_t.tv_usec);
+    
+    printf("Loaded w=%d h=%d in %ld usec\n", width, height, usec);
+#endif
+}
+
+} // namespace android