Remove "predefined" elements from Java layer.  Static elements continue to exist but are no longer treated as a special version of element.
diff --git a/libs/rs/java/Fall/res/raw/fall.c b/libs/rs/java/Fall/res/raw/fall.c
index c09f43c..346006c 100644
--- a/libs/rs/java/Fall/res/raw/fall.c
+++ b/libs/rs/java/Fall/res/raw/fall.c
@@ -47,6 +47,20 @@
 // The higher, the smaller the ripple
 #define RIPPLE_HEIGHT 10.0f
 
+float g_SkyOffsetX;
+float g_SkyOffsetY;
+
+struct vert_s {  // 8 floats; NOTE(review): appears to mirror the (index << 3) + k mesh vertex layout used in this file - confirm
+    float nx;    // normal x (slot +0)
+    float ny;    // normal y (slot +1)
+    float nz;    // normal z (slot +2)
+    float s;     // texture coordinate (slot +3)
+    float t;     // texture coordinate (slot +4)
+    float x;     // position x (slot +5)
+    float y;     // position y (slot +6)
+    float z;     // position z (slot +7)
+};
+
 int offset(int x, int y, int width) {
     return x + 1 + (y + 1) * (width + 2);
 }
@@ -150,8 +164,8 @@
     int *map = loadArrayI32(RSID_REFRACTION_MAP, 0);
     float *vertices = loadTriangleMeshVerticesF(NAMED_WaterMesh);
 
-    float fw = (float) width;
-    float fh = (float) height;
+    float fw = 1.f / width;
+    float fh = 1.f / height;
     float fy = (1.0f / 512.0f) * (1.0f / RIPPLE_HEIGHT);
 
     int h = height - 1;
@@ -175,8 +189,8 @@
             if (v >= height) v = height - 1;
 
             int index = (offset + w) << 3;
-            vertices[index + 3] = u / fw;
-            vertices[index + 4] = v / fh;
+            vertices[index + 3] = u * fw;
+            vertices[index + 4] = v * fh;
 
             // Update Z coordinate of the vertex
             vertices[index + 7] = dy * fy;
@@ -196,76 +210,26 @@
         int x = 0;
         int yOffset = y * width;
         for ( ; x < width; x += 1) {
-            int o = (yOffset + x) << 3;
-            int o1 = o + 8;
-            int ow = o + w8;
+            int o = ((yOffset + x) << 3);  // float index where this vertex starts (8 floats per vertex)
+            int o1 = o + 8;                // start of the following vertex; the +5 position offset is added at each use
+            int ow = o + w8;               // start of the vertex w8 floats further on (w8 is set outside this hunk)
             int ow1 = ow + 8;
 
-            // V1
-            float v1x = vertices[o + 5];
-            float v1y = vertices[o + 6];
-            float v1z = vertices[o + 7];
-
-            // V2
-            float v2x = vertices[o1 + 5];
-            float v2y = vertices[o1 + 6];
-            float v2z = vertices[o1 + 7];
-
-            // V3
-            float v3x = vertices[ow + 5];
-            float v3y = vertices[ow + 6];
-            float v3z = vertices[ow + 7];
-
-            // N1
-            float n1x = v2x - v1x;
-            float n1y = v2y - v1y;
-            float n1z = v2z - v1z;
-
-            // N2
-            float n2x = v3x - v1x;
-            float n2y = v3y - v1y;
-            float n2z = v3z - v1z;
-
-            // N1 x N2
-            float n3x = n1y * n2z - n1z * n2y;
-            float n3y = n1z * n2x - n1x * n2z;
-            float n3z = n1x * n2y - n1y * n2x;
-
-            // Normalize
-            float len = 1.0f / magf3(n3x, n3y, n3z);
-            n3x *= len;
-            n3y *= len;
-            n3z *= len;
-
-            // V2
-            v2x = vertices[ow1 + 5];
-            v2y = vertices[ow1 + 6];
-            v2z = vertices[ow1 + 7];
-
-            // N1
-            n1x = v2x - v1x;
-            n1y = v2y - v1y;
-            n1z = v2z - v1z;
-
-            // N2
-            n2x = v3x - v1x;
-            n2y = v3y - v1y;
-            n2z = v3z - v1z;
+            struct vec3_s n1, n2, n3;
+            vec3Sub(&n1, (struct vec3_s *)(vertices + o1 + 5), (struct vec3_s *)(vertices + o + 5));
+            vec3Sub(&n2, (struct vec3_s *)(vertices + ow + 5), (struct vec3_s *)(vertices + o + 5));
+            vec3Cross(&n3, &n1, &n2);
+            vec3Norm(&n3);
 
             // Average of previous normal and N1 x N2
-            n3x = n3x * 0.5f + (n1y * n2z - n1z * n2y) * 0.5f;
-            n3y = n3y * 0.5f + (n1z * n2x - n1x * n2z) * 0.5f;
-            n3z = n3z * 0.5f + (n1x * n2y - n1y * n2x) * 0.5f;
+            vec3Sub(&n1, (struct vec3_s *)(vertices + ow1 + 5), (struct vec3_s *)(vertices + o + 5));
+            vec3Cross(&n2, &n1, &n2);
+            vec3Add(&n3, &n3, &n2);
+            vec3Norm(&n3);
 
-            // Normalize
-            len = 1.0f / magf3(n3x, n3y, n3z);
-            n3x *= len;
-            n3y *= len;
-            n3z *= len;
-
-            vertices[o + 0] = n3x;
-            vertices[o + 1] = n3y;
-            vertices[o + 2] = -n3z;
+            vertices[o + 0] = n3.x;
+            vertices[o + 1] = n3.y;
+            vertices[o + 2] = -n3.z;
 
             // reset Z
             //vertices[(yOffset + x) << 3 + 7] = 0.0f;
@@ -433,15 +397,15 @@
     bindProgramFragmentStore(NAMED_PFSLeaf);
     bindTexture(NAMED_PFSky, 0, NAMED_TSky);
 
-    float x = State->skyOffsetX + State->skySpeedX;
-    float y = State->skyOffsetY + State->skySpeedY;
+    float x = g_SkyOffsetX + State->skySpeedX;
+    float y = g_SkyOffsetY + State->skySpeedY;
 
     if (x > 1.0f) x = 0.0f;
     if (x < -1.0f) x = 0.0f;
     if (y > 1.0f) y = 0.0f;
 
-    storeF(RSID_STATE, OFFSETOF_WorldState_skyOffsetX, x);
-    storeF(RSID_STATE, OFFSETOF_WorldState_skyOffsetY, y);
+    g_SkyOffsetX = x;
+    g_SkyOffsetY = y;
 
     float matrix[16];
     matrixLoadTranslate(matrix, x, y, 0.0f);
@@ -509,7 +473,7 @@
     drawRiverbed();
     drawSky();
     drawLighting();
-    drawLeaves();
+    //drawLeaves();
     //drawNormals();
 
     return 1;
diff --git a/libs/rs/java/Fall/src/com/android/fall/rs/FallRS.java b/libs/rs/java/Fall/src/com/android/fall/rs/FallRS.java
index 8a33d66..33aa9ab 100644
--- a/libs/rs/java/Fall/src/com/android/fall/rs/FallRS.java
+++ b/libs/rs/java/Fall/src/com/android/fall/rs/FallRS.java
@@ -44,7 +44,7 @@
     private static final int MESH_RESOLUTION = 48;
 
     private static final int RSID_STATE = 0;
-    
+
     private static final int TEXTURES_COUNT = 3;
     private static final int LEAVES_TEXTURES_COUNT = 4;
     private static final int RSID_TEXTURE_RIVERBED = 0;
@@ -52,7 +52,7 @@
     private static final int RSID_TEXTURE_SKY = 2;
 
     private static final int RSID_RIPPLE_MAP = 1;
-    
+
     private static final int RSID_REFRACTION_MAP = 2;
 
     private static final int RSID_LEAVES = 3;
@@ -70,7 +70,21 @@
     private static final int LEAF_STRUCT_DELTAX = 9;
     private static final int LEAF_STRUCT_DELTAY = 10;
 
-    private static final int RSID_DROP = 4;    
+    static class Leaf {  // plain data holder; static (like DropState) so it carries no outer-instance reference
+        float x;
+        float y;
+        float scale;
+        float angle;
+        float spin;
+        float u1;
+        float u2;
+        float altitude;
+        float rippled;
+        float deltaX;    // field index 9 - lines up with LEAF_STRUCT_DELTAX = 9
+        float deltaY;    // field index 10 - lines up with LEAF_STRUCT_DELTAY = 10
+    }
+
+    private static final int RSID_DROP = 4;
 
     private Resources mResources;
     private RenderScript mRS;
@@ -175,10 +189,10 @@
 
         float quadWidth = 2.0f / (float) wResolution;
         float quadHeight = glHeight / (float) hResolution;
-        
+
         wResolution += 2;
-        hResolution += 2;        
-        
+        hResolution += 2;
+
         for (int y = 0; y <= hResolution; y++) {
             final boolean shift = (y & 0x1) == 0;
             final float yOffset = y * quadHeight - glHeight / 2.0f - quadHeight;
@@ -267,12 +281,10 @@
         public int leavesCount;
         public float glWidth;
         public float glHeight;
-        public float skyOffsetX;
-        public float skyOffsetY;
         public float skySpeedX;
         public float skySpeedY;
     }
-    
+
     static class DropState {
         public int dropX;
         public int dropY;
@@ -295,11 +307,11 @@
         mStateType = Type.createFromClass(mRS, WorldState.class, 1, "WorldState");
         mState = Allocation.createTyped(mRS, mStateType);
         mState.data(worldState);
-        
+
         mDrop = new DropState();
         mDrop.dropX = -1;
         mDrop.dropY = -1;
-        
+
         mDropType = Type.createFromClass(mRS, DropState.class, 1, "DropState");
         mDropState = Allocation.createTyped(mRS, mDropType);
         mDropState.data(mDrop);
@@ -346,7 +358,7 @@
         final Allocation allocation = Allocation.createFromBitmap(mRS, b, RGBA_8888, false);
         allocation.setName(name);
         return allocation;
-    }    
+    }
 
     private void createProgramFragment() {
         Sampler.Builder sampleBuilder = new Sampler.Builder(mRS);
@@ -368,7 +380,7 @@
         mPfLighting = builder.create();
         mPfLighting.setName("PFLighting");
         mPfLighting.bindSampler(sampler, 0);
-        
+
         builder = new ProgramFragment.Builder(mRS, null, null);
         builder.setTexEnable(true, 0);
         builder.setTexEnvMode(MODULATE, 0);
@@ -407,7 +419,7 @@
         mPvLight = builder.create();
         mPvLight.bindAllocation(pvOrthoAlloc);
         mPvLight.setName("PVLight");
-        
+
         builder = new ProgramVertex.Builder(mRS, null, null);
         builder.setTextureMatrixEnable(true);
         mPvSky = builder.create();
diff --git a/libs/rs/rs.spec b/libs/rs/rs.spec
index cb4dd00..ac2e738 100644
--- a/libs/rs/rs.spec
+++ b/libs/rs/rs.spec
@@ -39,10 +39,6 @@
 ElementBegin {
 }
 
-ElementAddPredefined {
-	param RsElementPredefined predef
-	}
-
 ElementAdd {
 	param RsDataKind dataKind
 	param RsDataType dataType
@@ -99,8 +95,8 @@
 AllocationCreateFromBitmap {
 	param uint32_t width
 	param uint32_t height
-	param RsElementPredefined dstFmt
-	param RsElementPredefined srcFmt
+	param RsElement dstFmt
+	param RsElement srcFmt
 	param bool genMips
 	param const void * data
 	ret RsAllocation
@@ -109,8 +105,8 @@
 AllocationCreateFromBitmapBoxed {
 	param uint32_t width
 	param uint32_t height
-	param RsElementPredefined dstFmt
-	param RsElementPredefined srcFmt
+	param RsElement dstFmt
+	param RsElement srcFmt
 	param bool genMips
 	param const void * data
 	ret RsAllocation
diff --git a/libs/rs/rsAllocation.cpp b/libs/rs/rsAllocation.cpp
index 1f49ca1..c267e16 100644
--- a/libs/rs/rsAllocation.cpp
+++ b/libs/rs/rsAllocation.cpp
@@ -310,40 +310,54 @@
     }
 }
 
-static ElementConverter_t pickConverter(RsElementPredefined dstFmt, RsElementPredefined srcFmt)
+static ElementConverter_t pickConverter(const Element *dst, const Element *src)  // returns 0 (after logging) if no converter fits
 {
-    if ((dstFmt == RS_ELEMENT_RGB_565) &&
-        (srcFmt == RS_ELEMENT_RGB_565)) {
-        return elementConverter_cpy_16;
+    GLenum srcGLType = src->getGLType();
+    GLenum srcGLFmt = src->getGLFormat();
+    GLenum dstGLType = dst->getGLType();
+    GLenum dstGLFmt = dst->getGLFormat();
+    // Same GL format and type on both sides: a plain copy, picked by element size in bytes.
+    if (srcGLFmt == dstGLFmt && srcGLType == dstGLType) {
+        switch(dst->getSizeBytes()) {
+        case 4:
+            return elementConverter_cpy_32;
+        case 2:
+            return elementConverter_cpy_16;
+        case 1:
+            return elementConverter_cpy_8;
+        }  // any other size falls through to the checks below and ends at the error log
     }
 
-    if ((dstFmt == RS_ELEMENT_RGB_565) &&
-        (srcFmt == RS_ELEMENT_RGB_888)) {
+    if (srcGLType == GL_UNSIGNED_BYTE &&
+        srcGLFmt == GL_RGB &&
+        dstGLType == GL_UNSIGNED_SHORT_5_6_5 &&
+        dstGLFmt == GL_RGB) {
+        // Byte-per-channel RGB source, packed 5-6-5 RGB destination.
         return elementConverter_888_to_565;
     }
 
-    if ((dstFmt == RS_ELEMENT_RGB_565) &&
-        (srcFmt == RS_ELEMENT_RGBA_8888)) {
+    if (srcGLType == GL_UNSIGNED_BYTE &&
+        srcGLFmt == GL_RGBA &&
+        dstGLType == GL_UNSIGNED_SHORT_5_6_5 &&
+        dstGLFmt == GL_RGB) {
+        // Byte-per-channel RGBA source, packed 5-6-5 RGB destination.
         return elementConverter_8888_to_565;
     }
 
-    if ((dstFmt == RS_ELEMENT_RGBA_8888) &&
-        (srcFmt == RS_ELEMENT_RGBA_8888)) {
-        return elementConverter_cpy_32;
-    }
-
-    LOGE("pickConverter, unsuported combo, src %i,  dst %i", srcFmt, dstFmt);
+    LOGE("pickConverter, unsuported combo, src %p,  dst %p", src, dst);
     return 0;
 }
 
 
-RsAllocation rsi_AllocationCreateFromBitmap(Context *rsc, uint32_t w, uint32_t h, RsElementPredefined dstFmt, RsElementPredefined srcFmt,  bool genMips, const void *data)
+RsAllocation rsi_AllocationCreateFromBitmap(Context *rsc, uint32_t w, uint32_t h, RsElement _dst, RsElement _src,  bool genMips, const void *data)
 {
+    const Element *src = static_cast<const Element *>(_src);
+    const Element *dst = static_cast<const Element *>(_dst);
     rsAssert(!(w & (w-1)));
     rsAssert(!(h & (h-1)));
 
     //LOGE("rsi_AllocationCreateFromBitmap %i %i %i %i %i", w, h, dstFmt, srcFmt, genMips);
-    rsi_TypeBegin(rsc, rsi_ElementGetPredefined(rsc, dstFmt));
+    rsi_TypeBegin(rsc, _dst);
     rsi_TypeAdd(rsc, RS_DIMENSION_X, w);
     rsi_TypeAdd(rsc, RS_DIMENSION_Y, h);
     if (genMips) {
@@ -359,7 +373,7 @@
     }
     texAlloc->incUserRef();
 
-    ElementConverter_t cvt = pickConverter(dstFmt, srcFmt);
+    ElementConverter_t cvt = pickConverter(dst, src);
     cvt(texAlloc->getPtr(), data, w * h);
 
     if (genMips) {
@@ -375,21 +389,18 @@
     return texAlloc;
 }
 
-static uint32_t fmtToBits(RsElementPredefined fmt)
+RsAllocation rsi_AllocationCreateFromBitmapBoxed(Context *rsc, uint32_t w, uint32_t h, RsElement _dst, RsElement _src, bool genMips, const void *data)
 {
-    return 16;
-}
-
-RsAllocation rsi_AllocationCreateFromBitmapBoxed(Context *rsc, uint32_t w, uint32_t h, RsElementPredefined dstFmt, RsElementPredefined srcFmt, bool genMips, const void *data)
-{
+    const Element *srcE = static_cast<const Element *>(_src);
+    const Element *dstE = static_cast<const Element *>(_dst);
     uint32_t w2 = rsHigherPow2(w);
     uint32_t h2 = rsHigherPow2(h);
 
     if ((w2 == w) && (h2 == h)) {
-        return rsi_AllocationCreateFromBitmap(rsc, w, h, dstFmt, srcFmt, genMips, data);
+        return rsi_AllocationCreateFromBitmap(rsc, w, h, _dst, _src, genMips, data);
     }
 
-    uint32_t bpp = fmtToBits(srcFmt) >> 3;
+    uint32_t bpp = srcE->getSizeBytes();
     size_t size = w2 * h2 * bpp;
     uint8_t *tmp = static_cast<uint8_t *>(malloc(size));
     memset(tmp, 0, size);
@@ -401,7 +412,7 @@
         src += w * bpp;
     }
 
-    RsAllocation ret = rsi_AllocationCreateFromBitmap(rsc, w2, h2, dstFmt, srcFmt, genMips, tmp);
+    RsAllocation ret = rsi_AllocationCreateFromBitmap(rsc, w2, h2, _dst, _src, genMips, tmp);
     free(tmp);
     return ret;
 
diff --git a/libs/rs/rsContext.cpp b/libs/rs/rsContext.cpp
index c132915..04f6e07 100644
--- a/libs/rs/rsContext.cpp
+++ b/libs/rs/rsContext.cpp
@@ -184,10 +184,10 @@
 
     LOGV("RS: Frame (%lli),   Script %2.1f (%lli),  Clear & Swap %2.1f (%lli),  Idle %2.1f (%lli),  Internal %2.1f (%lli)",
          frame / 1000000,
-         100.0 * mTimers[RS_TIMER_IDLE] / total, mTimers[RS_TIMER_IDLE] / 1000000,
-         100.0 * mTimers[RS_TIMER_INTERNAL] / total, mTimers[RS_TIMER_INTERNAL] / 1000000,
          100.0 * mTimers[RS_TIMER_SCRIPT] / total, mTimers[RS_TIMER_SCRIPT] / 1000000,
-         100.0 * mTimers[RS_TIMER_CLEAR_SWAP] / total, mTimers[RS_TIMER_CLEAR_SWAP] / 1000000);
+         100.0 * mTimers[RS_TIMER_CLEAR_SWAP] / total, mTimers[RS_TIMER_CLEAR_SWAP] / 1000000,
+         100.0 * mTimers[RS_TIMER_IDLE] / total, mTimers[RS_TIMER_IDLE] / 1000000,
+         100.0 * mTimers[RS_TIMER_INTERNAL] / total, mTimers[RS_TIMER_INTERNAL] / 1000000);
 }
 
 void Context::setupCheck()
diff --git a/libs/rs/rsScriptC.cpp b/libs/rs/rsScriptC.cpp
index 9d9eb1b..8230cbc 100644
--- a/libs/rs/rsScriptC.cpp
+++ b/libs/rs/rsScriptC.cpp
@@ -286,6 +286,10 @@
     char buf[256];
     String8 tmp;
 
+    str->append("struct vec2_s {float x; float y;};");
+    str->append("struct vec3_s {float x; float y; float z;};");
+    str->append("struct vec4_s {float x; float y; float z; float w;};");
+
     for (size_t ct=0; ct < MAX_SCRIPT_BANKS; ct++) {
         const Type *t = mConstantBufferTypes[ct].get();
         if (!t) {
diff --git a/libs/rs/rsScriptC_Lib.cpp b/libs/rs/rsScriptC_Lib.cpp
index 84a39aa..5b19f17 100644
--- a/libs/rs/rsScriptC_Lib.cpp
+++ b/libs/rs/rsScriptC_Lib.cpp
@@ -36,6 +36,23 @@
     Context * rsc = tls->mContext; \
     ScriptC * sc = (ScriptC *) tls->mScript
 
+typedef struct {  // host-side view of the script-visible "struct vec3_s": same three floats, same order
+    float x;
+    float y;
+    float z;
+} vec3_t;
+
+typedef struct {  // field order matches the script-side struct vec4_s {x, y, z, w}
+    float x;
+    float y;
+    float z;
+    float w;
+} vec4_t;
+
+typedef struct {  // field order matches the script-side struct vec2_s {x, y}
+    float x;
+    float y;
+} vec2_t;
 
 //////////////////////////////////////////////////////////////////////////////
 // IO routines
@@ -161,6 +178,60 @@
     memcpy(&f[offset], m, sizeof(rsc_Matrix));
 }
 
+//////////////////////////////////////////////////////////////////////////////
+// Vec3 routines
+//////////////////////////////////////////////////////////////////////////////
+
+static void SC_vec3Norm(vec3_t *v)
+{
+    // Scale every component by the reciprocal of the vector's length.
+    const float invLen = 1 / sqrtf(v->x * v->x + v->y * v->y + v->z * v->z);
+    v->x *= invLen;
+    v->y *= invLen;
+    v->z *= invLen;
+}
+
+static float SC_vec3Length(const vec3_t *v)  // returns the Euclidean length of *v; v is not modified
+{
+    return sqrtf(v->x * v->x + v->y * v->y + v->z * v->z);
+}
+
+static void SC_vec3Add(vec3_t *dest, const vec3_t *lhs, const vec3_t *rhs)
+{
+    // Component-wise lhs + rhs, built in a temporary and then stored to *dest.
+    const vec3_t sum = { lhs->x + rhs->x, lhs->y + rhs->y, lhs->z + rhs->z };
+    *dest = sum;
+}
+
+static void SC_vec3Sub(vec3_t *dest, const vec3_t *lhs, const vec3_t *rhs)
+{
+    // Component-wise lhs - rhs, built in a temporary and then stored to *dest.
+    const vec3_t diff = { lhs->x - rhs->x, lhs->y - rhs->y, lhs->z - rhs->z };
+    *dest = diff;
+}
+
+static void SC_vec3Cross(vec3_t *dest, const vec3_t *lhs, const vec3_t *rhs)
+{
+    // All three components are computed before *dest is written, so dest may alias lhs or rhs.
+    vec3_t cross;
+    cross.x = lhs->y * rhs->z  - lhs->z * rhs->y;
+    cross.y = lhs->z * rhs->x  - lhs->x * rhs->z;
+    cross.z = lhs->x * rhs->y  - lhs->y * rhs->x;
+    *dest = cross;
+}
+
+static float SC_vec3Dot(const vec3_t *lhs, const vec3_t *rhs)  // returns the dot product of *lhs and *rhs
+{
+    return lhs->x * rhs->x + lhs->y * rhs->y + lhs->z * rhs->z;
+}
+
+static void SC_vec3Scale(vec3_t *lhs, float scale)  // multiplies each component of *lhs by scale, in place
+{
+    lhs->x *= scale;
+    lhs->y *= scale;
+    lhs->z *= scale;
+}
+
 
 //////////////////////////////////////////////////////////////////////////////
 // Math routines
@@ -175,15 +246,15 @@
     const float A =   1.0f / (2.0f * M_PI);
     const float B = -16.0f;
     const float C =   8.0f;
-    
+
     // scale angle for easy argument reduction
     x *= A;
-    
+
     if (fabsf(x) >= 0.5f) {
         // argument reduction
         x = x - ceilf(x + 0.5f) + 1.0f;
     }
-    
+
     const float y = B * x * fabsf(x) + C * x;
     return 0.2215f * (y * fabsf(y) - y) + y;
 }
@@ -195,15 +266,15 @@
     const float A =   1.0f / (2.0f * M_PI);
     const float B = -16.0f;
     const float C =   8.0f;
-    
+
     // scale angle for easy argument reduction
     x *= A;
-    
+
     if (fabsf(x) >= 0.5f) {
         // argument reduction
         x = x - ceilf(x + 0.5f) + 1.0f;
     }
-    
+
     const float y = B * x * fabsf(x) + C * x;
     return 0.2215f * (y * fabsf(y) - y) + y;
 }
@@ -1038,6 +1109,22 @@
     { "vec2Rand", (void *)&SC_vec2Rand,
         "void", "(float *vec, float maxLen)" },
 
+    // vec3
+    { "vec3Norm", (void *)&SC_vec3Norm,
+        "void", "(struct vec3_s *)" },
+    { "vec3Length", (void *)&SC_vec3Length,
+        "float", "(struct vec3_s *)" },
+    { "vec3Add", (void *)&SC_vec3Add,
+        "void", "(struct vec3_s *dest, struct vec3_s *lhs, struct vec3_s *rhs)" },
+    { "vec3Sub", (void *)&SC_vec3Sub,
+        "void", "(struct vec3_s *dest, struct vec3_s *lhs, struct vec3_s *rhs)" },
+    { "vec3Cross", (void *)&SC_vec3Cross,
+        "void", "(struct vec3_s *dest, struct vec3_s *lhs, struct vec3_s *rhs)" },
+    { "vec3Dot", (void *)&SC_vec3Dot,
+        "float", "(struct vec3_s *lhs, struct vec3_s *rhs)" },
+    { "vec3Scale", (void *)&SC_vec3Scale,
+        "void", "(struct vec3_s *lhs, float scale)" },
+
     // context
     { "bindProgramFragment", (void *)&SC_bindProgramFragment,
         "void", "(int)" },