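Thread launch strategies.

Add the rs_for_each_strategy enum and a leading strategy field to
rs_script_call in rs_math.rsh.

Add an mIsThreadable flag to the script environment. It is set to true
before bccCompileScript, cleared by symbolLookup when a symbol is not
found by ScriptCState::lookupSymbol, and is now also required by the
getWorkerPoolSize() > 1 launch branch in rsScriptC.cpp.

Simplify horizontal_blur.rs by using convert_float3, convert_uchar3 and
rsClamp in place of per-channel casts and manual clamping.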

Change-Id: I506df786e815205a8e51906c2b517302c1ef2471
diff --git a/libs/rs/java/ImageProcessing/src/com/android/rs/image/horizontal_blur.rs b/libs/rs/java/ImageProcessing/src/com/android/rs/image/horizontal_blur.rs
index 4ed5aba4..58c9acf 100644
--- a/libs/rs/java/ImageProcessing/src/com/android/rs/image/horizontal_blur.rs
+++ b/libs/rs/java/ImageProcessing/src/com/android/rs/image/horizontal_blur.rs
@@ -8,43 +8,23 @@
     const uchar4 *input = (const uchar4 *)rsGetElementAt(fs->ain, 0, y);
 
     float3 blurredPixel = 0;
-    float3 currentPixel = 0;
-
     const float *gPtr = fs->gaussian;
     if ((x > fs->radius) && (x < (fs->width - fs->radius))) {
         const uchar4 *i = input + (x - fs->radius);
         for(int r = -fs->radius; r <= fs->radius; r ++) {
-            currentPixel.x = (float)(i->x);
-            currentPixel.y = (float)(i->y);
-            currentPixel.z = (float)(i->z);
-            blurredPixel += currentPixel * gPtr[0];
+            blurredPixel += convert_float3(i->xyz) * gPtr[0];
             gPtr++;
             i++;
         }
     } else {
         for(int r = -fs->radius; r <= fs->radius; r ++) {
             // Stepping left and right away from the pixel
-            int validW = x + r;
-            // Clamp to zero and width max() isn't exposed for ints yet
-            if(validW < 0) {
-                validW = 0;
-            }
-            if(validW > fs->width - 1) {
-                validW = fs->width - 1;
-            }
-            //int validW = rsClamp(w + r, 0, width - 1);
-
-            currentPixel.x = (float)(input[validW].x);
-            currentPixel.y = (float)(input[validW].y);
-            currentPixel.z = (float)(input[validW].z);
-
-            blurredPixel += currentPixel * gPtr[0];
+            int validW = rsClamp((int)x + r, (int)0, (int)(fs->width - 1)); // signed clamp: x + r goes negative near the left edge and must clamp to 0
+            blurredPixel += convert_float3(input[validW].xyz) * gPtr[0];
             gPtr++;
         }
     }
 
-    output->x = (uint8_t)blurredPixel.x;
-    output->y = (uint8_t)blurredPixel.y;
-    output->z = (uint8_t)blurredPixel.z;
+    output->xyz = convert_uchar3(blurredPixel);
 }
 
diff --git a/libs/rs/rsScript.h b/libs/rs/rsScript.h
index 455ece7..0a20344 100644
--- a/libs/rs/rsScript.h
+++ b/libs/rs/rsScript.h
@@ -56,6 +56,8 @@
 
         char * mScriptText;
         uint32_t mScriptTextLength;
+
+        bool mIsThreadable;
     };
     Enviroment_t mEnviroment;
 
diff --git a/libs/rs/rsScriptC.cpp b/libs/rs/rsScriptC.cpp
index 7c7b037..a140e22 100644
--- a/libs/rs/rsScriptC.cpp
+++ b/libs/rs/rsScriptC.cpp
@@ -278,7 +278,7 @@
     }
 
 
-    if ((rsc->getWorkerPoolSize() > 1) &&
+    if ((rsc->getWorkerPoolSize() > 1) && mEnviroment.mIsThreadable &&
         ((mtls.dimY * mtls.dimZ * mtls.dimArray) > 1)) {
 
         //LOGE("launch 1");
@@ -350,10 +350,12 @@
 static BCCvoid* symbolLookup(BCCvoid* pContext, const BCCchar* name)
 {
     const ScriptCState::SymbolTable_t *sym;
+    ScriptC *s = (ScriptC *)pContext;
     sym = ScriptCState::lookupSymbol(name);
     if (sym) {
         return sym->mPtr;
     }
+    s->mEnviroment.mIsThreadable = false;
     sym = ScriptCState::lookupSymbolCL(name);
     if (sym) {
         return sym->mPtr;
@@ -371,8 +373,9 @@
     LOGV("ScriptCState::runCompiler ");
 
     s->mBccScript = bccCreateScript();
+    s->mEnviroment.mIsThreadable = true;
     bccScriptBitcode(s->mBccScript, s->mEnviroment.mScriptText, s->mEnviroment.mScriptTextLength);
-    bccRegisterSymbolCallback(s->mBccScript, symbolLookup, NULL);
+    bccRegisterSymbolCallback(s->mBccScript, symbolLookup, s);
     bccCompileScript(s->mBccScript);
     bccGetScriptLabel(s->mBccScript, "root", (BCCvoid**) &s->mProgram.mRoot);
     bccGetScriptLabel(s->mBccScript, "init", (BCCvoid**) &s->mProgram.mInit);
diff --git a/libs/rs/scriptc/rs_math.rsh b/libs/rs/scriptc/rs_math.rsh
index 45f6bf4..bb4aafb 100644
--- a/libs/rs/scriptc/rs_math.rsh
+++ b/libs/rs/scriptc/rs_math.rsh
@@ -112,8 +112,19 @@
 extern void __attribute__((overloadable))
     rsSendToClientBlocking(int cmdID, const void *data, uint len);
 
+
 // Script to Script
+enum rs_for_each_strategy {
+    RS_FOR_EACH_STRATEGY_SERIAL,
+    RS_FOR_EACH_STRATEGY_DONT_CARE,
+    RS_FOR_EACH_STRATEGY_DST_LINEAR,
+    RS_FOR_EACH_STRATEGY_TILE_SMALL,
+    RS_FOR_EACH_STRATEGY_TILE_MEDIUM,
+    RS_FOR_EACH_STRATEGY_TILE_LARGE
+};
+
 typedef struct rs_script_call {
+    enum rs_for_each_strategy strategy;
     uint32_t xStart;
     uint32_t xEnd;
     uint32_t yStart;
@@ -122,7 +133,6 @@
     uint32_t zEnd;
     uint32_t arrayStart;
     uint32_t arrayEnd;
-
 } rs_script_call_t;
 
 extern void __attribute__((overloadable))