Rough implementation of ForEach.
Remove launchID from root graphics script.

Change-Id: I9f80c0d4df1264f2ee1624a6d7216b9dfdf8502e
diff --git a/libs/rs/rsContext.cpp b/libs/rs/rsContext.cpp
index f99e5f2..449e5dc 100644
--- a/libs/rs/rsContext.cpp
+++ b/libs/rs/rsContext.cpp
@@ -127,14 +127,14 @@
 }
 
 
-uint32_t Context::runScript(Script *s, uint32_t launchID)
+uint32_t Context::runScript(Script *s)
 {
     ObjectBaseRef<ProgramFragment> frag(mFragment);
     ObjectBaseRef<ProgramVertex> vtx(mVertex);
     ObjectBaseRef<ProgramStore> store(mFragmentStore);
     ObjectBaseRef<ProgramRaster> raster(mRaster);
 
-    uint32_t ret = s->run(this, launchID);
+    uint32_t ret = s->run(this);
 
     mFragment.set(frag);
     mVertex.set(vtx);
@@ -157,7 +157,7 @@
 
     timerSet(RS_TIMER_SCRIPT);
     mStateFragmentStore.mLast.clear();
-    uint32_t ret = runScript(mRootScript.get(), 0);
+    uint32_t ret = runScript(mRootScript.get());
 
     checkError("runRootScript");
     if (mError != RS_ERROR_NONE) {
diff --git a/libs/rs/rsContext.h b/libs/rs/rsContext.h
index 9df07dc..31bf5d6 100644
--- a/libs/rs/rsContext.h
+++ b/libs/rs/rsContext.h
@@ -107,7 +107,7 @@
 
     uint32_t getMessageToClient(void *data, size_t *receiveLen, size_t bufferLen, bool wait);
     bool sendMessageToClient(void *data, uint32_t cmdID, size_t len, bool waitForSpace);
-    uint32_t runScript(Script *s, uint32_t launchID);
+    uint32_t runScript(Script *s);
 
     void initToClient();
     void deinitToClient();
diff --git a/libs/rs/rsScript.h b/libs/rs/rsScript.h
index ff13087..ea6aec5 100644
--- a/libs/rs/rsScript.h
+++ b/libs/rs/rsScript.h
@@ -64,9 +64,13 @@
 
     void setVar(uint32_t slot, const void *val, uint32_t len);
 
+    virtual void runForEach(Context *rsc, const Allocation *ain, Allocation *aout) = 0;
+    virtual void runForEach(Context *rsc, const Allocation *ain, Allocation *aout, uint32_t xStart, uint32_t xEnd) = 0;
+    virtual void runForEach(Context *rsc, const Allocation *ain, Allocation *aout, uint32_t xStart, uint32_t yStart, uint32_t xEnd, uint32_t yEnd) = 0;
+
     virtual void Invoke(Context *rsc, uint32_t slot, const void *data, uint32_t len) = 0;
-    virtual void setupScript() = 0;
-    virtual uint32_t run(Context *, uint32_t launchID) = 0;
+    virtual void setupScript(Context *rsc) = 0;
+    virtual uint32_t run(Context *) = 0;
 };
 
 
diff --git a/libs/rs/rsScriptC.cpp b/libs/rs/rsScriptC.cpp
index 3217e64..374a07f 100644
--- a/libs/rs/rsScriptC.cpp
+++ b/libs/rs/rsScriptC.cpp
@@ -49,8 +49,12 @@
     mEnviroment.mScriptText = NULL;
 }
 
-void ScriptC::setupScript()
+void ScriptC::setupScript(Context *rsc)
 {
+    setupGLState(rsc);
+    mEnviroment.mStartTimeMillis
+                = nanoseconds_to_milliseconds(systemTime(SYSTEM_TIME_MONOTONIC));
+
     for (uint32_t ct=0; ct < mEnviroment.mFieldCount; ct++) {
         if (!mSlots[ct].get())
             continue;
@@ -87,29 +91,19 @@
     return NULL;
 }
 
-void ScriptC::setTLS()
+Script * ScriptC::setTLS(Script *sc)
 {
     Context::ScriptTLSStruct * tls = (Context::ScriptTLSStruct *)
                                   pthread_getspecific(Context::gThreadTLSKey);
     rsAssert(tls);
-    tls->mScript = this;
+    Script *old = tls->mScript;
+    tls->mScript = sc;
+    return old;
 }
 
-void ScriptC::clearTLS()
-{
-    Context::ScriptTLSStruct * tls = (Context::ScriptTLSStruct *)
-                                  pthread_getspecific(Context::gThreadTLSKey);
-    rsAssert(tls);
-    tls->mScript = NULL;
-}
 
-uint32_t ScriptC::run(Context *rsc, uint32_t launchIndex)
+void ScriptC::setupGLState(Context *rsc)
 {
-    if (mProgram.mRoot == NULL) {
-        rsc->setError(RS_ERROR_BAD_SCRIPT, "Attempted to run bad script");
-        return 0;
-    }
-
     if (mEnviroment.mFragmentStore.get()) {
         rsc->setFragmentStore(mEnviroment.mFragmentStore.get());
     }
@@ -122,22 +116,75 @@
     if (mEnviroment.mRaster.get()) {
         rsc->setRaster(mEnviroment.mRaster.get());
     }
+}
 
-    if (launchIndex == 0) {
-        mEnviroment.mStartTimeMillis
-                = nanoseconds_to_milliseconds(systemTime(SYSTEM_TIME_MONOTONIC));
+uint32_t ScriptC::run(Context *rsc)
+{
+    if (mProgram.mRoot == NULL) {
+        rsc->setError(RS_ERROR_BAD_SCRIPT, "Attempted to run bad script");
+        return 0;
     }
-    setupScript();
+
+    setupScript(rsc);
 
     uint32_t ret = 0;
-    setTLS();
+    Script * oldTLS = setTLS(this);
     //LOGE("ScriptC::run %p", mProgram.mRoot);
     ret = mProgram.mRoot();
-    clearTLS();
+    setTLS(oldTLS);
     //LOGE("ScriptC::run ret %i", ret);
     return ret;
 }
 
+void ScriptC::runForEach(Context *rsc, const Allocation *ain, Allocation *aout,
+                         uint32_t xStart, uint32_t yStart, uint32_t xEnd, uint32_t yEnd)
+{
+    LOGE("ScriptC::runForEach not implemented"); // TODO: the ranged 2D launch is a stub; it only logs this error
+}
+
+void ScriptC::runForEach(Context *rsc, const Allocation *ain, Allocation *aout, uint32_t xStart, uint32_t xEnd)
+{
+    // Calls the root function once per cell of the 1D input 'ain' for x in [xStart, xEnd).
+    uint32_t dimX = ain->getType()->getDimX();
+    rsAssert(xStart < dimX);
+    rsAssert(ain->getType()->getDimY() == 0);
+    rsAssert(ain->getType()->getDimZ() == 0);
+    // xEnd is exclusive and may exceed dimX (the whole-allocation overload passes 0xffffffff).
+    // Clamp it to dimX: clamping to dimX - 1 would make the loop below skip the last cell.
+    if (xEnd > dimX) xEnd = dimX;
+    if (xStart >= xEnd) return;
+
+    setupScript(rsc);
+    Script * oldTLS = setTLS(this); // previous TLS script is restored after the loop
+
+    typedef int (*rs_t)(const void *, void *, uint32_t);
+    const uint8_t *ptrIn = (const uint8_t *)ain->getPtr();
+    uint32_t strideIn = ain->getType()->getElementSizeBytes();
+
+    uint8_t *ptrOut = NULL; // stays NULL (stride 0) when no output allocation is given
+    uint32_t strideOut = 0;
+    if (aout) {
+        ptrOut = (uint8_t *)aout->getPtr();
+        strideOut = aout->getType()->getElementSizeBytes();
+    }
+
+    for (uint32_t ct=xStart; ct < xEnd; ct++) {
+        ((rs_t)mProgram.mRoot) (ptrIn + (strideIn * ct), ptrOut + (strideOut * ct), ct);
+    }
+
+    setTLS(oldTLS);
+}
+
+void ScriptC::runForEach(Context *rsc, const Allocation *ain, Allocation *aout)
+{
+    // Whole-allocation launch: pick the ranged overload by whether the input type has a Y dimension.
+    if (!ain->getType()->getDimY()) {
+        return runForEach(rsc, ain, aout, 0, 0xffffffff);
+    }
+    runForEach(rsc, ain, aout, 0, 0, 0xffffffff, 0xffffffff);
+}
+
+
 void ScriptC::Invoke(Context *rsc, uint32_t slot, const void *data, uint32_t len)
 {
     //LOGE("rsi_ScriptInvoke %i", slot);
@@ -146,8 +193,8 @@
         rsc->setError(RS_ERROR_BAD_SCRIPT, "Calling invoke on bad script");
         return;
     }
-    setupScript();
-    setTLS();
+    setupScript(rsc);
+    Script * oldTLS = setTLS(this);
 
     const uint32_t * dPtr = (const uint32_t *)data;
     switch(len) {
@@ -175,7 +222,7 @@
          mEnviroment.mInvokeFunctions[slot])(dPtr[0], dPtr[1], dPtr[2], dPtr[3], dPtr[4]);
         break;
     }
-    clearTLS();
+    setTLS(oldTLS);
 }
 
 ScriptCState::ScriptCState()
diff --git a/libs/rs/rsScriptC.h b/libs/rs/rsScriptC.h
index 216c6f2..a144234 100644
--- a/libs/rs/rsScriptC.h
+++ b/libs/rs/rsScriptC.h
@@ -52,17 +52,24 @@
 
     const Allocation *ptrToAllocation(const void *) const;
 
-    void setTLS();
-    void clearTLS();
 
     virtual void Invoke(Context *rsc, uint32_t slot, const void *data, uint32_t len);
 
-    virtual void setupScript();
-    virtual uint32_t run(Context *, uint32_t launchID);
-    
+    virtual uint32_t run(Context *);
+
+    virtual void runForEach(Context *rsc, const Allocation *ain, Allocation *aout);
+    virtual void runForEach(Context *rsc, const Allocation *ain, Allocation *aout, uint32_t xStart, uint32_t xEnd);
+    virtual void runForEach(Context *rsc, const Allocation *ain, Allocation *aout, uint32_t xStart, uint32_t yStart, uint32_t xEnd, uint32_t yEnd);
+
+
     virtual void serialize(OStream *stream) const {    }
     virtual A3DClassID getClassId() const { return A3D_CLASS_ID_SCRIPT_C; }
     static Type *createFromStream(Context *rsc, IStream *stream) { return NULL; }
+
+protected:
+    void setupScript(Context *);
+    void setupGLState(Context *);
+    Script * setTLS(Script *);
 };
 
 class ScriptCState
diff --git a/libs/rs/rsScriptC_Lib.cpp b/libs/rs/rsScriptC_Lib.cpp
index 0c10fca..3f3ff23 100644
--- a/libs/rs/rsScriptC_Lib.cpp
+++ b/libs/rs/rsScriptC_Lib.cpp
@@ -540,7 +540,7 @@
 static void SC_scriptCall(int scriptID)
 {
     GET_TLS();
-    rsc->runScript((Script *)scriptID, 0);
+    rsc->runScript((Script *)scriptID);
 }
 
 int SC_divsi3(int a, int b)
@@ -556,6 +556,58 @@
 }
 
 
+void SC_ForEachii(RsScript vs, RsAllocation vin)
+{
+    // Registered as "_Z9rsForEachii": whole-input launch with no output allocation.
+    GET_TLS();
+    Allocation *ain = static_cast<Allocation *>(vin);
+    static_cast<Script *>(vs)->runForEach(rsc, ain, NULL);
+}
+
+void SC_ForEachiii(RsScript vs, RsAllocation vin, RsAllocation vout)
+{
+    // Registered as "_Z9rsForEachiii": whole-input launch with an output allocation.
+    GET_TLS();
+    Allocation *ain = static_cast<Allocation *>(vin);
+    Allocation *aout = static_cast<Allocation *>(vout);
+    static_cast<Script *>(vs)->runForEach(rsc, ain, aout);
+}
+
+void SC_ForEachiiii(RsScript vs, RsAllocation vin, int xStart, int xEnd)
+{
+    // Registered as "_Z9rsForEachiiii": launch over an x range with no output allocation.
+    GET_TLS();
+    Allocation *ain = static_cast<Allocation *>(vin);
+    static_cast<Script *>(vs)->runForEach(rsc, ain, NULL, xStart, xEnd);
+}
+
+void SC_ForEachiiiii(RsScript vs, RsAllocation vin, RsAllocation vout, int xStart, int xEnd)
+{
+    // Registered as "_Z9rsForEachiiiii": launch over an x range with an output allocation.
+    GET_TLS();
+    Allocation *ain = static_cast<Allocation *>(vin);
+    Allocation *aout = static_cast<Allocation *>(vout);
+    static_cast<Script *>(vs)->runForEach(rsc, ain, aout, xStart, xEnd);
+}
+
+void SC_ForEachiiiiii(RsScript vs, RsAllocation vin, int xStart, int yStart, int xEnd, int yEnd)
+{
+    // Registered as "_Z9rsForEachiiiiii": launch over an x/y range with no output allocation.
+    GET_TLS();
+    Allocation *ain = static_cast<Allocation *>(vin);
+    static_cast<Script *>(vs)->runForEach(rsc, ain, NULL, xStart, yStart, xEnd, yEnd);
+}
+
+void SC_ForEachiiiiiii(RsScript vs, RsAllocation vin, RsAllocation vout, int xStart, int yStart, int xEnd, int yEnd)
+{
+    // Registered as "_Z9rsForEachiiiiiii": launch over an x/y range with an output allocation.
+    GET_TLS();
+    Allocation *ain = static_cast<Allocation *>(vin);
+    Allocation *aout = static_cast<Allocation *>(vout);
+    static_cast<Script *>(vs)->runForEach(rsc, ain, aout, xStart, yStart, xEnd, yEnd);
+}
+
+
 //////////////////////////////////////////////////////////////////////////////
 // Class implementation
 //////////////////////////////////////////////////////////////////////////////
@@ -640,6 +692,12 @@
     { "rsMatrixScale", (void *)&SC_matrixScale },
     { "rsMatrixTranslate", (void *)&SC_matrixTranslate },
 
+    { "_Z9rsForEachii", (void *)&SC_ForEachii },
+    { "_Z9rsForEachiii", (void *)&SC_ForEachiii },
+    { "_Z9rsForEachiiii", (void *)&SC_ForEachiiii },
+    { "_Z9rsForEachiiiii", (void *)&SC_ForEachiiiii },
+    { "_Z9rsForEachiiiiii", (void *)&SC_ForEachiiiiii },
+    { "_Z9rsForEachiiiiiii", (void *)&SC_ForEachiiiiiii },
 
 ////////////////////////////////////////////////////////////////////
 
diff --git a/libs/rs/scriptc/rs_math.rsh b/libs/rs/scriptc/rs_math.rsh
index 33e7ee4..91c4303 100644
--- a/libs/rs/scriptc/rs_math.rsh
+++ b/libs/rs/scriptc/rs_math.rsh
@@ -66,6 +66,14 @@
 extern void rsMatrixScale(rs_matrix4x4 *mat, float x, float y, float z);
 extern void rsMatrixTranslate(rs_matrix4x4 *mat, float x, float y, float z);
 
+// Script to Script
+extern void __attribute__((overloadable))rsForEach(rs_script, rs_allocation input);
+extern void __attribute__((overloadable))rsForEach(rs_script, rs_allocation input, rs_allocation output);
+extern void __attribute__((overloadable))rsForEach(rs_script, rs_allocation input, int xStart, int xEnd);
+extern void __attribute__((overloadable))rsForEach(rs_script, rs_allocation input, rs_allocation output, int xStart, int xEnd);
+extern void __attribute__((overloadable))rsForEach(rs_script, rs_allocation input, int xStart, int yStart, int xEnd, int yEnd);
+extern void __attribute__((overloadable))rsForEach(rs_script, rs_allocation input, rs_allocation output, int xStart, int yStart, int xEnd, int yEnd);
+
 
 ///////////////////////////////////////////////////////////////////
 // non update funcs