Add basic reduce kernel API support to the CPU reference implementation.

Bug: 22631253

For now, this just runs a serial reduction on one thread.

Change-Id: I34c96d24bb6f44274de72bb53160abcf79d143b0
diff --git a/rsScript.h b/rsScript.h
index 6ca4fc1..bd6622d 100644
--- a/rsScript.h
+++ b/rsScript.h
@@ -84,6 +84,8 @@
             int mVersionMinor;
 
             size_t exportedVariableCount;
+            size_t exportedForEachCount;
+            size_t exportedReduceCount;
             size_t exportedFunctionCount;
             size_t exportedPragmaCount;
             char const **exportedPragmaKeyList;
@@ -130,6 +132,9 @@
                             size_t usrBytes,
                             const RsScriptCall *sc = nullptr) = 0;
 
+    virtual void runReduce(Context *rsc, uint32_t slot, const Allocation *ain,
+                           Allocation *aout, const RsScriptCall *sc) = 0;
+
     virtual void Invoke(Context *rsc, uint32_t slot, const void *data, size_t len) = 0;
     virtual void setupScript(Context *rsc) = 0;
     virtual uint32_t run(Context *) = 0;