Improve rsMatrix* documentation, fix bugs

Improve the user-facing documentation.  Fix the incorrect row & column
naming on the Get/Set API.  Fix a bug where rsMatrixLoadMultiply could
not have the destination be one of the sources,
e.g. rsMatrixLoadMultiply(&l, &l, &r).

Change-Id: I42207aacf4ebe815d4a79db2aaa9c44f85864696
diff --git a/rsMatrix3x3.h b/rsMatrix3x3.h
index 05249b1..5c10846 100644
--- a/rsMatrix3x3.h
+++ b/rsMatrix3x3.h
@@ -25,12 +25,12 @@
 namespace renderscript {
 
 struct Matrix3x3 : public rs_matrix3x3 {
-    inline float get(uint32_t x, uint32_t y) const {
-        return m[x*3 + y];
+    inline float get(uint32_t col, uint32_t row) const {
+        return m[col*3 + row];
     }
 
-    inline void set(uint32_t x, uint32_t y, float v) {
-        m[x*3 + y] = v;
+    inline void set(uint32_t col, uint32_t row, float v) {
+        m[col*3 + row] = v;
     }
 
     void loadIdentity();
@@ -42,9 +42,7 @@
     void transpose();
 
     void multiply(const rs_matrix3x3 *rhs) {
-        Matrix3x3 tmp;
-        tmp.loadMultiply(this, rhs);
-        load(&tmp);
+        loadMultiply(this, rhs);
     }
 };