Make an inline version of premultiply to speed up gradient creation.

We could speed this up further if we...
- respected kDither and built only half of the table for non-dither requests
- output simple params to the GPU rather than always a texture
- detected that we have no alpha, and could then skip the per-entry premul



git-svn-id: http://skia.googlecode.com/svn/trunk@1772 2bbb7eff-a529-9590-31e7-b0007b416f81
diff --git a/include/core/SkColorPriv.h b/include/core/SkColorPriv.h
index 6fa9df3..bd680f3 100644
--- a/include/core/SkColorPriv.h
+++ b/include/core/SkColorPriv.h
@@ -216,6 +216,21 @@
            (g << SK_G32_SHIFT) | (b << SK_B32_SHIFT);
 }
 
+// Scales non-premultiplied 8-bit r, g, b by alpha a, then packs a, r, g, b.
+// Inputs need only fit in 8 bits: a color channel may exceed a before scaling.
+static inline SkPMColor SkPremultiplyARGBInline(U8CPU a, U8CPU r, U8CPU g, U8CPU b) {
+    SkA32Assert(a);
+    SkASSERT(r <= 255);
+    SkASSERT(g <= 255);
+    SkASSERT(b <= 255);
+    if (a != 255) {
+        r = SkMulDiv255Round(r, a);
+        g = SkMulDiv255Round(g, a);
+        b = SkMulDiv255Round(b, a);
+    }
+    return SkPackARGB32(a, r, g, b);
+}
+
 SK_API extern const uint32_t gMask_00FF00FF;
 
 static inline uint32_t SkAlphaMulQ(uint32_t c, unsigned scale) {
diff --git a/src/core/SkColor.cpp b/src/core/SkColor.cpp
index 4256179..023f195 100644
--- a/src/core/SkColor.cpp
+++ b/src/core/SkColor.cpp
@@ -19,31 +19,15 @@
 #include "SkColorPriv.h"
 
 SkPMColor SkPreMultiplyARGB(U8CPU a, U8CPU r, U8CPU g, U8CPU b) {
-    if (a != 255) {
-#if 0
-        unsigned scale = SkAlpha255To256(a);
-        r = SkAlphaMul(r, scale);
-        g = SkAlphaMul(g, scale);
-        b = SkAlphaMul(b, scale);
-#else
-        r = SkMulDiv255Round(r, a);
-        g = SkMulDiv255Round(g, a);
-        b = SkMulDiv255Round(b, a);
-#endif
-    }
-    return SkPackARGB32(a, r, g, b);
+    return SkPremultiplyARGBInline(a, r, g, b);
 }
 
 SkPMColor SkPreMultiplyColor(SkColor c) {
-    unsigned a = SkColorGetA(c);
-    unsigned r = SkColorGetR(c);
-    unsigned g = SkColorGetG(c);
-    unsigned b = SkColorGetB(c);
-
-    return SkPreMultiplyARGB(a, r, g, b);
+    return SkPremultiplyARGBInline(SkColorGetA(c), SkColorGetR(c),
+                                   SkColorGetG(c), SkColorGetB(c));
 }
 
-//////////////////////////////////////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////
 
 static inline SkScalar ByteToScalar(U8CPU x) {
     SkASSERT(x <= 255);
diff --git a/src/effects/SkGradientShader.cpp b/src/effects/SkGradientShader.cpp
index d8ffe5a..f44e038 100644
--- a/src/effects/SkGradientShader.cpp
+++ b/src/effects/SkGradientShader.cpp
@@ -538,11 +538,11 @@
     b = SkIntToFixed(b) + 0x8000;
 
     do {
-        cache[0] = SkPreMultiplyARGB(a >> 16, r >> 16, g >> 16, b >> 16);
-        cache[kCache32Count] = SkPreMultiplyARGB(dither_ceil_fixed_to_8(a),
-                                                 dither_fixed_to_8(r),
-                                                 dither_fixed_to_8(g),
-                                                 dither_fixed_to_8(b));
+        cache[0] = SkPremultiplyARGBInline(a >> 16, r >> 16, g >> 16, b >> 16);
+        cache[kCache32Count] = SkPremultiplyARGBInline(dither_ceil_fixed_to_8(a),
+                                                       dither_fixed_to_8(r),
+                                                       dither_fixed_to_8(g),
+                                                       dither_fixed_to_8(b));
         cache += 1;
         a += da;
         r += dr;