Speed up GrDrawState handling.
Compact GrDrawState and its nested structs, and copy and compare only the
fields that are actually in use (sampler state is skipped for stages that
have no texture).
Yields 10-20% speedup of text benchmarks in GPU configuration.

A further marginal speedup is probably available by also skipping the
convolution kernel fields of GrSamplerState when they are unused.
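Introduces some fragile constructs: the sizes of portions of structs are
computed with pointer math on member addresses, so member order now matters.
Shrinks kMaxEdges to 1 and leaves the edge-AA fields out of copying and
comparison; both must be restored before GrTesselatedPathRenderer is used.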



git-svn-id: http://skia.googlecode.com/svn/trunk@2644 2bbb7eff-a529-9590-31e7-b0007b416f81
diff --git a/include/gpu/GrSamplerState.h b/include/gpu/GrSamplerState.h
index 9c217db..ed49593 100644
--- a/include/gpu/GrSamplerState.h
+++ b/include/gpu/GrSamplerState.h
@@ -252,10 +252,10 @@
     }
 
 private:
-    WrapMode    fWrapX;
-    WrapMode    fWrapY;
-    SampleMode  fSampleMode;
-    Filter      fFilter;
+    WrapMode    fWrapX : 8;
+    WrapMode    fWrapY : 8;
+    SampleMode  fSampleMode : 8;
+    Filter      fFilter : 8;
     GrMatrix    fMatrix;
     bool        fSwapRAndB;
     GrRect      fTextureDomain;
@@ -263,12 +263,12 @@
     // these are undefined unless fSampleMode == kRadial2_SampleMode
     GrScalar    fRadial2CenterX1;
     GrScalar    fRadial2Radius0;
-    bool        fRadial2PosRoot;
+    SkBool8     fRadial2PosRoot;
 
     // These are undefined unless fFilter == kConvolution_Filter
-    int         fKernelWidth;
-    float       fKernel[MAX_KERNEL_WIDTH];
+    uint8_t     fKernelWidth;
     float       fImageIncrement[2];
+    float       fKernel[MAX_KERNEL_WIDTH];
 
     static const GrSamplerState gClampNoFilter;
 };
diff --git a/src/gpu/GrDefaultPathRenderer.cpp b/src/gpu/GrDefaultPathRenderer.cpp
index a8f673e..b9d9360 100644
--- a/src/gpu/GrDefaultPathRenderer.cpp
+++ b/src/gpu/GrDefaultPathRenderer.cpp
@@ -45,9 +45,9 @@
     kInvert_StencilOp,           kInvert_StencilOp,
     kKeep_StencilOp,             kKeep_StencilOp,
     kAlwaysIfInClip_StencilFunc, kAlwaysIfInClip_StencilFunc,
-    0xffffffff,                  0xffffffff,
-    0xffffffff,                  0xffffffff,
-    0xffffffff,                  0xffffffff
+    0xffff,                      0xffff,
+    0xffff,                      0xffff,
+    0xffff,                      0xffff
 };
 
 // ok not to check clip b/c stencil pass only wrote inside clip
@@ -55,9 +55,9 @@
     kZero_StencilOp,          kZero_StencilOp,
     kZero_StencilOp,          kZero_StencilOp,
     kNotEqual_StencilFunc,    kNotEqual_StencilFunc,
-    0xffffffff,               0xffffffff,
-    0x0,                      0x0,
-    0xffffffff,               0xffffffff
+    0xffff,                   0xffff,
+    0x0000,                   0x0000,
+    0xffff,                   0xffff
 };
 
 // have to check clip b/c outside clip will always be zero.
@@ -65,9 +65,9 @@
     kZero_StencilOp,            kZero_StencilOp,
     kZero_StencilOp,            kZero_StencilOp,
     kEqualIfInClip_StencilFunc, kEqualIfInClip_StencilFunc,
-    0xffffffff,                 0xffffffff,
-    0x0,                        0x0,
-    0xffffffff,                 0xffffffff
+    0xffff,                     0xffff,
+    0x0000,                     0x0000,
+    0xffff,                     0xffff
 };
 
 ////// Winding
@@ -80,9 +80,9 @@
     kIncWrap_StencilOp,             kDecWrap_StencilOp,
     kKeep_StencilOp,                kKeep_StencilOp,
     kAlwaysIfInClip_StencilFunc,    kAlwaysIfInClip_StencilFunc,
-    0xffffffff,                     0xffffffff,
-    0xffffffff,                     0xffffffff,
-    0xffffffff,                     0xffffffff
+    0xffff,                         0xffff,
+    0xffff,                         0xffff,
+    0xffff,                         0xffff
 };
 
 // if inc'ing the max value, invert to make 0
@@ -93,9 +93,9 @@
     kInvert_StencilOp,              kInvert_StencilOp,
     kIncClamp_StencilOp,            kDecClamp_StencilOp,
     kEqual_StencilFunc,             kEqual_StencilFunc,
-    0xffffffff,                     0xffffffff,
-    0xffffffff,                     0x0,
-    0xffffffff,                     0xffffffff
+    0xffff,                         0xffff,
+    0xffff,                         0x0000,
+    0xffff,                         0xffff
 };
 
 // When there are no separate faces we do two passes to setup the winding rule
@@ -106,51 +106,51 @@
     kIncWrap_StencilOp,             kIncWrap_StencilOp,
     kKeep_StencilOp,                kKeep_StencilOp,
     kAlwaysIfInClip_StencilFunc,    kAlwaysIfInClip_StencilFunc,
-    0xffffffff,                     0xffffffff,
-    0xffffffff,                     0xffffffff,
-    0xffffffff,                     0xffffffff
+    0xffff,                         0xffff,
+    0xffff,                         0xffff,
+    0xffff,                         0xffff
 };
 static const GrStencilSettings gWindSingleStencilWithWrapDec = {
     kDecWrap_StencilOp,             kDecWrap_StencilOp,
     kKeep_StencilOp,                kKeep_StencilOp,
     kAlwaysIfInClip_StencilFunc,    kAlwaysIfInClip_StencilFunc,
-    0xffffffff,                     0xffffffff,
-    0xffffffff,                     0xffffffff,
-    0xffffffff,                     0xffffffff
+    0xffff,                         0xffff,
+    0xffff,                         0xffff,
+    0xffff,                         0xffff
 };
 static const GrStencilSettings gWindSingleStencilNoWrapInc = {
     kInvert_StencilOp,              kInvert_StencilOp,
     kIncClamp_StencilOp,            kIncClamp_StencilOp,
     kEqual_StencilFunc,             kEqual_StencilFunc,
-    0xffffffff,                     0xffffffff,
-    0xffffffff,                     0xffffffff,
-    0xffffffff,                     0xffffffff
+    0xffff,                         0xffff,
+    0xffff,                         0xffff,
+    0xffff,                         0xffff
 };
 static const GrStencilSettings gWindSingleStencilNoWrapDec = {
     kInvert_StencilOp,              kInvert_StencilOp,
     kDecClamp_StencilOp,            kDecClamp_StencilOp,
     kEqual_StencilFunc,             kEqual_StencilFunc,
-    0xffffffff,                     0xffffffff,
-    0x0,                            0x0,
-    0xffffffff,                     0xffffffff
+    0xffff,                         0xffff,
+    0x0000,                         0x0000,
+    0xffff,                         0xffff
 };
 
 static const GrStencilSettings gWindColorPass = {
     kZero_StencilOp,                kZero_StencilOp,
     kZero_StencilOp,                kZero_StencilOp,
     kNonZeroIfInClip_StencilFunc,   kNonZeroIfInClip_StencilFunc,
-    0xffffffff,                     0xffffffff,
-    0x0,                            0x0,
-    0xffffffff,                     0xffffffff
+    0xffff,                         0xffff,
+    0x0000,                         0x0000,
+    0xffff,                         0xffff
 };
 
 static const GrStencilSettings gInvWindColorPass = {
     kZero_StencilOp,                kZero_StencilOp,
     kZero_StencilOp,                kZero_StencilOp,
     kEqualIfInClip_StencilFunc,     kEqualIfInClip_StencilFunc,
-    0xffffffff,                     0xffffffff,
-    0x0,                            0x0,
-    0xffffffff,                     0xffffffff
+    0xffff,                         0xffff,
+    0x0000,                         0x0000,
+    0xffff,                         0xffff
 };
 
 ////// Normal render to stencil
@@ -161,9 +161,9 @@
     kZero_StencilOp,                kZero_StencilOp,
     kIncClamp_StencilOp,            kIncClamp_StencilOp,
     kAlwaysIfInClip_StencilFunc,    kAlwaysIfInClip_StencilFunc,
-    0xffffffff,                     0xffffffff,
-    0x0,                            0x0,
-    0xffffffff,                     0xffffffff
+    0xffff,                         0xffff,
+    0x0000,                         0x0000,
+    0xffff,                         0xffff
 };
 
 ////////////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/GrDrawState.h b/src/gpu/GrDrawState.h
index 2acaf03..8401772 100644
--- a/src/gpu/GrDrawState.h
+++ b/src/gpu/GrDrawState.h
@@ -66,7 +66,9 @@
      * (below) should clamp to this value.
      */
     enum {
-        kMaxEdges = 32
+        // TODO: this should be 32 when GrTesselatedPathRenderer is used
+        // Visual Studio 2010 does not permit a member array of size 0.
+        kMaxEdges = 1
     };
 
     class Edge {
@@ -102,28 +104,78 @@
         fFirstCoverageStage = kNumStages;
     }
 
-    uint32_t                fFlagBits;
-    GrBlendCoeff            fSrcBlend;
-    GrBlendCoeff            fDstBlend;
+    uint8_t                 fFlagBits;
+    GrBlendCoeff            fSrcBlend : 8;
+    GrBlendCoeff            fDstBlend : 8;
+    DrawFace                fDrawFace : 8;
+    uint8_t                 fFirstCoverageStage;
+    SkXfermode::Mode        fColorFilterXfermode : 8;
     GrColor                 fBlendConstant;
     GrTexture*              fTextures[kNumStages];
-    GrSamplerState          fSamplerStates[kNumStages];
-    int                     fFirstCoverageStage;
     GrRenderTarget*         fRenderTarget;
     GrColor                 fColor;
-    DrawFace                fDrawFace;
     GrColor                 fColorFilterColor;
-    SkXfermode::Mode        fColorFilterXfermode;
 
     GrStencilSettings       fStencilSettings;
     GrMatrix                fViewMatrix;
+
+    // @{ Data for GrTesselatedPathRenderer
+    // TODO: currently ignored in copying & comparison for performance.
+    // Must be considered if GrTesselatedPathRenderer is being used.
+
+    int                     fEdgeAANumEdges;
     VertexEdgeType          fVertexEdgeType;
     Edge                    fEdgeAAEdges[kMaxEdges];
-    int                     fEdgeAANumEdges;
+
+    // @}
+
+    // This field must be last; fSamplerStates[i] will not be copied or
+    // compared if the corresponding fTextures[i] is NULL.
+    GrSamplerState          fSamplerStates[kNumStages];
+
+    // Equal when the leading fields match and so do the samplers of every
+    // stage with a texture; reduces the expected bytes touched by 50%.
     bool operator ==(const GrDrawState& s) const {
-        return 0 == memcmp(this, &s, sizeof(GrDrawState));
+        if (0 != memcmp(this, &s, this->leadingBytes())) return false;
+        // fTextures[] is within the leading bytes, so it is identical here.
+        for (int stage = 0; stage < kNumStages; ++stage) {
+            if (!fTextures[stage]) continue;
+            if (0 != memcmp(&fSamplerStates[stage], &s.fSamplerStates[stage],
+                            sizeof(GrSamplerState))) {
+                return false;
+            }
+        }
+
+        return true;
     }
     bool operator !=(const GrDrawState& s) const { return !(*this == s); }
+
+    // Bulk-copies the leading fields, then the samplers of stages that have a
+    // texture in s. The edge-AA fields (see leadingBytes()) are not copied.
+    GrDrawState& operator =(const GrDrawState& s) {
+        // memcpy on overlapping storage is undefined: skip self-assignment.
+        if (this == &s) return *this;
+        memcpy(this, &s, this->leadingBytes());
+        for (int i = 0; i < kNumStages; i++) {
+            if (s.fTextures[i]) {
+                memcpy(&this->fSamplerStates[i], &s.fSamplerStates[i],
+                       sizeof(GrSamplerState));
+            }
+        }
+        return *this;
+    }
+
+private:
+    size_t leadingBytes() const {
+        // Size in bytes of the fields [fFlagBits, fEdgeAANumEdges). offsetof()
+        // can't be used with non-POD types, hence pointer math. TODO: ignores
+        // the GrTesselatedPathRenderer data; we have no compile-time flag for
+        // its use, and checking at runtime seems to cost 5% performance.
+        const unsigned char* first = (const unsigned char*)&fFlagBits;
+        return (size_t) ((const unsigned char*)&fEdgeAANumEdges - first);
+    }
+
 };
 
 #endif
+
diff --git a/src/gpu/GrGpu.cpp b/src/gpu/GrGpu.cpp
index 7da560e..2753cf0 100644
--- a/src/gpu/GrGpu.cpp
+++ b/src/gpu/GrGpu.cpp
@@ -331,9 +331,9 @@
     kKeep_StencilOp,             kKeep_StencilOp,
     kKeep_StencilOp,             kKeep_StencilOp,
     kAlwaysIfInClip_StencilFunc, kAlwaysIfInClip_StencilFunc,
-    0,                           0,
-    0,                           0,
-    0,                           0
+    0x0000,                      0x0000,
+    0x0000,                      0x0000,
+    0x0000,                      0x0000
 };
 
 // mapping of clip-respecting stencil funcs to normal stencil funcs
@@ -580,6 +580,8 @@
 #endif
             int count = clip.getElementCount();
             int clipBit = stencilBuffer->bits();
+            SkASSERT((clipBit <= 16) &&
+                     "Ganesh only handles 16b or smaller stencil buffers");
             clipBit = (1 << (clipBit-1));
             
             bool clearToInside;
@@ -647,9 +649,9 @@
                         kIncClamp_StencilOp, kIncClamp_StencilOp,
                         kIncClamp_StencilOp, kIncClamp_StencilOp,
                         kAlways_StencilFunc, kAlways_StencilFunc,
-                        0xffffffff,          0xffffffff,
-                        0x00000000,          0x00000000,
-                        0xffffffff,          0xffffffff,
+                        0xffff,              0xffff,
+                        0x0000,              0x0000,
+                        0xffff,              0xffff,
                     };
                     SET_RANDOM_COLOR
                     if (kRect_ClipType == clip.getElementType(c)) {
diff --git a/src/gpu/GrGpuGL.cpp b/src/gpu/GrGpuGL.cpp
index d768a24..c162e11 100644
--- a/src/gpu/GrGpuGL.cpp
+++ b/src/gpu/GrGpuGL.cpp
@@ -622,8 +622,9 @@
     fActiveTextureUnitIdx = -1;
 
     // illegal values
-    fHWDrawState.fSrcBlend = (GrBlendCoeff)-1;
-    fHWDrawState.fDstBlend = (GrBlendCoeff)-1;
+    // 0xFF == (uint8_t)-1: all ones in the 8-bit fSrcBlend/fDstBlend fields.
+    fHWDrawState.fSrcBlend = (GrBlendCoeff)0xFF;
+    fHWDrawState.fDstBlend = (GrBlendCoeff)(uint8_t)-1;
 
     fHWDrawState.fBlendConstant = 0x00000000;
     GL_CALL(BlendColor(0,0,0,0));
@@ -1872,7 +1873,8 @@
                 GrAssert(settings->fFrontFunc < kBasicStencilFuncCount);
                 frontFunc = grToGLStencilFunc[settings->fFrontFunc];
             } else {
-                frontFunc = grToGLStencilFunc[ConvertStencilFunc(stencilClip, settings->fFrontFunc)];
+                frontFunc = grToGLStencilFunc[ConvertStencilFunc(
+                        stencilClip, settings->fFrontFunc)];
 
                 ConvertStencilFuncAndMask(settings->fFrontFunc,
                                           stencilClip,
@@ -1882,14 +1884,14 @@
                                           &frontMask);
                 frontWriteMask &= userStencilMask;
             }
-            GrAssert(settings->fFrontFailOp >= 0 &&
-                     (unsigned) settings->fFrontFailOp < GR_ARRAY_COUNT(grToGLStencilOp));
-            GrAssert(settings->fFrontPassOp >= 0 &&
-                     (unsigned) settings->fFrontPassOp < GR_ARRAY_COUNT(grToGLStencilOp));
-            GrAssert(settings->fBackFailOp >= 0 &&
-                     (unsigned) settings->fBackFailOp < GR_ARRAY_COUNT(grToGLStencilOp));
-            GrAssert(settings->fBackPassOp >= 0 &&
-                     (unsigned) settings->fBackPassOp < GR_ARRAY_COUNT(grToGLStencilOp));
+            GrAssert((size_t)
+                settings->fFrontFailOp < GR_ARRAY_COUNT(grToGLStencilOp));
+            GrAssert((size_t)
+                settings->fFrontPassOp < GR_ARRAY_COUNT(grToGLStencilOp));
+            GrAssert((size_t)
+                settings->fBackFailOp < GR_ARRAY_COUNT(grToGLStencilOp));
+            GrAssert((size_t)
+                settings->fBackPassOp < GR_ARRAY_COUNT(grToGLStencilOp));
             if (this->getCaps().fTwoSidedStencilSupport) {
                 GrGLenum backFunc;
 
@@ -1902,7 +1904,8 @@
                     GrAssert(settings->fBackFunc < kBasicStencilFuncCount);
                     backFunc = grToGLStencilFunc[settings->fBackFunc];
                 } else {
-                    backFunc = grToGLStencilFunc[ConvertStencilFunc(stencilClip, settings->fBackFunc)];
+                    backFunc = grToGLStencilFunc[ConvertStencilFunc(
+                        stencilClip, settings->fBackFunc)];
                     ConvertStencilFuncAndMask(settings->fBackFunc,
                                               stencilClip,
                                               clipStencilMask,
diff --git a/src/gpu/GrStencil.cpp b/src/gpu/GrStencil.cpp
index 376e057..a66fbfd 100644
--- a/src/gpu/GrStencil.cpp
+++ b/src/gpu/GrStencil.cpp
@@ -13,9 +13,9 @@
     kKeep_StencilOp,     kKeep_StencilOp,
     kKeep_StencilOp,     kKeep_StencilOp,
     kAlways_StencilFunc, kAlways_StencilFunc,
-    0x0,                 0x0,
-    0x0,                 0x0,
-    0x0,                 0x0
+    0x0000,              0x0000,
+    0x0000,              0x0000,
+    0x0000,              0x0000
 };
 GR_STATIC_ASSERT(0 == kKeep_StencilOp);
 GR_STATIC_ASSERT(0 == kAlways_StencilFunc);
@@ -36,17 +36,17 @@
     kReplace_StencilOp,  kReplace_StencilOp,
     kZero_StencilOp,     kZero_StencilOp,
     kLess_StencilFunc,   kLess_StencilFunc,
-    0xffffffff,          0xffffffff,    // unset clip bit
-    0x0,                 0x0,           // set clip bit
-    0xffffffff,          0xffffffff
+    0xffff,              0xffff,           // unset clip bit
+    0x0000,              0x0000,           // set clip bit
+    0xffff,              0xffff
 };
 static const GrStencilSettings gInvUserToClipReplace = {
     kReplace_StencilOp,  kReplace_StencilOp,
     kZero_StencilOp,     kZero_StencilOp,
     kEqual_StencilFunc,  kEqual_StencilFunc,
-    0xffffffff,          0xffffffff,    // unset clip bit
-    0x0,                 0x0,           // set clip bit
-    0xffffffff,          0xffffffff
+    0xffff,              0xffff,           // unset clip bit
+    0x0000,              0x0000,           // set clip bit
+    0xffff,              0xffff
 };
 
 ///////
@@ -55,17 +55,17 @@
     kReplace_StencilOp,  kReplace_StencilOp,
     kZero_StencilOp,     kZero_StencilOp,
     kLess_StencilFunc,   kLess_StencilFunc,
-    0xffffffff,          0xffffffff,
-    0x0,                 0x0,           // set clip bit
-    0xffffffff,          0xffffffff
+    0xffff,              0xffff,
+    0x0000,              0x0000,           // set clip bit
+    0xffff,              0xffff
 };
 static const GrStencilSettings gInvUserToClipIsect = {
     kReplace_StencilOp,  kReplace_StencilOp,
     kZero_StencilOp,     kZero_StencilOp,
     kEqual_StencilFunc,  kEqual_StencilFunc,
-    0xffffffff,          0xffffffff,
-    0x0,                 0x0,           // set clip bit
-    0xffffffff,          0xffffffff
+    0xffff,              0xffff,
+    0x0000,              0x0000,           // set clip bit
+    0xffff,              0xffff
 };
 
 ///////
@@ -74,17 +74,17 @@
     kReplace_StencilOp,  kReplace_StencilOp,
     kZero_StencilOp,     kZero_StencilOp,
     kEqual_StencilFunc,  kEqual_StencilFunc,
-    0xffffffff,          0xffffffff,
-    0x0,                 0x0,           // set clip bit
-    0xffffffff,          0xffffffff
+    0xffff,              0xffff,
+    0x0000,              0x0000,           // set clip bit
+    0xffff,              0xffff
 };
 static const GrStencilSettings gInvUserToClipDiff = {
     kReplace_StencilOp,  kReplace_StencilOp,
     kZero_StencilOp,     kZero_StencilOp,
     kLess_StencilFunc,   kLess_StencilFunc,
-    0xffffffff,          0xffffffff,
-    0x0,                 0x0,           // set clip bit
-    0xffffffff,          0xffffffff
+    0xffff,              0xffff,
+    0x0000,              0x0000,           // set clip bit
+    0xffff,              0xffff
 };
 
 ///////
@@ -95,9 +95,9 @@
     kReplace_StencilOp,  kReplace_StencilOp,
     kKeep_StencilOp,     kKeep_StencilOp,
     kLEqual_StencilFunc, kLEqual_StencilFunc,
-    0xffffffff,          0xffffffff,    // unset clip bit
-    0x00000001,          0x00000001,    // set clip bit
-    0xffffffff,          0xffffffff
+    0xffff,              0xffff,           // unset clip bit
+    0x0001,              0x0001,           // set clip bit
+    0xffff,              0xffff
 };
 
 // second pass allows anything greater than just clip bit set to pass
@@ -105,9 +105,9 @@
     kReplace_StencilOp,  kReplace_StencilOp,
     kZero_StencilOp,     kZero_StencilOp,
     kLEqual_StencilFunc, kLEqual_StencilFunc,
-    0xffffffff,          0xffffffff,
-    0x00000000,          0x00000000,    // set clip bit
-    0xffffffff,          0xffffffff
+    0xffff,              0xffff,
+    0x0000,              0x0000,           // set clip bit
+    0xffff,              0xffff
 };
 
 // for inverse first pass finds non-zerp user with clip bit set
@@ -116,9 +116,9 @@
     kReplace_StencilOp,  kReplace_StencilOp,
     kKeep_StencilOp,     kKeep_StencilOp,
     kLess_StencilFunc,   kLess_StencilFunc,
-    0xffffffff,          0xffffffff,
-    0x00000000,          0x00000000,    // set clip bit
-    0xffffffff,          0xffffffff
+    0xffff,              0xffff,
+    0x0000,              0x0000,           // set clip bit
+    0xffff,              0xffff
 };
 
 // second pass lets anything through with a nonzero user portion
@@ -127,9 +127,9 @@
     kReplace_StencilOp,  kReplace_StencilOp,
     kZero_StencilOp,     kZero_StencilOp,
     kLess_StencilFunc,   kLess_StencilFunc,
-    0xffffffff,          0xffffffff,    // unset clip bit
-    0x00000000,          0x00000000,    // set clip bit
-    0xffffffff,          0xffffffff
+    0xffff,              0xffff,           // unset clip bit
+    0x0000,              0x0000,           // set clip bit
+    0xffff,              0xffff
 };
 
 ///////
@@ -138,36 +138,36 @@
     kInvert_StencilOp,   kInvert_StencilOp,
     kKeep_StencilOp,     kKeep_StencilOp,
     kEqual_StencilFunc,  kEqual_StencilFunc,
-    0xffffffff,          0xffffffff,    // unset clip bit
-    0x00000000,          0x00000000,
-    0xffffffff,          0xffffffff
+    0xffff,              0xffff,           // unset clip bit
+    0x0000,              0x0000,
+    0xffff,              0xffff
 };
 
 static const GrStencilSettings gUserToClipXorPass1 = {
     kReplace_StencilOp,   kReplace_StencilOp,
     kZero_StencilOp,      kZero_StencilOp,
     kGreater_StencilFunc, kGreater_StencilFunc,
-    0xffffffff,           0xffffffff,
-    0x00000000,           0x00000000,   // set clip bit
-    0xffffffff,           0xffffffff
+    0xffff,               0xffff,
+    0x0000,               0x0000,          // set clip bit
+    0xffff,               0xffff
 };
 
 static const GrStencilSettings gInvUserToClipXorPass0 = {
     kInvert_StencilOp,   kInvert_StencilOp,
     kKeep_StencilOp,     kKeep_StencilOp,
     kEqual_StencilFunc,  kEqual_StencilFunc,
-    0xffffffff,          0xffffffff,    // unset clip bit
-    0x00000000,          0x00000000,
-    0xffffffff,          0xffffffff
+    0xffff,              0xffff,           // unset clip bit
+    0x0000,              0x0000,
+    0xffff,              0xffff
 };
 
 static const GrStencilSettings gInvUserToClipXorPass1 = {
     kReplace_StencilOp,   kReplace_StencilOp,
     kZero_StencilOp,      kZero_StencilOp,
     kLess_StencilFunc,    kLess_StencilFunc,
-    0xffffffff,           0xffffffff,
-    0x00000000,           0x00000000,   // set clip bit
-    0xffffffff,           0xffffffff
+    0xffff,               0xffff,
+    0x0000,               0x0000,          // set clip bit
+    0xffff,               0xffff
 };
 
 ///////
@@ -176,27 +176,27 @@
     kInvert_StencilOp,   kInvert_StencilOp,
     kZero_StencilOp,     kZero_StencilOp,
     kLess_StencilFunc,   kLess_StencilFunc,
-    0xffffffff,          0xffffffff,  // unset clip bit
-    0x00000000,          0x00000000,  // set clip bit
-    0xffffffff,          0xffffffff
+    0xffff,              0xffff,         // unset clip bit
+    0x0000,              0x0000,         // set clip bit
+    0xffff,              0xffff
 };
 
 static const GrStencilSettings gUserToClipRDiffPass1 = {
     kReplace_StencilOp,   kReplace_StencilOp,
     kZero_StencilOp,      kZero_StencilOp,
     kEqual_StencilFunc,   kEqual_StencilFunc,
-    0x00000000,           0x00000000,   // set clip bit
-    0x00000000,           0x00000000,   // set clip bit
-    0xffffffff,           0xffffffff
+    0x0000,               0x0000,          // set clip bit
+    0x0000,               0x0000,          // set clip bit
+    0xffff,               0xffff
 };
 
 static const GrStencilSettings gInvUserToClipRDiff = {
     kInvert_StencilOp,    kInvert_StencilOp,
     kZero_StencilOp,      kZero_StencilOp,
     kEqual_StencilFunc,   kEqual_StencilFunc,
-    0xffffffff,           0xffffffff,
-    0x00000000,           0x00000000, 
-    0x00000000,           0x00000000    // set clip bit
+    0xffff,               0xffff,
+    0x0000,               0x0000, 
+    0x0000,               0x0000           // set clip bit
 };
 ///////
 // Direct to Stencil
@@ -211,36 +211,36 @@
     kReplace_StencilOp,  kReplace_StencilOp,
     kReplace_StencilOp,  kReplace_StencilOp,
     kAlways_StencilFunc, kAlways_StencilFunc,
-    0xffffffff,          0xffffffff,
-    0x00000000,          0x00000000,    // set clip bit
-    0x00000000,          0x00000000     // set clipBit
+    0xffff,              0xffff,
+    0x0000,              0x0000,           // set clip bit
+    0x0000,              0x0000            // set clipBit
 };
 
 static const GrStencilSettings gUnionClip = {
     kReplace_StencilOp,  kReplace_StencilOp,
     kReplace_StencilOp,  kReplace_StencilOp,
     kAlways_StencilFunc, kAlways_StencilFunc,
-    0xffffffff,          0xffffffff,
-    0x00000000,          0x00000000,    // set clip bit
-    0x00000000,          0x00000000     // set clip bit
+    0xffff,              0xffff,
+    0x0000,              0x0000,           // set clip bit
+    0x0000,              0x0000            // set clip bit
 };
 
 static const GrStencilSettings gXorClip = {
     kInvert_StencilOp,   kInvert_StencilOp,
     kInvert_StencilOp,   kInvert_StencilOp,
     kAlways_StencilFunc, kAlways_StencilFunc,
-    0xffffffff,          0xffffffff,
-    0x00000000,          0x00000000,
-    0x00000000,          0x00000000     // set clip bit
+    0xffff,              0xffff,
+    0x0000,              0x0000,
+    0x0000,              0x0000            // set clip bit
 };
 
 static const GrStencilSettings gDiffClip = {
     kZero_StencilOp,     kZero_StencilOp,
     kZero_StencilOp,     kZero_StencilOp,
     kAlways_StencilFunc, kAlways_StencilFunc,
-    0xffffffff,          0xffffffff,
-    0x00000000,          0x00000000,
-    0x00000000,          0x00000000     // set clip bit
+    0xffff,              0xffff,
+    0x0000,              0x0000,
+    0x0000,              0x0000            // set clip bit
 };
 
 bool GrStencilSettings::GetClipPasses(GrSetOp op, 
diff --git a/src/gpu/GrStencil.h b/src/gpu/GrStencil.h
index b8610f2..207a831 100644
--- a/src/gpu/GrStencil.h
+++ b/src/gpu/GrStencil.h
@@ -88,27 +88,25 @@
  * Struct representing stencil state.
  */
 struct GrStencilSettings {
-    GrStencilOp   fFrontPassOp;     // op to perform when front faces pass
-    GrStencilOp   fBackPassOp;      // op to perform when back faces pass
-    GrStencilOp   fFrontFailOp;     // op to perform when front faces fail
-    GrStencilOp   fBackFailOp;      // op to perform when back faces fail
-    GrStencilFunc fFrontFunc;       // test function for front faces
-    GrStencilFunc fBackFunc;        // test function for back faces
-    unsigned int fFrontFuncMask;    // mask for front face test
-    unsigned int fBackFuncMask;     // mask for back face test
-    unsigned int fFrontFuncRef;     // reference value for front face test
-    unsigned int fBackFuncRef;      // reference value for back face test
-    unsigned int fFrontWriteMask;   // stencil write mask for front faces
-    unsigned int fBackWriteMask;    // stencil write mask for back faces
+    GrStencilOp fFrontPassOp : 8;    // op to perform when front faces pass
+    GrStencilOp fBackPassOp : 8;     // op to perform when back faces pass
+    GrStencilOp fFrontFailOp : 8;    // op to perform when front faces fail
+    GrStencilOp fBackFailOp : 8;     // op to perform when back faces fail
+    GrStencilFunc fFrontFunc : 8;    // test function for front faces
+    GrStencilFunc fBackFunc : 8;     // test function for back faces
+    unsigned short fFrontFuncMask;   // mask for front face test
+    unsigned short fBackFuncMask;    // mask for back face test
+    unsigned short fFrontFuncRef;    // reference value for front face test
+    unsigned short fBackFuncRef;     // reference value for back face test
+    unsigned short fFrontWriteMask;  // stencil write mask for front faces
+    unsigned short fBackWriteMask;   // stencil write mask for back faces
 
     bool operator == (const GrStencilSettings& s) const {
-        // make sure this is tightly packed.
-        GR_STATIC_ASSERT(0 == sizeof(GrStencilOp)%4);
-        GR_STATIC_ASSERT(0 == sizeof(GrStencilFunc)%4);
-        GR_STATIC_ASSERT(sizeof(GrStencilSettings) ==
-                        4*sizeof(GrStencilOp) +
-                        2*sizeof(GrStencilFunc) +
-                        6*sizeof(unsigned int));
+        // make sure this is tightly packed (< 4B padding).
+        GR_STATIC_ASSERT(sizeof(GrStencilSettings) / 4 ==
+                        (4*sizeof(uint8_t) +
+                         2*sizeof(uint8_t) +
+                         6*sizeof(unsigned short) + 3) / 4);
         return 0 == memcmp(this, &s, sizeof(GrStencilSettings));
     }
 
@@ -124,9 +122,9 @@
     void setSame(GrStencilOp passOp,
                  GrStencilOp failOp,
                  GrStencilFunc func,
-                 unsigned int funcMask,
-                 unsigned int funcRef,
-                 unsigned int writeMask) {
+                 unsigned short funcMask,
+                 unsigned short funcRef,
+                 unsigned short writeMask) {
         fFrontPassOp        = passOp;
         fBackPassOp         = passOp;
         fFrontFailOp        = failOp;
@@ -166,7 +164,7 @@
     }
     void invalidate()  {
         // just write an illegal value to the first member
-        fFrontPassOp = (GrStencilOp)-1;
+        fFrontPassOp = (GrStencilOp)(uint8_t)-1;
     }
 
 private: