Speed up GrDrawState handling.
Compact GrDrawState and its nested structs; at runtime, copy and compare only
the fields that are actually active in the structs in question.
Yields 10-20% speedup of text benchmarks in GPU configuration.

Could probably get additional marginal speedup by ignoring unused
kernel fields.
Has some fragile constructs: pointer arithmetic on member addresses is used to
compute the sizes of portions of structs.
Removes fields necessary for GrTesselatedPathRenderer.



git-svn-id: http://skia.googlecode.com/svn/trunk@2644 2bbb7eff-a529-9590-31e7-b0007b416f81
diff --git a/include/gpu/GrSamplerState.h b/include/gpu/GrSamplerState.h
index 9c217db..ed49593 100644
--- a/include/gpu/GrSamplerState.h
+++ b/include/gpu/GrSamplerState.h
@@ -252,10 +252,10 @@
     }
 
 private:
-    WrapMode    fWrapX;
-    WrapMode    fWrapY;
-    SampleMode  fSampleMode;
-    Filter      fFilter;
+    WrapMode    fWrapX : 8;
+    WrapMode    fWrapY : 8;
+    SampleMode  fSampleMode : 8;
+    Filter      fFilter : 8;
     GrMatrix    fMatrix;
     bool        fSwapRAndB;
     GrRect      fTextureDomain;
@@ -263,12 +263,12 @@
     // these are undefined unless fSampleMode == kRadial2_SampleMode
     GrScalar    fRadial2CenterX1;
     GrScalar    fRadial2Radius0;
-    bool        fRadial2PosRoot;
+    SkBool8     fRadial2PosRoot;
 
     // These are undefined unless fFilter == kConvolution_Filter
-    int         fKernelWidth;
-    float       fKernel[MAX_KERNEL_WIDTH];
+    uint8_t     fKernelWidth;
     float       fImageIncrement[2];
+    float       fKernel[MAX_KERNEL_WIDTH];
 
     static const GrSamplerState gClampNoFilter;
 };