diff --git a/src/pipe/SkGPipePriv.h b/src/pipe/SkGPipePriv.h
index 611849f..30d8b93 100644
--- a/src/pipe/SkGPipePriv.h
+++ b/src/pipe/SkGPipePriv.h
@@ -22,6 +22,19 @@
 
 #define UNIMPLEMENTED
 
+// Names the flattenable fields of an SkPaint. These must be contiguous, 0...N-1: they index fCurrFlatIndex[] and are looped over.
+enum PaintFlats {
+    kColorFilter_PaintFlat,     // paint's color filter (setColorFilter / getColorFilter)
+    kMaskFilter_PaintFlat,      // paint's mask filter (setMaskFilter / getMaskFilter)
+    kPathEffect_PaintFlat,      // paint's path effect (setPathEffect / getPathEffect)
+    kRasterizer_PaintFlat,      // paint's rasterizer (setRasterizer / getRasterizer)
+    kShader_PaintFlat,          // paint's shader (setShader / getShader)
+    kXfermode_PaintFlat,        // paint's xfermode (setXfermode / getXfermode)
+    // alias of the final entry above; kCount_PaintFlats is derived from it
+    kLast_PaintFlat = kXfermode_PaintFlat
+};
+#define kCount_PaintFlats   (kLast_PaintFlat + 1)   // number of PaintFlats values
+
 enum DrawOps {
     kSkip_DrawOp,   // skip an addition N bytes (N == data)
 
@@ -58,6 +71,8 @@
 
     kPaintOp_DrawOp,
 
+    kDef_PaintFlat_DrawOp,
+
     kDef_ColorFilter_DrawOp,
     kDef_DrawLooper_DrawOp,
     kDef_MaskFilter_DrawOp,
@@ -154,7 +169,9 @@
     kTextSkewX_PaintOp, // arg scalar - text
     kTypeface_PaintOp,  // arg inline (index) - text
 
-    kPathEffect_PaintOp,    // arg inline
+    kFlatIndex_PaintOp, // flags=paintflat, data=index
+
+    kPathEffect_PaintOp,
     kShader_PaintOp,
     kXfermode_PaintOp,
     kMaskFilter_PaintOp,
diff --git a/src/pipe/SkGPipeRead.cpp b/src/pipe/SkGPipeRead.cpp
index 6ea2897..d33258c 100644
--- a/src/pipe/SkGPipeRead.cpp
+++ b/src/pipe/SkGPipeRead.cpp
@@ -31,6 +31,32 @@
 #include "SkTypeface.h"
 #include "SkXfermode.h"
 
+// Stores obj in the field of paint that paintFlat (a PaintFlats value) names.
+//
+// obj is cast, unchecked, to the pointer type the matching SkPaint setter
+// takes, so the caller is responsible for pairing each object with the
+// PaintFlats value it was recorded under. obj is forwarded to the setter
+// as-is, including when it is NULL.
+//
+// A paintFlat outside [0, kCount_PaintFlats) trips SkASSERT and calls no
+// setter, leaving paint untouched.
+static void set_paintflat(SkPaint* paint, SkFlattenable* obj, unsigned paintFlat) {
+    SkASSERT(paintFlat < kCount_PaintFlats);
+
+    switch (paintFlat) {
+        // one case per PaintFlats value; each returns once its setter has run
+        case kColorFilter_PaintFlat:    paint->setColorFilter((SkColorFilter*)obj); return;
+        case kMaskFilter_PaintFlat:     paint->setMaskFilter((SkMaskFilter*)obj);   return;
+        case kPathEffect_PaintFlat:     paint->setPathEffect((SkPathEffect*)obj);   return;
+        case kRasterizer_PaintFlat:     paint->setRasterizer((SkRasterizer*)obj);   return;
+        case kShader_PaintFlat:         paint->setShader((SkShader*)obj);           return;
+        case kXfermode_PaintFlat:       paint->setXfermode((SkXfermode*)obj);       return;
+    }
+
+    // only reachable when paintFlat matched none of the cases above
+    SkASSERT(!"never gets here");
+}
+
 template <typename T> class SkRefCntTDArray : public SkTDArray<T> {
 public:
     ~SkRefCntTDArray() { this->unrefAll(); }
@@ -50,6 +76,19 @@
     //  index == count. If index > count, return NULL
     SkPaint* editPaint(uint32_t drawOp32);
 
+    // Returns the object defined under 1-based index; NULL for 0 or for an index not yet defined.
+    SkFlattenable* getFlat(unsigned index) const {
+        const bool defined = index > 0 && index <= (unsigned)fFlatArray.count();
+        SkASSERT(0 == index || defined);    // index comes from the op stream; flag a bad one
+        return defined ? fFlatArray[index - 1] : NULL;  // never index past the table
+    }
+
+    // Appends the next flattenable read from fReader; index must equal the new count (pf is unused).
+    void defFlattenable(PaintFlats pf, unsigned index) {
+        *fFlatArray.append() = fReader->readFlattenable();
+        SkASSERT(fFlatArray.count() == index);
+    }
+
     void addTypeface() {
         size_t size = fReader->readU32();
         const void* data = fReader->skip(SkAlign4(size));
@@ -103,8 +142,10 @@
         paint->setXfermode(id ? fXfermodes[id - 1] : NULL);
     }
     
-private:
     SkFlattenableReadBuffer* fReader;
+    SkTDArray<SkFlattenable*> fFlatArray;
+
+private:
 
     SkTDArray<SkPaint*> fPaints;
 
@@ -133,18 +174,6 @@
     return reinterpret_cast<const T*>(reader->skip(size));
 }
 
-static void readRegion(SkReader32* reader, SkRegion* rgn) {
-    size_t size = rgn->unflatten(reader->peek());
-    SkASSERT(SkAlign4(size) == size);
-    (void)reader->skip(size);
-}
-
-static void readMatrix(SkReader32* reader, SkMatrix* matrix) {
-    size_t size = matrix->unflatten(reader->peek());
-    SkASSERT(SkAlign4(size) == size);
-    (void)reader->skip(size);
-}
-
 const SkPaint& SkGPipeState::getPaint(uint32_t op32) const {
     unsigned index = DrawOp_unpackData(op32);
     if (index >= fPaints.count()) {
@@ -181,7 +210,7 @@
 static void clipRegion_rp(SkCanvas* canvas, SkReader32* reader, uint32_t op32,
                           SkGPipeState* state) {
     SkRegion rgn;
-    readRegion(reader, &rgn);
+    SkReadRegion(reader, &rgn);
     canvas->clipRegion(rgn, (SkRegion::Op)DrawOp_unpackData(op32));
 }
 
@@ -195,14 +224,14 @@
 static void setMatrix_rp(SkCanvas* canvas, SkReader32* reader, uint32_t op32,
                       SkGPipeState* state) {
     SkMatrix matrix;
-    readMatrix(reader, &matrix);
+    SkReadMatrix(reader, &matrix);
     canvas->setMatrix(matrix);
 }
 
 static void concat_rp(SkCanvas* canvas, SkReader32* reader, uint32_t op32,
                       SkGPipeState* state) {
     SkMatrix matrix;
-    readMatrix(reader, &matrix);
+    SkReadMatrix(reader, &matrix);
     canvas->concat(matrix);
 }
 
@@ -365,7 +394,7 @@
     SkMatrix matrixStorage;
     const SkMatrix* matrix = NULL;
     if (DrawOp_unpackFlags(op32) & kDrawTextOnPath_HasMatrix_DrawOpFlag) {
-        readMatrix(reader, &matrixStorage);
+        SkReadMatrix(reader, &matrixStorage);
         matrix = &matrixStorage;
     }
 
@@ -421,7 +450,9 @@
 
 static void paintOp_rp(SkCanvas*, SkReader32* reader, uint32_t op32,
                        SkGPipeState* state) {
-    SkPaint* p = state->editPaint(op32);
+    size_t offset = reader->offset();
+    size_t stop = offset + PaintOp_unpackData(op32);
+    SkPaint* p = state->editPaint(0);
     int done;
 
     do {
@@ -430,7 +461,7 @@
         unsigned data = PaintOp_unpackData(p32);
         done = PaintOp_unpackFlags(p32) & kLastOp_PaintOpFlag;
 
-        SkDebugf(" read %08X op=%d flags=%d data=%d\n", p32, op, done, data);
+//        SkDebugf(" read %08X op=%d flags=%d data=%d\n", p32, op, done, data);
 
         switch (op) {
             case kReset_PaintOp: p->reset(); break;
@@ -450,6 +481,13 @@
             case kTextScaleX_PaintOp: p->setTextScaleX(reader->readScalar()); break;
             case kTextSkewX_PaintOp: p->setTextSkewX(reader->readScalar()); break;
 
+            case kFlatIndex_PaintOp: {
+                PaintFlats pf = (PaintFlats)PaintOp_unpackFlags(p32);
+                unsigned index = data;
+                set_paintflat(p, state->getFlat(index), pf);
+                break;
+            }
+
             case kTypeface_PaintOp: state->setTypeface(p, data); break;
             case kPathEffect_PaintOp: state->setPathEffect(p, data); break;
             case kShader_PaintOp: state->setShader(p, data); break;
@@ -460,11 +498,20 @@
             case kDrawLooper_PaintOp: state->setLooper(p, data); break;
             default: SkASSERT(!"bad paintop"); return;
         }
+        SkASSERT(reader->offset() <= stop);
+        done = (reader->offset() >= stop);
     } while (!done);
 }
 
 ///////////////////////////////////////////////////////////////////////////////
 
+// Handles a PaintFlat definition op: the op word's flags give the PaintFlats value and its data
+// gives the index; both are passed to the state, which reads the object from its own reader.
+static void def_PaintFlat_rp(SkCanvas*, SkReader32*, uint32_t op32,
+                             SkGPipeState* state) {
+    state->defFlattenable((PaintFlats)DrawOp_unpackFlags(op32), DrawOp_unpackData(op32));
+}
+
 static void def_ColorFilter_rp(SkCanvas*, SkReader32*, uint32_t, SkGPipeState* state) {
     state->addColorFilter();
 }
@@ -540,6 +587,7 @@
     skew_rp,
     translate_rp,
     paintOp_rp,
+    def_PaintFlat_rp,
     def_ColorFilter_rp,
     def_DrawLooper_rp,
     def_MaskFilter_rp,
@@ -558,7 +606,8 @@
     *fPaints.append() = SkNEW(SkPaint);
 }
 
-SkGPipeState::~SkGPipeState() {    
+SkGPipeState::~SkGPipeState() {
+    fFlatArray.unrefAll();
     fPaints.deleteAll();
 }
 
diff --git a/src/pipe/SkGPipeWrite.cpp b/src/pipe/SkGPipeWrite.cpp
index d2bf3af..04625b7 100644
--- a/src/pipe/SkGPipeWrite.cpp
+++ b/src/pipe/SkGPipeWrite.cpp
@@ -21,8 +21,27 @@
 #include "SkGPipe.h"
 #include "SkGPipePriv.h"
 #include "SkStream.h"
+#include "SkTSearch.h"
 #include "SkTypeface.h"
 #include "SkWriter32.h"
+#include "SkColorFilter.h"
+#include "SkMaskFilter.h"
+#include "SkRasterizer.h"
+#include "SkShader.h"
+
+// Fetches the object held in the field of paint that paintFlat (a PaintFlats value) names.
+static SkFlattenable* get_paintflat(const SkPaint& paint, unsigned paintFlat) {
+    SkASSERT(paintFlat < kCount_PaintFlats);
+    switch (paintFlat) {
+        case kColorFilter_PaintFlat:    return paint.getColorFilter();
+        case kMaskFilter_PaintFlat:     return paint.getMaskFilter();
+        case kPathEffect_PaintFlat:     return paint.getPathEffect();
+        case kRasterizer_PaintFlat:     return paint.getRasterizer();
+        case kShader_PaintFlat:         return paint.getShader();
+        case kXfermode_PaintFlat:       return paint.getXfermode();
+        default:                        SkASSERT(!"never gets here"); return NULL;
+    }
+}
 
 static size_t estimateFlattenSize(const SkPath& path) {
     int n = path.countPoints();
@@ -40,18 +59,6 @@
     return bytes;
 }
 
-static void writeRegion(SkWriter32* writer, const SkRegion& rgn) {
-    size_t size = rgn.flatten(NULL);
-    SkASSERT(SkAlign4(size) == size);
-    rgn.flatten(writer->reserve(size));
-}
-
-static void writeMatrix(SkWriter32* writer, const SkMatrix& matrix) {
-    size_t size = matrix.flatten(NULL);
-    SkASSERT(SkAlign4(size) == size);
-    matrix.flatten(writer->reserve(size));
-}
-
 static size_t writeTypeface(SkWriter32* writer, SkTypeface* typeface) {
     SkASSERT(typeface);
     SkDynamicMemoryWStream stream;
@@ -151,7 +158,21 @@
             fBytesNotified += bytes;
         }
     }
-    
+
+    struct FlatData {   // header of one flattened object; fSize payload bytes follow it
+        uint32_t    fIndex; // always > 0
+        uint32_t    fSize;  // byte count of the payload that follows this header
+        void*       data() { return (char*)this + sizeof(*this); }
+        // Orders by fSize, then by payload bytes. Sizes are checked first so memcmp is only ever
+        // asked to read fSize bytes that both records actually own.
+        static int Compare(const FlatData* a, const FlatData* b) {
+            if (a->fSize != b->fSize) { return a->fSize < b->fSize ? -1 : 1; }
+            return memcmp((const char*)(a + 1), (const char*)(b + 1), a->fSize);
+        }
+    };
+    SkTDArray<FlatData*> fFlatArray;
+    int fCurrFlatIndex[kCount_PaintFlats];
+    int flattenToIndex(SkFlattenable* obj, PaintFlats);
+
     SkTDArray<SkPaint*> fPaints;
     unsigned writePaint(const SkPaint&);
 
@@ -167,6 +188,47 @@
     typedef SkCanvas INHERITED;
 };
 
+// Returns the 1-based index the reader knows obj by, first defining obj in the stream when its
+// flattened bytes have not been sent before. Returns 0 for NULL, for an object without a
+// factory (unflattenable), or when the definition cannot be written to the stream.
+int SkGPipeCanvas::flattenToIndex(SkFlattenable* obj, PaintFlats paintflat) {
+    if (NULL == obj || NULL == obj->getFactory()) {
+        return 0;
+    }
+
+    // Flatten into a scratch record so it can be compared with what was already sent.
+    SkFlattenableWriteBuffer tmpWriter(1024);
+    tmpWriter.writeFlattenable(obj);
+    size_t len = tmpWriter.size();
+    size_t allocSize = len + sizeof(FlatData);
+    SkAutoSMalloc<1024> storage(allocSize);
+    FlatData* flat = (FlatData*)storage.get();
+    flat->fIndex = 0;   // placeholder; only fSize and the payload take part in the search
+    flat->fSize = len;
+    tmpWriter.flatten(flat->data());
+
+    // fFlatArray holds pointers kept sorted by FlatData::Compare, hence the pointer-sized stride.
+    int index = SkTSearch<FlatData>((const FlatData**)fFlatArray.begin(),
+                                    fFlatArray.count(), flat, sizeof(FlatData*),
+                                    &FlatData::Compare);
+    if (index >= 0) {
+        return fFlatArray[index]->fIndex;
+    }
+
+    // Reserve stream space before recording the entry: the reader numbers objects by the def ops
+    // it receives, so an entry that is stored here but never written would skew later indices.
+    if (!this->needOpBytes(len)) {
+        return 0;
+    }
+    FlatData* copy = (FlatData*)sk_malloc_throw(allocSize);
+    memcpy(copy, flat, allocSize);
+    *fFlatArray.insert(~index) = copy;  // a miss returns the bit-inverted insertion position
+    copy->fIndex = fFlatArray.count();  // assigned after insert(), so the first entry gets 1
+    this->writeOp(kDef_PaintFlat_DrawOp, paintflat, copy->fIndex);
+    fWriter.write(copy->data(), len);
+    return copy->fIndex;
+}
+
 ///////////////////////////////////////////////////////////////////////////////
 
 #define MIN_BLOCK_SIZE  (16 * 1024)
@@ -176,6 +238,7 @@
     fController = controller;
     fDone = false;
     fBlockSize = 0; // need first block from controller
+    sk_bzero(fCurrFlatIndex, sizeof(fCurrFlatIndex));
 
     // always begin with 1 default paint
     *fPaints.append() = SkNEW(SkPaint);
@@ -192,6 +255,7 @@
     this->finish();
 
     fPaints.deleteAll();
+    fFlatArray.freeAll();
 }
 
 bool SkGPipeCanvas::needOpBytes(size_t needed) {
@@ -332,7 +396,7 @@
         NOTIFY_SETUP(this);
         if (this->needOpBytes(matrix.flatten(NULL))) {
             this->writeOp(kConcat_DrawOp);
-            writeMatrix(&fWriter, matrix);
+            SkWriteMatrix(&fWriter, matrix);
         }
     }
     return this->INHERITED::concat(matrix);
@@ -342,7 +406,7 @@
     NOTIFY_SETUP(this);
     if (this->needOpBytes(matrix.flatten(NULL))) {
         this->writeOp(kSetMatrix_DrawOp);
-        writeMatrix(&fWriter, matrix);
+        SkWriteMatrix(&fWriter, matrix);
     }
     this->INHERITED::setMatrix(matrix);
 }
@@ -370,7 +434,7 @@
     NOTIFY_SETUP(this);
     if (this->needOpBytes(region.flatten(NULL))) {
         this->writeOp(kClipRegion_DrawOp, 0, rgnOp);
-        writeRegion(&fWriter, region);
+        SkWriteRegion(&fWriter, region);
     }
     return this->INHERITED::clipRegion(region, rgnOp);
 }
@@ -519,7 +583,7 @@
 
             path.flatten(fWriter);
             if (matrix) {
-                writeMatrix(&fWriter, *matrix);
+                SkWriteMatrix(&fWriter, *matrix);
             }
         }
     }
@@ -693,14 +757,23 @@
         base.setTypeface(paint.getTypeface());
     }
 
+    for (int i = 0; i < kCount_PaintFlats; i++) {
+        int index = this->flattenToIndex(get_paintflat(paint, i), (PaintFlats)i);
+        SkASSERT(index >= 0 && index <= fFlatArray.count());
+        if (index != fCurrFlatIndex[i]) {
+            last = ptr;
+            *ptr++ = PaintOp_packOpFlagData(kFlatIndex_PaintOp, i, index);
+            fCurrFlatIndex[i] = index;
+        }
+    }
+
     size_t size = (char*)ptr - (char*)storage;
     if (size && this->needOpBytes(size)) {
-        this->writeOp(kPaintOp_DrawOp, 0, 0);
-        size_t size = (char*)ptr - (char*)storage;
-        *last |= kLastOp_PaintOpFlag << PAINTOPS_DATA_BITS;
-        fWriter.write(storage, (char*)ptr - (char*)storage);
+        this->writeOp(kPaintOp_DrawOp, 0, size);
+//        *last |= kLastOp_PaintOpFlag << PAINTOPS_DATA_BITS;
+        fWriter.write(storage, size);
         for (size_t i = 0; i < size/4; i++) {
-            SkDebugf("[%d] %08X\n", i, storage[i]);
+//            SkDebugf("[%d] %08X\n", i, storage[i]);
         }
     }
     return 0;
