diff --git a/gpu/include/GrContext.h b/gpu/include/GrContext.h
index cb354f8..b2928f8 100644
--- a/gpu/include/GrContext.h
+++ b/gpu/include/GrContext.h
@@ -21,6 +21,7 @@
 class GrInOrderDrawBuffer;
 class GrResourceEntry;
 class GrResourceCache;
+class GrStencilBuffer;
 class GrVertexBufferAllocPool;
 
 
@@ -580,6 +581,17 @@
     void resetStats();
     const GrGpuStats& getStats() const;
     void printStats() const;
+    /**
+     * Stencil buffers add themselves to the cache using
+     * addAndLockStencilBuffer. When a SB's RT-attachment count
+     * reaches zero the SB unlocks itself using unlockStencilBuffer and is
+     * eligible for purging. findStencilBuffer is called to check the cache for
+     * a SB that matches an RT's criteria. If a match is found that has been
+     * unlocked (its attachment count has reached 0) then it will be relocked.
+     */
+    GrResourceEntry* addAndLockStencilBuffer(GrStencilBuffer* sb);
+    void unlockStencilBuffer(GrResourceEntry* sbEntry);
+    GrStencilBuffer* findStencilBuffer(int width, int height, int sampleCnt);
 
 private:
     // used to keep track of when we need to flush the draw buffer
diff --git a/gpu/include/GrRenderTarget.h b/gpu/include/GrRenderTarget.h
index 13b1a3ac..6fa9f0f 100644
--- a/gpu/include/GrRenderTarget.h
+++ b/gpu/include/GrRenderTarget.h
@@ -32,8 +32,8 @@
  * that wrap externally created render targets.
  */
 class GrRenderTarget : public GrResource {
-
 public:
+
     /**
      * @return the width of the rendertarget
      */
@@ -177,8 +177,7 @@
         , fAllocatedWidth(allocatedWidth)
         , fAllocatedHeight(allocatedHeight)
         , fConfig(config)
-        , fSampleCnt(sampleCnt)
-    {
+        , fSampleCnt(sampleCnt) {
         fResolveRect.setLargestInverted();
     }
 
@@ -193,17 +192,16 @@
         fTexture = NULL;
     }
 
-    GrStencilBuffer*  fStencilBuffer;
-
 private:
-    GrTexture*      fTexture; // not ref'ed
-    int             fWidth;
-    int             fHeight;
-    int             fAllocatedWidth;
-    int             fAllocatedHeight;
-    GrPixelConfig   fConfig;
-    int             fSampleCnt;
-    GrIRect         fResolveRect;
+    GrStencilBuffer*  fStencilBuffer;
+    GrTexture*        fTexture; // not ref'ed
+    int               fWidth;
+    int               fHeight;
+    int               fAllocatedWidth;
+    int               fAllocatedHeight;
+    GrPixelConfig     fConfig;
+    int               fSampleCnt;
+    GrIRect           fResolveRect;
 
     typedef GrResource INHERITED;
 };
diff --git a/gpu/include/GrTexture.h b/gpu/include/GrTexture.h
index 77f88fa..5a86b08 100644
--- a/gpu/include/GrTexture.h
+++ b/gpu/include/GrTexture.h
@@ -64,7 +64,9 @@
      *  Approximate number of bytes used by the texture
      */
     virtual size_t sizeInBytes() const {
-        return fAllocatedWidth * fAllocatedHeight * GrBytesPerPixel(fConfig);
+        return (size_t) fAllocatedWidth *
+                        fAllocatedHeight *
+                        GrBytesPerPixel(fConfig);
     }
 
     /**
diff --git a/gpu/src/GrContext.cpp b/gpu/src/GrContext.cpp
index bfae62e..54b2954 100644
--- a/gpu/src/GrContext.cpp
+++ b/gpu/src/GrContext.cpp
@@ -16,6 +16,7 @@
 #include "GrPathRenderer.h"
 #include "GrPathUtils.h"
 #include "GrResourceCache.h"
+#include "GrStencilBuffer.h"
 #include "GrTextStrike.h"
 #include "SkTrace.h"
 
@@ -126,9 +127,14 @@
 ////////////////////////////////////////////////////////////////////////////////
 
 enum {
-    kNPOTBit    = 0x1,
-    kFilterBit  = 0x2,
-    kScratchBit = 0x4,
+    // flags for textures
+    kNPOTBit            = 0x1,
+    kFilterBit          = 0x2,
+    kScratchBit         = 0x4,
+
+    // resource type
+    kTextureBit         = 0x8,
+    kStencilBufferBit   = 0x10
 };
 
 GrTexture* GrContext::TextureCacheEntry::texture() const {
@@ -176,8 +182,26 @@
         v[3] |= kScratchBit;
     }
 
+    v[3] |= kTextureBit;
+
     return v[3] & kNPOTBit;
 }
+
+// Fills v with the cache key for a stencil buffer. We should never have more
+// than one stencil buffer with same combo of (width,height,samplecount)
+void gen_stencil_key_values(int width, int height,
+                            int sampleCnt, uint32_t v[4]) {
+    v[0] = width;
+    v[1] = height;
+    v[2] = sampleCnt;
+    v[3] = kStencilBufferBit; // resource type; texture keys set kTextureBit
+}
+
+void gen_stencil_key_values(const GrStencilBuffer* sb,
+                            uint32_t v[4]) {
+    gen_stencil_key_values(sb->width(), sb->height(),
+                           sb->numSamples(), v);
+}
 }
 
 GrContext::TextureCacheEntry GrContext::findAndLockTexture(TextureKey key,
@@ -187,7 +211,34 @@
     uint32_t v[4];
     gen_texture_key_values(fGpu, sampler, key, width, height, false, v);
     GrResourceKey resourceKey(v);
-    return TextureCacheEntry(fTextureCache->findAndLock(resourceKey));
+    return TextureCacheEntry(fTextureCache->findAndLock(resourceKey,
+                                            GrResourceCache::kNested_LockType));
+}
+
+GrResourceEntry* GrContext::addAndLockStencilBuffer(GrStencilBuffer* sb) {
+    uint32_t v[4];
+    gen_stencil_key_values(sb, v);
+    GrResourceKey resourceKey(v);
+    return fTextureCache->createAndLock(resourceKey, sb);
+}
+
+// Looks up the cached SB keyed by (width, height, sampleCnt). The lookup uses
+// kSingle_LockType. Returns NULL when the cache has no matching entry.
+GrStencilBuffer* GrContext::findStencilBuffer(int width, int height,
+                                              int sampleCnt) {
+    uint32_t keyVals[4];
+    gen_stencil_key_values(width, height, sampleCnt, keyVals);
+    GrResourceKey key(keyVals);
+    GrResourceEntry* hit = fTextureCache->findAndLock(key,
+                                            GrResourceCache::kSingle_LockType);
+    if (NULL == hit) {
+        return NULL;
+    }
+    return (GrStencilBuffer*) hit->resource();
+}
+
+void GrContext::unlockStencilBuffer(GrResourceEntry* sbEntry) {
+    fTextureCache->unlock(sbEntry);
 }
 
 static void stretchImage(void* dst,
@@ -376,7 +427,8 @@
         uint32_t v[4];
         gen_scratch_tex_key_values(fGpu, desc, v);
         GrResourceKey key(v);
-        entry = fTextureCache->findAndLock(key);
+        entry = fTextureCache->findAndLock(key,
+                                           GrResourceCache::kNested_LockType);
         // if we miss, relax the fit of the flags...
         // then try doubling width... then height.
         if (NULL != entry || kExact_ScratchTexMatch == match) {
diff --git a/gpu/src/GrGLRenderTarget.cpp b/gpu/src/GrGLRenderTarget.cpp
index 39aa332..ccfc55a 100644
--- a/gpu/src/GrGLRenderTarget.cpp
+++ b/gpu/src/GrGLRenderTarget.cpp
@@ -78,7 +78,7 @@
     fMSColorRenderbufferID  = 0;
     GrSafeUnref(fTexIDObj);
     fTexIDObj = NULL;
-    GrSafeSetNull(fStencilBuffer);
+    this->setStencilBuffer(NULL);
 }
 
 void GrGLRenderTarget::onAbandon() {
@@ -89,6 +89,6 @@
         fTexIDObj->abandon();
         fTexIDObj = NULL;
     }
-    GrSafeSetNull(fStencilBuffer);
+    this->setStencilBuffer(NULL);
 }
 
diff --git a/gpu/src/GrGLStencilBuffer.h b/gpu/src/GrGLStencilBuffer.h
index 97394e3..f55f518 100644
--- a/gpu/src/GrGLStencilBuffer.h
+++ b/gpu/src/GrGLStencilBuffer.h
@@ -25,8 +25,9 @@
 
     GrGLStencilBuffer(GrGpu* gpu, GrGLint rbid, 
                       int width, int height,
+                      int sampleCnt,
                       const Format& format) 
-        : GrStencilBuffer(gpu, width, height, format.fStencilBits)
+        : GrStencilBuffer(gpu, width, height, format.fStencilBits, sampleCnt)
         , fFormat(format)
         , fRenderbufferID(rbid) {
     }
@@ -36,7 +37,10 @@
     }
 
     virtual size_t sizeInBytes() const {
-        return this->width() * this->height() * fFormat.fTotalBits;
+        // fTotalBits is a per-sample bit count, so divide by 8 to get bytes
+        return (size_t) this->width() * this->height() *
+                        fFormat.fTotalBits *
+                        GrMax(1,this->numSamples()) / 8;
     }
 
     GrGLuint renderbufferID() const {
diff --git a/gpu/src/GrGpu.cpp b/gpu/src/GrGpu.cpp
index ad297f4..53b5c03 100644
--- a/gpu/src/GrGpu.cpp
+++ b/gpu/src/GrGpu.cpp
@@ -8,8 +8,10 @@
 
 
 #include "GrGpu.h"
+
 #include "GrBufferAllocPool.h"
 #include "GrClipIterator.h"
+#include "GrContext.h"
 #include "GrIndexBuffer.h"
 #include "GrPathRenderer.h"
 #include "GrGLStencilBuffer.h"
@@ -59,7 +61,7 @@
 }
 
 GrGpu::~GrGpu() {
-    releaseResources();
+    this->releaseResources();
 }
 
 void GrGpu::abandonResources() {
@@ -155,10 +157,24 @@
 }
 
 bool GrGpu::attachStencilBufferToRenderTarget(GrRenderTarget* rt) {
-    // TODO: use a cache of stencil buffers rather than create per-rt.
-    bool ret = this->createStencilBufferForRenderTarget(rt, rt->allocatedWidth(),
-                                                        rt->allocatedHeight());
-    if (ret) {
+    GrAssert(NULL == rt->getStencilBuffer());
+    GrStencilBuffer* sb = 
+        this->getContext()->findStencilBuffer(rt->allocatedWidth(),
+                                              rt->allocatedHeight(),
+                                              rt->numSamples());
+    if (NULL != sb) {
+        rt->setStencilBuffer(sb);
+        bool attached = this->attachStencilBufferToRenderTarget(sb, rt);
+        if (!attached) {
+            rt->setStencilBuffer(NULL);
+        }
+        return attached;
+    }
+    if (this->createStencilBufferForRenderTarget(rt, rt->allocatedWidth(),
+                                                 rt->allocatedHeight())) {
+        rt->getStencilBuffer()->ref();
+        rt->getStencilBuffer()->transferToCacheAndLock();
+
         // Right now we're clearing the stencil buffer here after it is
         // attached to an RT for the first time. When we start matching
         // stencil buffers with smaller color targets this will no longer
@@ -171,8 +187,10 @@
         fCurrDrawState.fRenderTarget = rt;
         this->clearStencil();
         fCurrDrawState.fRenderTarget = oldRT;
+        return true;
+    } else {
+        return false;
     }
-    return ret;
 }
 
 GrRenderTarget* GrGpu::createRenderTargetFrom3DApiState() {
diff --git a/gpu/src/GrGpuGL.cpp b/gpu/src/GrGpuGL.cpp
index 7162f35..8a5669c 100644
--- a/gpu/src/GrGpuGL.cpp
+++ b/gpu/src/GrGpuGL.cpp
@@ -602,6 +602,7 @@
 
     if (isRenderTarget) {
         rtDesc.fRTFBOID = desc.fPlatformRenderTarget;
+        rtDesc.fConfig = desc.fConfig;
 #if GR_USE_PLATFORM_CREATE_SAMPLE_COUNT
         if (desc.fSampleCnt) {
 #else
@@ -638,7 +639,7 @@
             format.fStencilBits = desc.fStencilBits;
             format.fTotalBits = desc.fStencilBits;
             sb = new GrGLStencilBuffer(this, 0, desc.fWidth,
-                                       desc.fHeight, format);
+                                       desc.fHeight, rtDesc.fSampleCnt, format);
         }
         rtDesc.fOwnIDs = false;
     }
@@ -836,6 +837,7 @@
     GrGLIRect viewport;
     viewport.setFromGLViewport();
     int stencilBits = get_fbo_stencil_bits(arbFBO);
+    GR_GL_GetIntegerv(GR_GL_SAMPLES, &rtDesc.fSampleCnt);
 
     GrGLStencilBuffer* sb = NULL;
     if (stencilBits) {
@@ -846,10 +848,10 @@
         format.fStencilBits = stencilBits;
         format.fTotalBits = stencilBits;
         sb = new GrGLStencilBuffer(this, 0, viewport.fWidth,
-                                   viewport.fHeight, format);
+                                   viewport.fHeight, rtDesc.fSampleCnt,
+                                   format);
     }
 
-    GR_GL_GetIntegerv(GR_GL_SAMPLES, &rtDesc.fSampleCnt);
     GrGLenum fmat = get_fbo_color_format();
     if (kUnknownGLFormat == fmat) {
         rtDesc.fConfig = get_implied_color_config(arbFBO);
@@ -1326,7 +1328,7 @@
         // that we won't go through this loop more than once after the
         // first (painful) stencil creation.
         int sIdx = (i + fLastSuccessfulStencilFmtIdx) % stencilFmtCnt;
-        // we do this if so that we don't call the multisample
+        // we do this "if" so that we don't call the multisample
         // version on a GL that doesn't have an MSAA extension.
         if (samples > 1) {
             GR_GL_NO_ERR(RenderbufferStorageMultisample(
@@ -1347,9 +1349,11 @@
             // sizes GL gives us. In that case we query for the size.
             GrGLStencilBuffer::Format format = fStencilFormats[sIdx];
             get_stencil_rb_sizes(sbID, &format);
-            sb = new GrGLStencilBuffer(this, sbID, width, height, format);
+            sb = new GrGLStencilBuffer(this, sbID, width, height, 
+                                       samples, format);
             if (this->attachStencilBufferToRenderTarget(sb, rt)) {
                 fLastSuccessfulStencilFmtIdx = sIdx;
+                rt->setStencilBuffer(sb);
                 sb->unref();
                 return true;
            }
@@ -1358,7 +1362,7 @@
         }
     }
     GR_GL(DeleteRenderbuffers(1, &sbID));
-    return NULL;
+    return false;
 }
 
 bool GrGpuGL::attachStencilBufferToRenderTarget(GrStencilBuffer* sb,
@@ -1414,7 +1418,6 @@
             }
             return false;
         } else {
-            rt->setStencilBuffer(sb);
             return true;
         }
     }
diff --git a/gpu/src/GrRenderTarget.cpp b/gpu/src/GrRenderTarget.cpp
index 8a73a84..7b26811 100644
--- a/gpu/src/GrRenderTarget.cpp
+++ b/gpu/src/GrRenderTarget.cpp
@@ -31,7 +31,10 @@
     } else {
         colorBits = GrBytesPerPixel(fConfig);
     }
-    return fAllocatedWidth * fAllocatedHeight * colorBits * GrMax(1,fSampleCnt);
+    return (size_t) fAllocatedWidth *
+                    fAllocatedHeight *
+                    colorBits *
+                    GrMax(1,fSampleCnt);
 }
 
 void GrRenderTarget::flagAsNeedingResolve(const GrIRect* rect) {
@@ -59,5 +62,13 @@
 }
 
 void GrRenderTarget::setStencilBuffer(GrStencilBuffer* stencilBuffer) {
-    GrSafeAssign(fStencilBuffer, stencilBuffer);
+    if (NULL != stencilBuffer) { // attach new first: safe if it's the old SB
+        stencilBuffer->wasAttachedToRenderTarget(this);
+        stencilBuffer->ref();
+    }
+    if (NULL != fStencilBuffer) { // now the old SB's count can't hit 0 early
+        fStencilBuffer->wasDetachedFromRenderTarget(this);
+        fStencilBuffer->unref();
+    }
+    fStencilBuffer = stencilBuffer;
 }
\ No newline at end of file
diff --git a/gpu/src/GrResourceCache.cpp b/gpu/src/GrResourceCache.cpp
index 2f5dfaf..97cbea8 100644
--- a/gpu/src/GrResourceCache.cpp
+++ b/gpu/src/GrResourceCache.cpp
@@ -155,7 +155,8 @@
 #endif
 };
 
-GrResourceEntry* GrResourceCache::findAndLock(const GrResourceKey& key) {
+GrResourceEntry* GrResourceCache::findAndLock(const GrResourceKey& key,
+                                              LockType type) {
     GrAutoResourceCacheValidate atcv(this);
 
     GrResourceEntry* entry = fCache.find(key);
@@ -163,7 +164,9 @@
         this->internalDetach(entry, false);
         // mark the entry as "busy" so it doesn't get purged
         // do this between detach and attach for locked count tracking
-        entry->lock();
+        if (kNested_LockType == type || !entry->isLocked()) {
+            entry->lock();
+        }
         this->attachToHead(entry, false);
     }
     return entry;
diff --git a/gpu/src/GrResourceCache.h b/gpu/src/GrResourceCache.h
index e431f1c..d3a8f03 100644
--- a/gpu/src/GrResourceCache.h
+++ b/gpu/src/GrResourceCache.h
@@ -209,10 +209,18 @@
     void setLimits(int maxResource, size_t maxResourceBytes);
 
     /**
+     * Controls whether locks should be nestable or not.
+     */
+    enum LockType {
+        kNested_LockType, // findAndLock always adds another lock to the entry
+        kSingle_LockType, // findAndLock locks only if entry isn't yet locked
+    };
+
+    /**
      *  Search for an entry with the same Key. If found, "lock" it and return it.
      *  If not found, return null.
      */
-    GrResourceEntry* findAndLock(const GrResourceKey&);
+    GrResourceEntry* findAndLock(const GrResourceKey&, LockType style);
 
     /**
      *  Create a new entry, based on the specified key and resource, and return
diff --git a/gpu/src/GrStencilBuffer.cpp b/gpu/src/GrStencilBuffer.cpp
new file mode 100644
index 0000000..d004612
--- /dev/null
+++ b/gpu/src/GrStencilBuffer.cpp
@@ -0,0 +1,26 @@
+
+/*
+ * Copyright 2011 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include "GrStencilBuffer.h"
+
+#include "GrContext.h"
+#include "GrGpu.h"
+
+void GrStencilBuffer::wasDetachedFromRenderTarget(const GrRenderTarget* rt) {
+    GrAssert(fRTAttachmentCnt > 0);
+    if (0 == --fRTAttachmentCnt && NULL != fCacheEntry) {
+        this->getGpu()->getContext()->unlockStencilBuffer(fCacheEntry);
+        // At this point we could be deleted!
+    }
+}
+
+void GrStencilBuffer::transferToCacheAndLock() {
+    GrAssert(NULL == fCacheEntry);
+    fCacheEntry = 
+        this->getGpu()->getContext()->addAndLockStencilBuffer(this);
+}
diff --git a/gpu/src/GrStencilBuffer.h b/gpu/src/GrStencilBuffer.h
index 34fbe11..af96a20 100644
--- a/gpu/src/GrStencilBuffer.h
+++ b/gpu/src/GrStencilBuffer.h
@@ -13,11 +13,24 @@
 #include "GrClip.h"
 #include "GrResource.h"
 
+// REMOVE ME
+#include "GrRenderTarget.h"
+
+class GrRenderTarget;
+class GrResourceEntry;
+
 class GrStencilBuffer : public GrResource {
 public:
+    virtual ~GrStencilBuffer() {
+        // currently each rt that has attached this sb keeps a ref
+        // TODO: allow SB to be purged and detach itself from rts
+        GrAssert(0 == fRTAttachmentCnt);
+    }
+
     int width() const { return fWidth; }
     int height() const { return fHeight; }
     int bits() const { return fBits; }
+    int numSamples() const { return fSampleCnt; }
 
     // called to note the last clip drawn to this buffer.
     void setLastClip(const GrClip& clip, int width, int height) {
@@ -43,26 +56,43 @@
         return fLastClip;
     }
 
+    // places the sb in the cache and locks it. Caller transfers
+    // a ref to the cache which will unref when purged.
+    void transferToCacheAndLock();
+
+    void wasAttachedToRenderTarget(const GrRenderTarget* rt) {
+        ++fRTAttachmentCnt;
+    }
+
+    void wasDetachedFromRenderTarget(const GrRenderTarget* rt);
+
 protected:
-    GrStencilBuffer(GrGpu* gpu, int width, int height, int bits)
+    GrStencilBuffer(GrGpu* gpu, int width, int height, int bits, int sampleCnt)
         : GrResource(gpu)
         , fWidth(width)
         , fHeight(height)
         , fBits(bits)
+        , fSampleCnt(sampleCnt)
         , fLastClip()
         , fLastClipWidth(-1)
-        , fLastClipHeight(-1) {
+        , fLastClipHeight(-1)
+        , fCacheEntry(NULL)
+        , fRTAttachmentCnt(0) {
     }
 
 private:
     int fWidth;
     int fHeight;
     int fBits;
+    int fSampleCnt;
 
     GrClip     fLastClip;
     int        fLastClipWidth;
     int        fLastClipHeight;
 
+    GrResourceEntry* fCacheEntry;
+    int              fRTAttachmentCnt;
+
     typedef GrResource INHERITED;
 };
 
diff --git a/gyp/gpu.gyp b/gyp/gpu.gyp
index f96e39c..718628f 100644
--- a/gyp/gpu.gyp
+++ b/gyp/gpu.gyp
@@ -180,6 +180,7 @@
         '../gpu/src/GrResourceCache.cpp',
         '../gpu/src/GrResourceCache.h',
         '../gpu/src/GrStencil.cpp',
+        '../gpu/src/GrStencilBuffer.cpp',
         '../gpu/src/GrStencilBuffer.h',
         '../gpu/src/GrTesselatedPathRenderer.cpp',
         '../gpu/src/GrTextContext.cpp',
