hwc: Full MDP Composition mode with overlap region removal

MDP bandwidth limitations can be avoided, if the overlapping region
covered by the smallest layer at a higher z-order, can be composed
on a render buffer using Copybit, and then it can be queued to MDP
along with other layers. It helps to improve performace by avoiding
GPU Composition in use cases like Home Screen pan. To enable this
feature, set system prop "persist.hwc.ptor.enable" to "true" or '1'
and enable Copybit HAL compilation.

Change-Id: Ib719d3dd42477837519518cc427f1b95356c28f7
diff --git a/libhwcomposer/hwc_copybit.cpp b/libhwcomposer/hwc_copybit.cpp
index 1df8635..f703fa0 100644
--- a/libhwcomposer/hwc_copybit.cpp
+++ b/libhwcomposer/hwc_copybit.cpp
@@ -208,6 +208,46 @@
    return -1;
 }
 
+bool CopyBit::prepareOverlap(hwc_context_t *ctx, hwc_display_contents_1_t *list,
+                            int overlapIndex) {
+
+    if (ctx->mMDP.version < qdutils::MDP_V4_0) {
+        ALOGE("%s: Invalid request", __FUNCTION__);
+        return false;
+    }
+
+    if (mEngine == NULL || !(validateParams(ctx, list))) {
+        ALOGE("%s: Invalid Params", __FUNCTION__);
+        return false;
+    }
+
+    //Allocate render buffers if they're not allocated
+    hwc_rect_t overlap = list->hwLayers[overlapIndex].displayFrame;
+    int width = overlap.right - overlap.left;
+    int height = overlap.bottom - overlap.top;
+    int alignW, alignH;
+
+    getBufferSizeAndDimensions(width, height, HAL_PIXEL_FORMAT_RGBA_8888,
+                               alignW, alignH);
+
+    if ((mAlignedWidth != alignW) || (mAlignedHeight != alignH)) {
+        // Overlap rect has changed, so free render buffers
+        freeRenderBuffers();
+    }
+
+    int ret = allocRenderBuffers(alignW, alignH, HAL_PIXEL_FORMAT_RGBA_8888);
+
+    if (ret < 0) {
+        ALOGE("%s: Render buffer allocation failed", __FUNCTION__);
+        return false;
+    }
+
+    mAlignedWidth = alignW;
+    mAlignedHeight = alignH;
+    mCurRenderBufferIndex = (mCurRenderBufferIndex + 1) % NUM_RENDER_BUFFERS;
+    return true;
+}
+
 bool CopyBit::prepare(hwc_context_t *ctx, hwc_display_contents_1_t *list,
                                                             int dpy) {
 
@@ -286,8 +326,8 @@
     //Allocate render buffers if they're not allocated
     if (ctx->mMDP.version != qdutils::MDP_V3_0_4 &&
             (useCopybitForYUV || useCopybitForRGB)) {
-        int ret = allocRenderBuffers(mAlignedFBWidth,
-                                     mAlignedFBHeight,
+        int ret = allocRenderBuffers(mAlignedWidth,
+                                     mAlignedHeight,
                                      HAL_PIXEL_FORMAT_RGBA_8888);
         if (ret < 0) {
             return false;
@@ -502,6 +542,132 @@
     return true;
 }
 
+int CopyBit::drawOverlap(hwc_context_t *ctx, hwc_display_contents_1_t *list,
+                        int overlapIndex) {
+    int fd = -1;
+
+    if (ctx->mMDP.version < qdutils::MDP_V4_0) {
+        ALOGE("%s: Invalid request", __FUNCTION__);
+        return fd;
+    }
+
+    private_handle_t *renderBuffer = getCurrentRenderBuffer();
+
+    if (!renderBuffer) {
+        ALOGE("%s: Render buffer layer handle is NULL", __FUNCTION__);
+        return fd;
+    }
+
+    int copybitLayerCount = 0;
+    hwc_rect_t overlap = list->hwLayers[overlapIndex].displayFrame;
+
+    // Draw overlapped content of layers on render buffer
+    for (int i = 0; i <= overlapIndex; i++) {
+
+        int ret = -1;
+        if ((list->hwLayers[i].acquireFenceFd != -1)) {
+            // Wait for acquire fence on the App buffers.
+            ret = sync_wait(list->hwLayers[i].acquireFenceFd, 1000);
+            if (ret < 0) {
+                ALOGE("%s: sync_wait error!! error no = %d err str = %s",
+                        __FUNCTION__, errno, strerror(errno));
+            }
+            close(list->hwLayers[i].acquireFenceFd);
+            list->hwLayers[i].acquireFenceFd = -1;
+        }
+
+        hwc_layer_1_t *layer = &list->hwLayers[i];
+        int retVal = drawRectUsingCopybit(ctx, layer, renderBuffer, overlap);
+        copybitLayerCount++;
+
+        if(retVal < 0) {
+            ALOGE("%s: drawRectUsingCopybit failed", __FUNCTION__);
+            copybitLayerCount = 0;
+        }
+    }
+
+    if (copybitLayerCount) {
+        copybit_device_t *copybit = getCopyBitDevice();
+        copybit->flush_get_fence(copybit, &fd);
+    }
+
+    ALOGD_IF(DEBUG_COPYBIT, "%s: done!", __FUNCTION__);
+    return fd;
+}
+
+int CopyBit::drawRectUsingCopybit(hwc_context_t *dev, hwc_layer_1_t *layer,
+                          private_handle_t *renderBuffer, hwc_rect_t rect)
+{
+    hwc_context_t* ctx = (hwc_context_t*)(dev);
+    if (!ctx) {
+        ALOGE("%s: null context ", __FUNCTION__);
+        return -1;
+    }
+
+    private_handle_t *hnd = (private_handle_t *)layer->handle;
+    if (!hnd) {
+        ALOGE("%s: invalid handle", __FUNCTION__);
+        return -1;
+    }
+
+    private_handle_t *dstHandle = (private_handle_t *)renderBuffer;
+    if (!dstHandle) {
+        ALOGE("%s: RenderBuffer handle is NULL", __FUNCTION__);
+        return -1;
+    }
+
+    // Set the Copybit Source
+    copybit_image_t src;
+    src.handle = (native_handle_t *)layer->handle;
+    src.w = hnd->width;
+    src.h = hnd->height;
+    src.base = (void *)hnd->base;
+    src.format = hnd->format;
+    src.horiz_padding = 0;
+    src.vert_padding = 0;
+
+    hwc_rect_t dispFrame = layer->displayFrame;
+    hwc_rect_t iRect = getIntersection(dispFrame, rect);
+    hwc_rect_t crop = integerizeSourceCrop(layer->sourceCropf);
+    qhwc::calculate_crop_rects(crop, dispFrame, iRect, layer->transform);
+
+    // Copybit source rect
+    copybit_rect_t srcRect = {crop.left, crop.top, crop.right, crop.bottom};
+
+    // Copybit destination rect
+    copybit_rect_t dstRect = {rect.left, rect.top, rect.right, rect.bottom};
+
+    // Copybit dst
+    copybit_image_t dst;
+    dst.handle = (native_handle_t *)dstHandle;
+    dst.w = ALIGN(dstHandle->width, 32);
+    dst.h = dstHandle->height;
+    dst.base = (void *)dstHandle->base;
+    dst.format = dstHandle->format;
+
+    copybit_device_t *copybit = mEngine;
+
+    // Copybit region
+    hwc_region_t region = layer->visibleRegionScreen;
+    region_iterator copybitRegion(region);
+    int acquireFd = layer->acquireFenceFd;
+
+    copybit->set_parameter(copybit, COPYBIT_FRAMEBUFFER_WIDTH, renderBuffer->width);
+    copybit->set_parameter(copybit, COPYBIT_FRAMEBUFFER_HEIGHT, renderBuffer->height);
+    copybit->set_parameter(copybit, COPYBIT_TRANSFORM, layer->transform);
+    copybit->set_parameter(copybit, COPYBIT_PLANE_ALPHA, layer->planeAlpha);
+    copybit->set_parameter(copybit, COPYBIT_BLEND_MODE, layer->blending);
+    copybit->set_parameter(copybit, COPYBIT_DITHER,
+        (dst.format == HAL_PIXEL_FORMAT_RGB_565) ? COPYBIT_ENABLE : COPYBIT_DISABLE);
+    copybit->set_sync(copybit, acquireFd);
+    int err = copybit->stretch(copybit, &dst, &src, &dstRect, &srcRect, &copybitRegion);
+
+    if (err < 0)
+        ALOGE("%s: copybit stretch failed",__FUNCTION__);
+
+    return err;
+}
+
 int  CopyBit::drawLayerUsingCopybit(hwc_context_t *dev, hwc_layer_1_t *layer,
                           private_handle_t *renderBuffer, bool isFG)
 {
@@ -872,6 +1038,18 @@
     mRelFd[mCurRenderBufferIndex] = dup(fd);
 }
 
+void CopyBit::setReleaseFdSync(int fd) {
+    if (mRelFd[mCurRenderBufferIndex] >=0) {
+        int ret = -1;
+        ret = sync_wait(mRelFd[mCurRenderBufferIndex], 1000);
+        if (ret < 0)
+            ALOGE("%s: sync_wait error! errno = %d, err str = %s",
+                  __FUNCTION__, errno, strerror(errno));
+        close(mRelFd[mCurRenderBufferIndex]);
+    }
+    mRelFd[mCurRenderBufferIndex] = dup(fd);
+}
+
 struct copybit_device_t* CopyBit::getCopyBitDevice() {
     return mEngine;
 }
@@ -882,8 +1060,8 @@
     getBufferSizeAndDimensions(ctx->dpyAttr[dpy].xres,
             ctx->dpyAttr[dpy].yres,
             HAL_PIXEL_FORMAT_RGBA_8888,
-            mAlignedFBWidth,
-            mAlignedFBHeight);
+            mAlignedWidth,
+            mAlignedHeight);
 
     hw_module_t const *module;
     for (int i = 0; i < NUM_RENDER_BUFFERS; i++) {