binder: Replace use of resize()/memcpy() with reserve()/insert()

resize() zero-initializes the vector even though data is about to be
copied into it. Moreover, it does so with a loop rather than a memset.

Using reserve() and insert() is significantly faster with large
payloads, as the new code only has to allocate and do a memcpy.

Benchmarked using system/libhwbinder/tests/benchmarks

Before:
Benchmark                       Time           CPU Iterations
-------------------------------------------------------------
BM_sendVec_binder/4k        89872 ns      45885 ns      15328
BM_sendVec_binder/8k       122753 ns      57713 ns      11667
BM_sendVec_binder/16k      163825 ns      88444 ns       7500
BM_sendVec_binder/32k      261942 ns     153561 ns       4667
BM_sendVec_binder/64k      558372 ns     318525 ns       2386

After:
Benchmark                       Time           CPU Iterations
-------------------------------------------------------------
BM_sendVec_binder/4k        69052 ns      23111 ns      30000
BM_sendVec_binder/8k        71891 ns      25092 ns      27632
BM_sendVec_binder/16k       85439 ns      32822 ns      21429
BM_sendVec_binder/32k      115223 ns      43912 ns      15789
BM_sendVec_binder/64k      203960 ns      82667 ns       8750

Change-Id: Ie83c1d0a9da6f175ffd1a3fc9e0ecc6d542a909a
diff --git a/libs/binder/Parcel.cpp b/libs/binder/Parcel.cpp
index 061cb08..572c284 100644
--- a/libs/binder/Parcel.cpp
+++ b/libs/binder/Parcel.cpp
@@ -1427,13 +1427,13 @@
         return status;
     }
 
-    const void* data = parcel->readInplace(size);
+    T* data = const_cast<T*>(reinterpret_cast<const T*>(parcel->readInplace(size)));
     if (!data) {
         status = BAD_VALUE;
         return status;
     }
-    val->resize(size);
-    memcpy(val->data(), data, size);
+    val->reserve(size);
+    val->insert(val->end(), data, data + size);
 
     return status;
 }