Merge "liblp: Improve reliability of UpdatePartitionTable."
am: d73414c5e8

Change-Id: Icfaa6e3986d0c08a78a1a0d015d6cb28387c850a
diff --git a/fs_mgr/liblp/include/liblp/reader.h b/fs_mgr/liblp/include/liblp/reader.h
index 982fe65..9f01441 100644
--- a/fs_mgr/liblp/include/liblp/reader.h
+++ b/fs_mgr/liblp/include/liblp/reader.h
@@ -31,10 +31,15 @@
 std::unique_ptr<LpMetadata> ReadMetadata(const char* block_device, uint32_t slot_number);
 std::unique_ptr<LpMetadata> ReadMetadata(int fd, uint32_t slot_number);
 
-// Read and validate the logical partition geometry from a block device.
-bool ReadLogicalPartitionGeometry(const char* block_device, LpMetadataGeometry* geometry);
+// Helper functions for manually reading geometry and metadata.
 bool ReadLogicalPartitionGeometry(int fd, LpMetadataGeometry* geometry);
 
+// These functions assume a valid geometry and slot number.
+std::unique_ptr<LpMetadata> ReadPrimaryMetadata(int fd, const LpMetadataGeometry& geometry,
+                                                uint32_t slot_number);
+std::unique_ptr<LpMetadata> ReadBackupMetadata(int fd, const LpMetadataGeometry& geometry,
+                                               uint32_t slot_number);
+
 // Read logical partition metadata from an image file that was created with
 // WriteToImageFile().
 std::unique_ptr<LpMetadata> ReadFromImageFile(const char* file);
diff --git a/fs_mgr/liblp/include/liblp/writer.h b/fs_mgr/liblp/include/liblp/writer.h
index f4d1ad7..da38a4c 100644
--- a/fs_mgr/liblp/include/liblp/writer.h
+++ b/fs_mgr/liblp/include/liblp/writer.h
@@ -45,7 +45,7 @@
 bool UpdatePartitionTable(int fd, const LpMetadata& metadata, uint32_t slot_number);
 
 bool UpdatePartitionTable(int fd, const LpMetadata& metadata, uint32_t slot_number,
-                          std::function<bool(int, const std::string&)> writer);
+                          const std::function<bool(int, const std::string&)>& writer);
 
 // Helper function to serialize geometry and metadata to a normal file, for
 // flashing or debugging.
diff --git a/fs_mgr/liblp/io_test.cpp b/fs_mgr/liblp/io_test.cpp
index c3f8f36..de10eb6 100644
--- a/fs_mgr/liblp/io_test.cpp
+++ b/fs_mgr/liblp/io_test.cpp
@@ -399,29 +399,42 @@
     // When requested, write garbage instead of the requested bytes, then
     // return false.
     bool operator()(int fd, const std::string& blob) {
-        if (++write_count_ == fail_on_write_) {
+        write_count_++;
+        if (write_count_ == fail_on_write_) {
             std::unique_ptr<char[]> new_data = std::make_unique<char[]>(blob.size());
             memset(new_data.get(), 0xe5, blob.size());
             EXPECT_TRUE(android::base::WriteFully(fd, new_data.get(), blob.size()));
             return false;
         } else {
-            return android::base::WriteFully(fd, blob.data(), blob.size());
+            if (!android::base::WriteFully(fd, blob.data(), blob.size())) {
+                return false;
+            }
+            return fail_after_write_ != write_count_;
         }
     }
-    void FailOnWrite(int number) {
-        fail_on_write_ = number;
+    void Reset() {
+        fail_on_write_ = 0;
+        fail_after_write_ = 0;
         write_count_ = 0;
     }
+    void FailOnWrite(int number) {
+        Reset();
+        fail_on_write_ = number;
+    }
+    void FailAfterWrite(int number) {
+        Reset();
+        fail_after_write_ = number;
+    }
 
   private:
     int fail_on_write_ = 0;
+    int fail_after_write_ = 0;
     int write_count_ = 0;
 };
 
 // Test that an interrupted flash operation on the "primary" copy of metadata
 // is not fatal.
-TEST(liblp, FlashPrimaryMetadataFailure) {
-    // Initial state.
+TEST(liblp, UpdatePrimaryMetadataFailure) {
     unique_fd fd = CreateFlashedDisk();
     ASSERT_GE(fd, 0);
 
@@ -439,7 +452,7 @@
 
     // Flash again, this time fail the backup copy. We should still be able
     // to read the primary.
-    writer.FailOnWrite(2);
+    writer.FailOnWrite(3);
     ASSERT_FALSE(UpdatePartitionTable(fd, *imported.get(), 0, writer));
     imported = ReadMetadata(fd, 0);
     ASSERT_NE(imported, nullptr);
@@ -447,8 +460,7 @@
 
 // Test that an interrupted flash operation on the "backup" copy of metadata
 // is not fatal.
-TEST(liblp, FlashBackupMetadataFailure) {
-    // Initial state.
+TEST(liblp, UpdateBackupMetadataFailure) {
     unique_fd fd = CreateFlashedDisk();
     ASSERT_GE(fd, 0);
 
@@ -466,12 +478,45 @@
 
     // Flash again, this time fail the primary copy. We should still be able
     // to read the primary.
-    //
-    // TODO(dvander): This is currently not handled correctly. liblp does not
-    // guarantee both copies are in sync before the update. The ASSERT_EQ
-    // will change to an ASSERT_NE when this is fixed.
-    writer.FailOnWrite(1);
+    writer.FailOnWrite(2);
     ASSERT_FALSE(UpdatePartitionTable(fd, *imported.get(), 0, writer));
     imported = ReadMetadata(fd, 0);
-    ASSERT_EQ(imported, nullptr);
+    ASSERT_NE(imported, nullptr);
+}
+
+// Test that an interrupted write *in between* writing metadata will read
+// the correct metadata copy. The primary is always considered newer than
+// the backup.
+TEST(liblp, UpdateMetadataCleanFailure) {
+    unique_fd fd = CreateFlashedDisk();
+    ASSERT_GE(fd, 0);
+
+    BadWriter writer;
+
+    // Change the name of the existing partition.
+    unique_ptr<LpMetadata> new_table = ReadMetadata(fd, 0);
+    ASSERT_NE(new_table, nullptr);
+    ASSERT_GE(new_table->partitions.size(), 1);
+    new_table->partitions[0].name[0]++;
+
+    // Flash it, but fail to write the backup copy.
+    writer.FailAfterWrite(2);
+    ASSERT_FALSE(UpdatePartitionTable(fd, *new_table.get(), 0, writer));
+
+    // When we read back, we should get the updated primary copy.
+    unique_ptr<LpMetadata> imported = ReadMetadata(fd, 0);
+    ASSERT_NE(imported, nullptr);
+    ASSERT_GE(new_table->partitions.size(), 1);
+    ASSERT_EQ(GetPartitionName(new_table->partitions[0]), GetPartitionName(imported->partitions[0]));
+
+    // Flash again. After, the backup and primary copy should be coherent.
+    // Note that the sync step should have used the primary to sync, not
+    // the backup.
+    writer.Reset();
+    ASSERT_TRUE(UpdatePartitionTable(fd, *new_table.get(), 0, writer));
+
+    imported = ReadMetadata(fd, 0);
+    ASSERT_NE(imported, nullptr);
+    ASSERT_GE(new_table->partitions.size(), 1);
+    ASSERT_EQ(GetPartitionName(new_table->partitions[0]), GetPartitionName(imported->partitions[0]));
 }
diff --git a/fs_mgr/liblp/reader.cpp b/fs_mgr/liblp/reader.cpp
index a0eeec9..d680baf 100644
--- a/fs_mgr/liblp/reader.cpp
+++ b/fs_mgr/liblp/reader.cpp
@@ -111,16 +111,6 @@
     return ParseGeometry(buffer.get(), geometry);
 }
 
-// Helper function to read geometry from a device without an open descriptor.
-bool ReadLogicalPartitionGeometry(const char* block_device, LpMetadataGeometry* geometry) {
-    android::base::unique_fd fd(open(block_device, O_RDONLY));
-    if (fd < 0) {
-        PERROR << __PRETTY_FUNCTION__ << "open failed: " << block_device;
-        return false;
-    }
-    return ReadLogicalPartitionGeometry(fd, geometry);
-}
-
 static bool ValidateTableBounds(const LpMetadataHeader& header,
                                 const LpMetadataTableDescriptor& table) {
     if (table.offset > header.tables_size) {
@@ -243,6 +233,26 @@
     return metadata;
 }
 
+std::unique_ptr<LpMetadata> ReadPrimaryMetadata(int fd, const LpMetadataGeometry& geometry,
+                                                uint32_t slot_number) {
+    int64_t offset = GetPrimaryMetadataOffset(geometry, slot_number);
+    if (SeekFile64(fd, offset, SEEK_SET) < 0) {
+        PERROR << __PRETTY_FUNCTION__ << "lseek failed: offset " << offset;
+        return nullptr;
+    }
+    return ParseMetadata(fd);
+}
+
+std::unique_ptr<LpMetadata> ReadBackupMetadata(int fd, const LpMetadataGeometry& geometry,
+                                               uint32_t slot_number) {
+    int64_t offset = GetBackupMetadataOffset(geometry, slot_number);
+    if (SeekFile64(fd, offset, SEEK_END) < 0) {
+        PERROR << __PRETTY_FUNCTION__ << "lseek failed: offset " << offset;
+        return nullptr;
+    }
+    return ParseMetadata(fd);
+}
+
 std::unique_ptr<LpMetadata> ReadMetadata(int fd, uint32_t slot_number) {
     LpMetadataGeometry geometry;
     if (!ReadLogicalPartitionGeometry(fd, &geometry)) {
@@ -254,24 +264,11 @@
         return nullptr;
     }
 
-    // First try the primary copy.
-    int64_t offset = GetPrimaryMetadataOffset(geometry, slot_number);
-    if (SeekFile64(fd, offset, SEEK_SET) < 0) {
-        PERROR << __PRETTY_FUNCTION__ << "lseek failed: offset " << offset;
-        return nullptr;
-    }
-    std::unique_ptr<LpMetadata> metadata = ParseMetadata(fd);
-
-    // If the primary copy failed, try the backup copy.
+    // Read the priamry copy, and if that fails, try the backup.
+    std::unique_ptr<LpMetadata> metadata = ReadPrimaryMetadata(fd, geometry, slot_number);
     if (!metadata) {
-        offset = GetBackupMetadataOffset(geometry, slot_number);
-        if (SeekFile64(fd, offset, SEEK_END) < 0) {
-            PERROR << __PRETTY_FUNCTION__ << "lseek failed: offset " << offset;
-            return nullptr;
-        }
-        metadata = ParseMetadata(fd);
+        metadata = ReadBackupMetadata(fd, geometry, slot_number);
     }
-
     if (metadata) {
         metadata->geometry = geometry;
     }
diff --git a/fs_mgr/liblp/writer.cpp b/fs_mgr/liblp/writer.cpp
index 36c7b5a..963974c 100644
--- a/fs_mgr/liblp/writer.cpp
+++ b/fs_mgr/liblp/writer.cpp
@@ -130,20 +130,9 @@
     return true;
 }
 
-static bool DefaultWriter(int fd, const std::string& blob) {
-    return android::base::WriteFully(fd, blob.data(), blob.size());
-}
-
-static bool WriteMetadata(int fd, const LpMetadataGeometry& geometry, uint32_t slot_number,
-                          const std::string& blob,
-                          std::function<bool(int, const std::string&)> writer) {
-    // Make sure we're writing to a valid metadata slot.
-    if (slot_number >= geometry.metadata_slot_count) {
-        LERROR << "Invalid logical partition metadata slot number.";
-        return false;
-    }
-
-    // Write the primary copy of the metadata.
+static bool WritePrimaryMetadata(int fd, const LpMetadataGeometry& geometry, uint32_t slot_number,
+                                 const std::string& blob,
+                                 const std::function<bool(int, const std::string&)>& writer) {
     int64_t primary_offset = GetPrimaryMetadataOffset(geometry, slot_number);
     if (SeekFile64(fd, primary_offset, SEEK_SET) < 0) {
         PERROR << __PRETTY_FUNCTION__ << "lseek failed: offset " << primary_offset;
@@ -153,8 +142,12 @@
         PERROR << __PRETTY_FUNCTION__ << "write " << blob.size() << " bytes failed";
         return false;
     }
+    return true;
+}
 
-    // Write the backup copy of the metadata.
+static bool WriteBackupMetadata(int fd, const LpMetadataGeometry& geometry, uint32_t slot_number,
+                                const std::string& blob,
+                                const std::function<bool(int, const std::string&)>& writer) {
     int64_t backup_offset = GetBackupMetadataOffset(geometry, slot_number);
     int64_t abs_offset = SeekFile64(fd, backup_offset, SEEK_END);
     if (abs_offset == (int64_t)-1) {
@@ -173,6 +166,27 @@
     return true;
 }
 
+static bool WriteMetadata(int fd, const LpMetadataGeometry& geometry, uint32_t slot_number,
+                          const std::string& blob,
+                          const std::function<bool(int, const std::string&)>& writer) {
+    // Make sure we're writing to a valid metadata slot.
+    if (slot_number >= geometry.metadata_slot_count) {
+        LERROR << "Invalid logical partition metadata slot number.";
+        return false;
+    }
+    if (!WritePrimaryMetadata(fd, geometry, slot_number, blob, writer)) {
+        return false;
+    }
+    if (!WriteBackupMetadata(fd, geometry, slot_number, blob, writer)) {
+        return false;
+    }
+    return true;
+}
+
+static bool DefaultWriter(int fd, const std::string& blob) {
+    return android::base::WriteFully(fd, blob.data(), blob.size());
+}
+
 bool FlashPartitionTable(int fd, const LpMetadata& metadata, uint32_t slot_number) {
     // Before writing geometry and/or logical partition tables, perform some
     // basic checks that the geometry and tables are coherent, and will fit
@@ -205,8 +219,13 @@
     return WriteMetadata(fd, metadata.geometry, slot_number, metadata_blob, DefaultWriter);
 }
 
+static bool CompareMetadata(const LpMetadata& a, const LpMetadata& b) {
+    return !memcmp(a.header.header_checksum, b.header.header_checksum,
+                   sizeof(a.header.header_checksum));
+}
+
 bool UpdatePartitionTable(int fd, const LpMetadata& metadata, uint32_t slot_number,
-                          std::function<bool(int, const std::string&)> writer) {
+                          const std::function<bool(int, const std::string&)>& writer) {
     // Before writing geometry and/or logical partition tables, perform some
     // basic checks that the geometry and tables are coherent, and will fit
     // on the given block device.
@@ -227,6 +246,45 @@
         LERROR << "Incompatible geometry in new logical partition metadata";
         return false;
     }
+
+    // Validate the slot number now, before we call Read*Metadata.
+    if (slot_number >= geometry.metadata_slot_count) {
+        LERROR << "Invalid logical partition metadata slot number.";
+        return false;
+    }
+
+    // Try to read both existing copies of the metadata, if any.
+    std::unique_ptr<LpMetadata> primary = ReadPrimaryMetadata(fd, geometry, slot_number);
+    std::unique_ptr<LpMetadata> backup = ReadBackupMetadata(fd, geometry, slot_number);
+
+    if (primary && (!backup || !CompareMetadata(*primary.get(), *backup.get()))) {
+        // If the backup copy does not match the primary copy, we first
+        // synchronize the backup copy. This guarantees that a partial write
+        // still leaves one copy intact.
+        std::string old_blob;
+        if (!ValidateAndSerializeMetadata(fd, *primary.get(), &old_blob)) {
+            LERROR << "Error serializing primary metadata to repair corrupted backup";
+            return false;
+        }
+        if (!WriteBackupMetadata(fd, geometry, slot_number, old_blob, writer)) {
+            LERROR << "Error writing primary metadata to repair corrupted backup";
+            return false;
+        }
+    } else if (backup && !primary) {
+        // The backup copy is coherent, and the primary is not. Sync it for
+        // safety.
+        std::string old_blob;
+        if (!ValidateAndSerializeMetadata(fd, *backup.get(), &old_blob)) {
+            LERROR << "Error serializing primary metadata to repair corrupted backup";
+            return false;
+        }
+        if (!WritePrimaryMetadata(fd, geometry, slot_number, old_blob, writer)) {
+            LERROR << "Error writing primary metadata to repair corrupted backup";
+            return false;
+        }
+    }
+
+    // Both copies should now be in sync, so we can continue the update.
     return WriteMetadata(fd, geometry, slot_number, blob, writer);
 }