liblp: Improve reliability of UpdatePartitionTable.

If UpdatePartitionTable is interrupted while writing metadata, the next
update becomes much more risky, as only one valid copy may exist. If
that subsequent update is interrupted, all metadata copies may be
corrupt.

To alleviate this, we now synchronize both metadata copies before each
update. If the backup copy is corrupted, we replace it with the primary
copy, and vice versa. Similarly if the primary and backup metadata do
not match, we sync the backup to contain the same data as the primary.

If for some reason this synchronization process fails, we do not proceed
to write a new partition table.

Bug: 79173901
Test: liblp_test gtest
Change-Id: Ic80cf9a5dc6336ff532e069ca5c8c76371550cb9
diff --git a/fs_mgr/liblp/include/liblp/reader.h b/fs_mgr/liblp/include/liblp/reader.h
index 982fe65..9f01441 100644
--- a/fs_mgr/liblp/include/liblp/reader.h
+++ b/fs_mgr/liblp/include/liblp/reader.h
@@ -31,10 +31,15 @@
 std::unique_ptr<LpMetadata> ReadMetadata(const char* block_device, uint32_t slot_number);
 std::unique_ptr<LpMetadata> ReadMetadata(int fd, uint32_t slot_number);
 
-// Read and validate the logical partition geometry from a block device.
-bool ReadLogicalPartitionGeometry(const char* block_device, LpMetadataGeometry* geometry);
+// Helper functions for manually reading geometry and metadata.
 bool ReadLogicalPartitionGeometry(int fd, LpMetadataGeometry* geometry);
 
+// These functions assume a valid geometry and slot number.
+std::unique_ptr<LpMetadata> ReadPrimaryMetadata(int fd, const LpMetadataGeometry& geometry,
+                                                uint32_t slot_number);
+std::unique_ptr<LpMetadata> ReadBackupMetadata(int fd, const LpMetadataGeometry& geometry,
+                                               uint32_t slot_number);
+
 // Read logical partition metadata from an image file that was created with
 // WriteToImageFile().
 std::unique_ptr<LpMetadata> ReadFromImageFile(const char* file);
diff --git a/fs_mgr/liblp/include/liblp/writer.h b/fs_mgr/liblp/include/liblp/writer.h
index f4d1ad7..da38a4c 100644
--- a/fs_mgr/liblp/include/liblp/writer.h
+++ b/fs_mgr/liblp/include/liblp/writer.h
@@ -45,7 +45,7 @@
 bool UpdatePartitionTable(int fd, const LpMetadata& metadata, uint32_t slot_number);
 
 bool UpdatePartitionTable(int fd, const LpMetadata& metadata, uint32_t slot_number,
-                          std::function<bool(int, const std::string&)> writer);
+                          const std::function<bool(int, const std::string&)>& writer);
 
 // Helper function to serialize geometry and metadata to a normal file, for
 // flashing or debugging.
diff --git a/fs_mgr/liblp/io_test.cpp b/fs_mgr/liblp/io_test.cpp
index c3f8f36..de10eb6 100644
--- a/fs_mgr/liblp/io_test.cpp
+++ b/fs_mgr/liblp/io_test.cpp
@@ -399,29 +399,42 @@
     // When requested, write garbage instead of the requested bytes, then
     // return false.
     bool operator()(int fd, const std::string& blob) {
-        if (++write_count_ == fail_on_write_) {
+        write_count_++;
+        if (write_count_ == fail_on_write_) {
             std::unique_ptr<char[]> new_data = std::make_unique<char[]>(blob.size());
             memset(new_data.get(), 0xe5, blob.size());
             EXPECT_TRUE(android::base::WriteFully(fd, new_data.get(), blob.size()));
             return false;
         } else {
-            return android::base::WriteFully(fd, blob.data(), blob.size());
+            if (!android::base::WriteFully(fd, blob.data(), blob.size())) {
+                return false;
+            }
+            return fail_after_write_ != write_count_;
         }
     }
-    void FailOnWrite(int number) {
-        fail_on_write_ = number;
+    void Reset() {
+        fail_on_write_ = 0;
+        fail_after_write_ = 0;
         write_count_ = 0;
     }
+    void FailOnWrite(int number) {
+        Reset();
+        fail_on_write_ = number;
+    }
+    void FailAfterWrite(int number) {
+        Reset();
+        fail_after_write_ = number;
+    }
 
   private:
     int fail_on_write_ = 0;
+    int fail_after_write_ = 0;
     int write_count_ = 0;
 };
 
 // Test that an interrupted flash operation on the "primary" copy of metadata
 // is not fatal.
-TEST(liblp, FlashPrimaryMetadataFailure) {
-    // Initial state.
+TEST(liblp, UpdatePrimaryMetadataFailure) {
     unique_fd fd = CreateFlashedDisk();
     ASSERT_GE(fd, 0);
 
@@ -439,7 +452,7 @@
 
     // Flash again, this time fail the backup copy. We should still be able
     // to read the primary.
-    writer.FailOnWrite(2);
+    writer.FailOnWrite(3);
     ASSERT_FALSE(UpdatePartitionTable(fd, *imported.get(), 0, writer));
     imported = ReadMetadata(fd, 0);
     ASSERT_NE(imported, nullptr);
@@ -447,8 +460,7 @@
 
 // Test that an interrupted flash operation on the "backup" copy of metadata
 // is not fatal.
-TEST(liblp, FlashBackupMetadataFailure) {
-    // Initial state.
+TEST(liblp, UpdateBackupMetadataFailure) {
     unique_fd fd = CreateFlashedDisk();
     ASSERT_GE(fd, 0);
 
@@ -466,12 +478,45 @@
 
     // Flash again, this time fail the primary copy. We should still be able
     // to read the primary.
-    //
-    // TODO(dvander): This is currently not handled correctly. liblp does not
-    // guarantee both copies are in sync before the update. The ASSERT_EQ
-    // will change to an ASSERT_NE when this is fixed.
-    writer.FailOnWrite(1);
+    writer.FailOnWrite(2);
     ASSERT_FALSE(UpdatePartitionTable(fd, *imported.get(), 0, writer));
     imported = ReadMetadata(fd, 0);
-    ASSERT_EQ(imported, nullptr);
+    ASSERT_NE(imported, nullptr);
+}
+
+// Test that an interrupted write *in between* writing metadata will read
+// the correct metadata copy. The primary is always considered newer than
+// the backup.
+TEST(liblp, UpdateMetadataCleanFailure) {
+    unique_fd fd = CreateFlashedDisk();
+    ASSERT_GE(fd, 0);
+
+    BadWriter writer;
+
+    // Change the name of the existing partition.
+    unique_ptr<LpMetadata> new_table = ReadMetadata(fd, 0);
+    ASSERT_NE(new_table, nullptr);
+    ASSERT_GE(new_table->partitions.size(), 1);
+    new_table->partitions[0].name[0]++;
+
+    // Flash it, but fail to write the backup copy.
+    writer.FailAfterWrite(2);
+    ASSERT_FALSE(UpdatePartitionTable(fd, *new_table.get(), 0, writer));
+
+    // When we read back, we should get the updated primary copy.
+    unique_ptr<LpMetadata> imported = ReadMetadata(fd, 0);
+    ASSERT_NE(imported, nullptr);
+    ASSERT_GE(new_table->partitions.size(), 1);
+    ASSERT_EQ(GetPartitionName(new_table->partitions[0]), GetPartitionName(imported->partitions[0]));
+
+    // Flash again. After, the backup and primary copy should be coherent.
+    // Note that the sync step should have used the primary to sync, not
+    // the backup.
+    writer.Reset();
+    ASSERT_TRUE(UpdatePartitionTable(fd, *new_table.get(), 0, writer));
+
+    imported = ReadMetadata(fd, 0);
+    ASSERT_NE(imported, nullptr);
+    ASSERT_GE(new_table->partitions.size(), 1);
+    ASSERT_EQ(GetPartitionName(new_table->partitions[0]), GetPartitionName(imported->partitions[0]));
 }
diff --git a/fs_mgr/liblp/reader.cpp b/fs_mgr/liblp/reader.cpp
index a0eeec9..d680baf 100644
--- a/fs_mgr/liblp/reader.cpp
+++ b/fs_mgr/liblp/reader.cpp
@@ -111,16 +111,6 @@
     return ParseGeometry(buffer.get(), geometry);
 }
 
-// Helper function to read geometry from a device without an open descriptor.
-bool ReadLogicalPartitionGeometry(const char* block_device, LpMetadataGeometry* geometry) {
-    android::base::unique_fd fd(open(block_device, O_RDONLY));
-    if (fd < 0) {
-        PERROR << __PRETTY_FUNCTION__ << "open failed: " << block_device;
-        return false;
-    }
-    return ReadLogicalPartitionGeometry(fd, geometry);
-}
-
 static bool ValidateTableBounds(const LpMetadataHeader& header,
                                 const LpMetadataTableDescriptor& table) {
     if (table.offset > header.tables_size) {
@@ -243,6 +233,26 @@
     return metadata;
 }
 
+std::unique_ptr<LpMetadata> ReadPrimaryMetadata(int fd, const LpMetadataGeometry& geometry,
+                                                uint32_t slot_number) {
+    int64_t offset = GetPrimaryMetadataOffset(geometry, slot_number);
+    if (SeekFile64(fd, offset, SEEK_SET) < 0) {
+        PERROR << __PRETTY_FUNCTION__ << "lseek failed: offset " << offset;
+        return nullptr;
+    }
+    return ParseMetadata(fd);
+}
+
+std::unique_ptr<LpMetadata> ReadBackupMetadata(int fd, const LpMetadataGeometry& geometry,
+                                               uint32_t slot_number) {
+    int64_t offset = GetBackupMetadataOffset(geometry, slot_number);
+    if (SeekFile64(fd, offset, SEEK_END) < 0) {
+        PERROR << __PRETTY_FUNCTION__ << "lseek failed: offset " << offset;
+        return nullptr;
+    }
+    return ParseMetadata(fd);
+}
+
 std::unique_ptr<LpMetadata> ReadMetadata(int fd, uint32_t slot_number) {
     LpMetadataGeometry geometry;
     if (!ReadLogicalPartitionGeometry(fd, &geometry)) {
@@ -254,24 +264,11 @@
         return nullptr;
     }
 
-    // First try the primary copy.
-    int64_t offset = GetPrimaryMetadataOffset(geometry, slot_number);
-    if (SeekFile64(fd, offset, SEEK_SET) < 0) {
-        PERROR << __PRETTY_FUNCTION__ << "lseek failed: offset " << offset;
-        return nullptr;
-    }
-    std::unique_ptr<LpMetadata> metadata = ParseMetadata(fd);
-
-    // If the primary copy failed, try the backup copy.
+    // Read the priamry copy, and if that fails, try the backup.
+    std::unique_ptr<LpMetadata> metadata = ReadPrimaryMetadata(fd, geometry, slot_number);
     if (!metadata) {
-        offset = GetBackupMetadataOffset(geometry, slot_number);
-        if (SeekFile64(fd, offset, SEEK_END) < 0) {
-            PERROR << __PRETTY_FUNCTION__ << "lseek failed: offset " << offset;
-            return nullptr;
-        }
-        metadata = ParseMetadata(fd);
+        metadata = ReadBackupMetadata(fd, geometry, slot_number);
     }
-
     if (metadata) {
         metadata->geometry = geometry;
     }
diff --git a/fs_mgr/liblp/writer.cpp b/fs_mgr/liblp/writer.cpp
index 36c7b5a..963974c 100644
--- a/fs_mgr/liblp/writer.cpp
+++ b/fs_mgr/liblp/writer.cpp
@@ -130,20 +130,9 @@
     return true;
 }
 
-static bool DefaultWriter(int fd, const std::string& blob) {
-    return android::base::WriteFully(fd, blob.data(), blob.size());
-}
-
-static bool WriteMetadata(int fd, const LpMetadataGeometry& geometry, uint32_t slot_number,
-                          const std::string& blob,
-                          std::function<bool(int, const std::string&)> writer) {
-    // Make sure we're writing to a valid metadata slot.
-    if (slot_number >= geometry.metadata_slot_count) {
-        LERROR << "Invalid logical partition metadata slot number.";
-        return false;
-    }
-
-    // Write the primary copy of the metadata.
+static bool WritePrimaryMetadata(int fd, const LpMetadataGeometry& geometry, uint32_t slot_number,
+                                 const std::string& blob,
+                                 const std::function<bool(int, const std::string&)>& writer) {
     int64_t primary_offset = GetPrimaryMetadataOffset(geometry, slot_number);
     if (SeekFile64(fd, primary_offset, SEEK_SET) < 0) {
         PERROR << __PRETTY_FUNCTION__ << "lseek failed: offset " << primary_offset;
@@ -153,8 +142,12 @@
         PERROR << __PRETTY_FUNCTION__ << "write " << blob.size() << " bytes failed";
         return false;
     }
+    return true;
+}
 
-    // Write the backup copy of the metadata.
+static bool WriteBackupMetadata(int fd, const LpMetadataGeometry& geometry, uint32_t slot_number,
+                                const std::string& blob,
+                                const std::function<bool(int, const std::string&)>& writer) {
     int64_t backup_offset = GetBackupMetadataOffset(geometry, slot_number);
     int64_t abs_offset = SeekFile64(fd, backup_offset, SEEK_END);
     if (abs_offset == (int64_t)-1) {
@@ -173,6 +166,27 @@
     return true;
 }
 
+static bool WriteMetadata(int fd, const LpMetadataGeometry& geometry, uint32_t slot_number,
+                          const std::string& blob,
+                          const std::function<bool(int, const std::string&)>& writer) {
+    // Make sure we're writing to a valid metadata slot.
+    if (slot_number >= geometry.metadata_slot_count) {
+        LERROR << "Invalid logical partition metadata slot number.";
+        return false;
+    }
+    if (!WritePrimaryMetadata(fd, geometry, slot_number, blob, writer)) {
+        return false;
+    }
+    if (!WriteBackupMetadata(fd, geometry, slot_number, blob, writer)) {
+        return false;
+    }
+    return true;
+}
+
+static bool DefaultWriter(int fd, const std::string& blob) {
+    return android::base::WriteFully(fd, blob.data(), blob.size());
+}
+
 bool FlashPartitionTable(int fd, const LpMetadata& metadata, uint32_t slot_number) {
     // Before writing geometry and/or logical partition tables, perform some
     // basic checks that the geometry and tables are coherent, and will fit
@@ -205,8 +219,13 @@
     return WriteMetadata(fd, metadata.geometry, slot_number, metadata_blob, DefaultWriter);
 }
 
+static bool CompareMetadata(const LpMetadata& a, const LpMetadata& b) {
+    return !memcmp(a.header.header_checksum, b.header.header_checksum,
+                   sizeof(a.header.header_checksum));
+}
+
 bool UpdatePartitionTable(int fd, const LpMetadata& metadata, uint32_t slot_number,
-                          std::function<bool(int, const std::string&)> writer) {
+                          const std::function<bool(int, const std::string&)>& writer) {
     // Before writing geometry and/or logical partition tables, perform some
     // basic checks that the geometry and tables are coherent, and will fit
     // on the given block device.
@@ -227,6 +246,45 @@
         LERROR << "Incompatible geometry in new logical partition metadata";
         return false;
     }
+
+    // Validate the slot number now, before we call Read*Metadata.
+    if (slot_number >= geometry.metadata_slot_count) {
+        LERROR << "Invalid logical partition metadata slot number.";
+        return false;
+    }
+
+    // Try to read both existing copies of the metadata, if any.
+    std::unique_ptr<LpMetadata> primary = ReadPrimaryMetadata(fd, geometry, slot_number);
+    std::unique_ptr<LpMetadata> backup = ReadBackupMetadata(fd, geometry, slot_number);
+
+    if (primary && (!backup || !CompareMetadata(*primary.get(), *backup.get()))) {
+        // If the backup copy does not match the primary copy, we first
+        // synchronize the backup copy. This guarantees that a partial write
+        // still leaves one copy intact.
+        std::string old_blob;
+        if (!ValidateAndSerializeMetadata(fd, *primary.get(), &old_blob)) {
+            LERROR << "Error serializing primary metadata to repair corrupted backup";
+            return false;
+        }
+        if (!WriteBackupMetadata(fd, geometry, slot_number, old_blob, writer)) {
+            LERROR << "Error writing primary metadata to repair corrupted backup";
+            return false;
+        }
+    } else if (backup && !primary) {
+        // The backup copy is coherent, and the primary is not. Sync it for
+        // safety.
+        std::string old_blob;
+        if (!ValidateAndSerializeMetadata(fd, *backup.get(), &old_blob)) {
+            LERROR << "Error serializing primary metadata to repair corrupted backup";
+            return false;
+        }
+        if (!WritePrimaryMetadata(fd, geometry, slot_number, old_blob, writer)) {
+            LERROR << "Error writing primary metadata to repair corrupted backup";
+            return false;
+        }
+    }
+
+    // Both copies should now be in sync, so we can continue the update.
     return WriteMetadata(fd, geometry, slot_number, blob, writer);
 }