Merge changes from topic "wait_for_merge"
am: 18c6248ffe

Change-Id: Ibccf63cfdaabb8610c24d2b2271d7cb3cda016f2
diff --git a/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h b/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h
index 5738b96..445e6db 100644
--- a/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h
+++ b/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h
@@ -198,6 +198,13 @@
     //   - other states indicating an error has occurred
     UpdateState InitiateMergeAndWait();
 
+    // Wait for the merge if rebooted into the new slot. Does NOT initiate a
+    // merge. If the merge has not been initiated (but should be), wait.
+    // Returns:
+    //   - true there is no merge or merge finishes
+    //   - false indicating an error has occurred
+    bool WaitForMerge();
+
     // Find the status of the current update, if any.
     //
     // |progress| depends on the returned status:
@@ -496,6 +503,10 @@
     // as a sanity check.
     bool EnsureNoOverflowSnapshot(LockedFile* lock);
 
+    enum class Slot { Unknown, Source, Target };
+    friend std::ostream& operator<<(std::ostream& os, SnapshotManager::Slot slot);
+    Slot GetCurrentSlot();
+
     std::string gsid_dir_;
     std::string metadata_dir_;
     std::unique_ptr<IDeviceInfo> device_;
diff --git a/fs_mgr/libsnapshot/snapshot.cpp b/fs_mgr/libsnapshot/snapshot.cpp
index f38db43..a0ec068 100644
--- a/fs_mgr/libsnapshot/snapshot.cpp
+++ b/fs_mgr/libsnapshot/snapshot.cpp
@@ -74,6 +74,7 @@
 using namespace std::string_literals;
 
 static constexpr char kBootIndicatorPath[] = "/metadata/ota/snapshot-boot";
+static constexpr auto kUpdateStateCheckInterval = 2s;
 
 // Note: IImageManager is an incomplete type in the header, so the default
 // destructor doesn't work.
@@ -171,14 +172,9 @@
 
     if (state == UpdateState::Unverified) {
         // We completed an update, but it can still be canceled if we haven't booted into it.
-        auto boot_file = GetSnapshotBootIndicatorPath();
-        std::string contents;
-        if (!android::base::ReadFileToString(boot_file, &contents)) {
-            PLOG(WARNING) << "Cannot read " << boot_file << ", proceed to canceling the update:";
-            return RemoveAllUpdateState(file.get());
-        }
-        if (device_->GetSlotSuffix() == contents) {
-            LOG(INFO) << "Canceling a previously completed update";
+        auto slot = GetCurrentSlot();
+        if (slot != Slot::Target) {
+            LOG(INFO) << "Canceling previously completed updates (if any)";
             return RemoveAllUpdateState(file.get());
         }
     }
@@ -186,6 +182,19 @@
     return true;
 }
 
+SnapshotManager::Slot SnapshotManager::GetCurrentSlot() {
+    auto boot_file = GetSnapshotBootIndicatorPath();
+    std::string contents;
+    if (!android::base::ReadFileToString(boot_file, &contents)) {
+        PLOG(WARNING) << "Cannot read " << boot_file;
+        return Slot::Unknown;
+    }
+    if (device_->GetSlotSuffix() == contents) {
+        return Slot::Source;
+    }
+    return Slot::Target;
+}
+
 bool SnapshotManager::RemoveAllUpdateState(LockedFile* lock) {
     if (!RemoveAllSnapshots(lock)) {
         LOG(ERROR) << "Could not remove all snapshots";
@@ -505,15 +514,9 @@
         return false;
     }
 
-    std::string old_slot;
-    auto boot_file = GetSnapshotBootIndicatorPath();
-    if (!android::base::ReadFileToString(boot_file, &old_slot)) {
-        LOG(ERROR) << "Could not determine the previous slot; aborting merge";
-        return false;
-    }
-    auto new_slot = device_->GetSlotSuffix();
-    if (new_slot == old_slot) {
-        LOG(ERROR) << "Device cannot merge while booting off old slot " << old_slot;
+    auto slot = GetCurrentSlot();
+    if (slot != Slot::Target) {
+        LOG(ERROR) << "Device cannot merge while not booting from new slot";
         return false;
     }
 
@@ -729,7 +732,7 @@
 
         // This wait is not super time sensitive, so we have a relatively
         // low polling frequency.
-        std::this_thread::sleep_for(2s);
+        std::this_thread::sleep_for(kUpdateStateCheckInterval);
     }
 }
 
@@ -1097,13 +1100,11 @@
 }
 
 bool SnapshotManager::HandleCancelledUpdate(LockedFile* lock) {
-    std::string old_slot;
-    auto boot_file = GetSnapshotBootIndicatorPath();
-    if (!android::base::ReadFileToString(boot_file, &old_slot)) {
-        PLOG(ERROR) << "Unable to read the snapshot indicator file: " << boot_file;
+    auto slot = GetCurrentSlot();
+    if (slot == Slot::Unknown) {
         return false;
     }
-    if (device_->GetSlotSuffix() != old_slot) {
+    if (slot == Slot::Target) {
         // We're booted into the target slot, which means we just rebooted
         // after applying the update.
         if (!HandleCancelledUpdateOnNewSlot(lock)) {
@@ -1271,14 +1272,9 @@
     // ultimately we'll fail to boot. Why not make it a fatal error and have
     // the reason be clearer? Because the indicator file still exists, and
     // if this was FATAL, reverting to the old slot would be broken.
-    std::string old_slot;
-    auto boot_file = GetSnapshotBootIndicatorPath();
-    if (!android::base::ReadFileToString(boot_file, &old_slot)) {
-        PLOG(ERROR) << "Unable to read the snapshot indicator file: " << boot_file;
-        return false;
-    }
-    if (device_->GetSlotSuffix() == old_slot) {
-        LOG(INFO) << "Detected slot rollback, will not mount snapshots.";
+    auto slot = GetCurrentSlot();
+    if (slot != Slot::Target) {
+        LOG(INFO) << "Not booting from new slot. Will not mount snapshots.";
         return false;
     }
 
@@ -2156,6 +2152,17 @@
     return ok;
 }
 
+std::ostream& operator<<(std::ostream& os, SnapshotManager::Slot slot) {
+    switch (slot) {
+        case SnapshotManager::Slot::Unknown:
+            return os << "unknown";
+        case SnapshotManager::Slot::Source:
+            return os << "source";
+        case SnapshotManager::Slot::Target:
+            return os << "target";
+    }
+}
+
 bool SnapshotManager::Dump(std::ostream& os) {
     // Don't actually lock. Dump() is for debugging purposes only, so it is okay
     // if it is racy.
@@ -2166,11 +2173,8 @@
 
     ss << "Update state: " << ReadUpdateState(file.get()) << std::endl;
 
-    auto boot_file = GetSnapshotBootIndicatorPath();
-    std::string boot_indicator;
-    if (android::base::ReadFileToString(boot_file, &boot_indicator)) {
-        ss << "Boot indicator: old slot = " << boot_indicator << std::endl;
-    }
+    ss << "Current slot: " << device_->GetSlotSuffix() << std::endl;
+    ss << "Boot indicator: booting from " << GetCurrentSlot() << " slot" << std::endl;
 
     bool ok = true;
     std::vector<std::string> snapshots;
@@ -2238,6 +2242,21 @@
     return state;
 }
 
+bool SnapshotManager::WaitForMerge() {
+    LOG(INFO) << "Waiting for any previous merge request to complete. "
+              << "This can take up to several minutes.";
+    while (true) {
+        auto state = ProcessUpdateState();
+        if (state == UpdateState::Unverified && GetCurrentSlot() == Slot::Target) {
+            LOG(INFO) << "Wait for merge to be initiated.";
+            std::this_thread::sleep_for(kUpdateStateCheckInterval);
+            continue;
+        }
+        LOG(INFO) << "Wait for merge exits with state " << state;
+        return state == UpdateState::None || state == UpdateState::MergeCompleted;
+    }
+}
+
 bool SnapshotManager::HandleImminentDataWipe(const std::function<void()>& callback) {
     if (!device_->IsRecovery()) {
         LOG(ERROR) << "Data wipes are only allowed in recovery.";
@@ -2283,11 +2302,9 @@
             //
             // Since the rollback is inevitable, we don't treat a HAL failure
             // as an error here.
-            std::string old_slot;
-            auto boot_file = GetSnapshotBootIndicatorPath();
-            if (android::base::ReadFileToString(boot_file, &old_slot) &&
-                device_->GetSlotSuffix() != old_slot) {
-                LOG(ERROR) << "Reverting to slot " << old_slot << " since update will be deleted.";
+            auto slot = GetCurrentSlot();
+            if (slot == Slot::Target) {
+                LOG(ERROR) << "Reverting to old slot since update will be deleted.";
                 device_->SetSlotAsUnbootable(slot_number);
             }
             break;
diff --git a/fs_mgr/libsnapshot/snapshot_test.cpp b/fs_mgr/libsnapshot/snapshot_test.cpp
index 964b21a..0f5af14 100644
--- a/fs_mgr/libsnapshot/snapshot_test.cpp
+++ b/fs_mgr/libsnapshot/snapshot_test.cpp
@@ -921,6 +921,25 @@
         return AssertionSuccess();
     }
 
+    // Create fake install operations to grow the COW device size.
+    void AddOperation(PartitionUpdate* partition_update, uint64_t size_bytes = 0) {
+        auto e = partition_update->add_operations()->add_dst_extents();
+        e->set_start_block(0);
+        if (size_bytes == 0) {
+            size_bytes = GetSize(partition_update);
+        }
+        e->set_num_blocks(size_bytes / manifest_.block_size());
+    }
+
+    void AddOperationForPartitions(std::vector<PartitionUpdate*> partitions = {}) {
+        if (partitions.empty()) {
+            partitions = {sys_, vnd_, prd_};
+        }
+        for (auto* partition : partitions) {
+            AddOperation(partition);
+        }
+    }
+
     std::unique_ptr<TestPartitionOpener> opener_;
     DeltaArchiveManifest manifest_;
     std::unique_ptr<MetadataBuilder> src_;
@@ -948,12 +967,7 @@
     SetSize(vnd_, partition_size);
     SetSize(prd_, partition_size);
 
-    // Create fake install operations to grow the COW device size.
-    for (auto& partition : {sys_, vnd_, prd_}) {
-        auto e = partition->add_operations()->add_dst_extents();
-        e->set_start_block(0);
-        e->set_num_blocks(GetSize(partition) / manifest_.block_size());
-    }
+    AddOperationForPartitions();
 
     // Execute the update.
     ASSERT_TRUE(sm->BeginUpdate());
@@ -1089,12 +1103,7 @@
     ASSERT_TRUE(sm->BeginUpdate());
     ASSERT_TRUE(sm->UnmapUpdateSnapshot("sys_b"));
 
-    // Create fake install operations to grow the COW device size.
-    for (auto& partition : {sys_, vnd_, prd_}) {
-        auto e = partition->add_operations()->add_dst_extents();
-        e->set_start_block(0);
-        e->set_num_blocks(GetSize(partition) / manifest_.block_size());
-    }
+    AddOperationForPartitions();
 
     ASSERT_TRUE(sm->CreateUpdateSnapshots(manifest_));
 
@@ -1239,10 +1248,8 @@
     group_->set_size(kRetrofitGroupSize);
     for (auto* partition : {sys_, vnd_, prd_}) {
         SetSize(partition, 2_MiB);
-        auto* e = partition->add_operations()->add_dst_extents();
-        e->set_start_block(0);
-        e->set_num_blocks(2_MiB / manifest_.block_size());
     }
+    AddOperationForPartitions();
 
     ASSERT_TRUE(sm->BeginUpdate());
     ASSERT_TRUE(sm->CreateUpdateSnapshots(manifest_));
@@ -1285,9 +1292,7 @@
     }
 
     // Add operations for sys. The whole device is written.
-    auto e = sys_->add_operations()->add_dst_extents();
-    e->set_start_block(0);
-    e->set_num_blocks(GetSize(sys_) / manifest_.block_size());
+    AddOperation(sys_);
 
     // Execute the update.
     ASSERT_TRUE(sm->BeginUpdate());
@@ -1477,10 +1482,7 @@
 
     const auto block_size = manifest_.block_size();
     SetSize(sys_, partition_size);
-
-    auto e = sys_->add_operations()->add_dst_extents();
-    e->set_start_block(0);
-    e->set_num_blocks(data_size / block_size);
+    AddOperation(sys_, data_size);
 
     // Set hastree extents.
     sys_->mutable_hash_tree_data_extent()->set_start_block(0);
@@ -1525,9 +1527,7 @@
     const auto actual_write_size = GetSize(sys_);
     const auto declared_write_size = actual_write_size - 1_MiB;
 
-    auto e = sys_->add_operations()->add_dst_extents();
-    e->set_start_block(0);
-    e->set_num_blocks(declared_write_size / manifest_.block_size());
+    AddOperation(sys_, declared_write_size);
 
     // Execute the update.
     ASSERT_TRUE(sm->BeginUpdate());
@@ -1546,6 +1546,45 @@
             << "FinishedSnapshotWrites should detect overflow of CoW device.";
 }
 
+TEST_F(SnapshotUpdateTest, WaitForMerge) {
+    AddOperationForPartitions();
+
+    // Execute the update.
+    ASSERT_TRUE(sm->BeginUpdate());
+    ASSERT_TRUE(sm->CreateUpdateSnapshots(manifest_));
+
+    // Write some data to target partitions.
+    for (const auto& name : {"sys_b", "vnd_b", "prd_b"}) {
+        ASSERT_TRUE(WriteSnapshotAndHash(name));
+    }
+
+    ASSERT_TRUE(sm->FinishedSnapshotWrites());
+
+    // Simulate shutting down the device.
+    ASSERT_TRUE(UnmapAll());
+
+    // After reboot, init does first stage mount.
+    {
+        auto init = SnapshotManager::NewForFirstStageMount(new TestDeviceInfo(fake_super, "_b"));
+        ASSERT_NE(nullptr, init);
+        ASSERT_TRUE(init->CreateLogicalAndSnapshotPartitions("super"));
+    }
+
+    auto new_sm = SnapshotManager::New(new TestDeviceInfo(fake_super, "_b"));
+    ASSERT_NE(nullptr, new_sm);
+
+    auto waiter = std::async(std::launch::async, [&new_sm] { return new_sm->WaitForMerge(); });
+    ASSERT_EQ(std::future_status::timeout, waiter.wait_for(1s))
+            << "WaitForMerge should block when not initiated";
+
+    auto merger =
+            std::async(std::launch::async, [&new_sm] { return new_sm->InitiateMergeAndWait(); });
+    // Small images, so should be merged pretty quickly.
+    ASSERT_EQ(std::future_status::ready, waiter.wait_for(3s)) << "WaitForMerge did not finish";
+    ASSERT_TRUE(waiter.get());
+    ASSERT_THAT(merger.get(), AnyOf(UpdateState::None, UpdateState::MergeCompleted));
+}
+
 class FlashAfterUpdateTest : public SnapshotUpdateTest,
                              public WithParamInterface<std::tuple<uint32_t, bool>> {
   public: