releasetools: Detect incomplete block ranges.

This CL detects incomplete block ranges (e.g. due to holes in
mke2fs-created images). Such block ranges are tagged, so that we won't
attempt to imgdiff those files. Note that the change to blockimgdiff.py,
which uses the tag info, will come in a separate CL.

An 'extra' attribute is added to the RangeSet class, which defaults to
an empty dict. An 'incomplete' tag will be added into the dict by the
caller of the class. The tag is deliberately not added as a direct
attribute, because it is not a property of the ranges being represented,
but rather some storage space for the caller.

This CL also refactors GetSparseImage and RoundUpTo4K into common.py, so
the same code can be called from both ota_from_target_files.py and
validate_target_files.py. Unittests for GetSparseImage() could not be
added, as SparseImage requires data in a specific format.

Bug: 68016761
Test: Run validate_target_files.py on target-files.zip. It skips
      validating files with incomplete block lists, as before.
Test: Run ota_from_target_files.py on angler target-files.zip. It gives
      identical packages w/ and w/o the CL.
Test: pylint on changed files. There are warnings in common.py, but
      they are unrelated to this change.
Change-Id: I126ccfea13c0d5ebcc8c1b4ff1a4f9200e97423a
diff --git a/tools/releasetools/common.py b/tools/releasetools/common.py
index d0ee6ae..d09f60c 100644
--- a/tools/releasetools/common.py
+++ b/tools/releasetools/common.py
@@ -25,6 +25,7 @@
 import re
 import shlex
 import shutil
+import string
 import subprocess
 import sys
 import tempfile
@@ -34,6 +35,7 @@
 from hashlib import sha1, sha256
 
 import blockimgdiff
+import sparse_img
 
 class Options(object):
   def __init__(self):
@@ -124,6 +126,11 @@
   return subprocess.Popen(args, **kwargs)
 
 
+def RoundUpTo4K(value):
+  rounded_up = value + 4095
+  return rounded_up - (rounded_up % 4096)
+
+
 def CloseInheritedPipes():
   """ Gmake in MAC OS has file descriptor (PIPE) leak. We close those fds
   before doing other work."""
@@ -618,6 +625,56 @@
   return tmp, zipfile.ZipFile(filename, "r")
 
 
+def GetSparseImage(which, tmpdir, input_zip):
+  """Returns a SparseImage object suitable for passing to BlockImageDiff.
+
+  This function loads the specified sparse image from the given path, and
+  performs additional processing for OTA purpose. For example, it always adds
+  block 0 to clobbered blocks list. It also detects files that cannot be
+  reconstructed from the block list, for whom we should avoid applying imgdiff.
+
+  Args:
+    which: The partition name, which must be "system" or "vendor".
+    tmpdir: The directory that contains the prebuilt image and block map file.
+    input_zip: The target-files ZIP archive.
+
+  Returns:
+    A SparseImage object, with file_map info loaded.
+  """
+  assert which in ("system", "vendor")
+
+  path = os.path.join(tmpdir, "IMAGES", which + ".img")
+  mappath = os.path.join(tmpdir, "IMAGES", which + ".map")
+
+  # The image and map files must have been created prior to calling
+  # ota_from_target_files.py (since LMP).
+  assert os.path.exists(path) and os.path.exists(mappath)
+
+  # In ext4 filesystems, block 0 might be changed even being mounted R/O. We add
+  # it to clobbered_blocks so that it will be written to the target
+  # unconditionally. Note that they are still part of care_map. (Bug: 20939131)
+  clobbered_blocks = "0"
+
+  image = sparse_img.SparseImage(path, mappath, clobbered_blocks)
+
+  # block.map may contain less blocks, because mke2fs may skip allocating blocks
+  # if they contain all zeros. We can't reconstruct such a file from its block
+  # list. Tag such entries accordingly. (Bug: 65213616)
+  for entry in image.file_map:
+    # "/system/framework/am.jar" => "SYSTEM/framework/am.jar".
+    arcname = string.replace(entry, which, which.upper(), 1)[1:]
+    # Skip artificial names, such as "__ZERO", "__NONZERO-1".
+    if arcname not in input_zip.namelist():
+      continue
+
+    info = input_zip.getinfo(arcname)
+    ranges = image.file_map[entry]
+    if RoundUpTo4K(info.file_size) > ranges.size() * 4096:
+      ranges.extra['incomplete'] = True
+
+  return image
+
+
 def GetKeyPasswords(keylist):
   """Given a list of keys, prompt the user to enter passwords for
   those which require them.  Return a {key: password} dict.  password
diff --git a/tools/releasetools/ota_from_target_files.py b/tools/releasetools/ota_from_target_files.py
index 95b7303..0044a87 100755
--- a/tools/releasetools/ota_from_target_files.py
+++ b/tools/releasetools/ota_from_target_files.py
@@ -140,7 +140,6 @@
 
 import common
 import edify_generator
-import sparse_img
 
 if sys.hexversion < 0x02070000:
   print("Python 2.7 or newer is required.", file=sys.stderr)
@@ -452,31 +451,6 @@
         source_info.GetBuildProp("ro.build.thumbprint"))
 
 
-def GetImage(which, tmpdir):
-  """Returns an image object suitable for passing to BlockImageDiff.
-
-  'which' partition must be "system" or "vendor". A prebuilt image and file
-  map must already exist in tmpdir.
-  """
-
-  assert which in ("system", "vendor")
-
-  path = os.path.join(tmpdir, "IMAGES", which + ".img")
-  mappath = os.path.join(tmpdir, "IMAGES", which + ".map")
-
-  # The image and map files must have been created prior to calling
-  # ota_from_target_files.py (since LMP).
-  assert os.path.exists(path) and os.path.exists(mappath)
-
-  # Bug: http://b/20939131
-  # In ext4 filesystems, block 0 might be changed even being mounted
-  # R/O. We add it to clobbered_blocks so that it will be written to the
-  # target unconditionally. Note that they are still part of care_map.
-  clobbered_blocks = "0"
-
-  return sparse_img.SparseImage(path, mappath, clobbered_blocks)
-
-
 def AddCompatibilityArchiveIfTrebleEnabled(target_zip, output_zip, target_info,
                                            source_info=None):
   """Adds compatibility info into the output zip if it's Treble-enabled target.
@@ -662,7 +636,7 @@
   # has the effect of writing new data from the package to the entire
   # partition, but lets us reuse the updater code that writes incrementals to
   # do it.
-  system_tgt = GetImage("system", OPTIONS.input_tmp)
+  system_tgt = common.GetSparseImage("system", OPTIONS.input_tmp, input_zip)
   system_tgt.ResetFileMap()
   system_diff = common.BlockDifference("system", system_tgt, src=None)
   system_diff.WriteScript(script, output_zip)
@@ -673,7 +647,7 @@
   if HasVendorPartition(input_zip):
     script.ShowProgress(0.1, 0)
 
-    vendor_tgt = GetImage("vendor", OPTIONS.input_tmp)
+    vendor_tgt = common.GetSparseImage("vendor", OPTIONS.input_tmp, input_zip)
     vendor_tgt.ResetFileMap()
     vendor_diff = common.BlockDifference("vendor", vendor_tgt)
     vendor_diff.WriteScript(script, output_zip)
@@ -846,8 +820,8 @@
   target_recovery = common.GetBootableImage(
       "/tmp/recovery.img", "recovery.img", OPTIONS.target_tmp, "RECOVERY")
 
-  system_src = GetImage("system", OPTIONS.source_tmp)
-  system_tgt = GetImage("system", OPTIONS.target_tmp)
+  system_src = common.GetSparseImage("system", OPTIONS.source_tmp, source_zip)
+  system_tgt = common.GetSparseImage("system", OPTIONS.target_tmp, target_zip)
 
   blockimgdiff_version = max(
       int(i) for i in target_info.get("blockimgdiff_versions", "1").split(","))
@@ -872,8 +846,8 @@
   if HasVendorPartition(target_zip):
     if not HasVendorPartition(source_zip):
       raise RuntimeError("can't generate incremental that adds /vendor")
-    vendor_src = GetImage("vendor", OPTIONS.source_tmp)
-    vendor_tgt = GetImage("vendor", OPTIONS.target_tmp)
+    vendor_src = common.GetSparseImage("vendor", OPTIONS.source_tmp, source_zip)
+    vendor_tgt = common.GetSparseImage("vendor", OPTIONS.target_tmp, target_zip)
 
     # Check first block of vendor partition for remount R/W only if
     # disk type is ext4
diff --git a/tools/releasetools/rangelib.py b/tools/releasetools/rangelib.py
index 87380a5..8af61c3 100644
--- a/tools/releasetools/rangelib.py
+++ b/tools/releasetools/rangelib.py
@@ -25,6 +25,7 @@
 
   def __init__(self, data=None):
     self.monotonic = False
+    self._extra = {}
     if isinstance(data, str):
       self._parse_internal(data)
     elif data:
@@ -56,6 +57,10 @@
   def __repr__(self):
     return '<RangeSet("' + self.to_string() + '")>'
 
+  @property
+  def extra(self):
+    return self._extra
+
   @classmethod
   def parse(cls, text):
     """Parse a text string consisting of a space-separated list of
diff --git a/tools/releasetools/validate_target_files.py b/tools/releasetools/validate_target_files.py
index b590392..1b3eb73 100755
--- a/tools/releasetools/validate_target_files.py
+++ b/tools/releasetools/validate_target_files.py
@@ -29,35 +29,17 @@
 import sys
 
 import common
-import sparse_img
-
-
-def _GetImage(which, tmpdir):
-  assert which in ('system', 'vendor')
-
-  path = os.path.join(tmpdir, 'IMAGES', which + '.img')
-  mappath = os.path.join(tmpdir, 'IMAGES', which + '.map')
-
-  # Map file must exist (allowed to be empty).
-  assert os.path.exists(path) and os.path.exists(mappath)
-
-  clobbered_blocks = '0'
-  return sparse_img.SparseImage(path, mappath, clobbered_blocks)
 
 
 def _ReadFile(file_name, unpacked_name, round_up=False):
   """Constructs and returns a File object. Rounds up its size if needed."""
 
-  def RoundUpTo4K(value):
-    rounded_up = value + 4095
-    return rounded_up - (rounded_up % 4096)
-
   assert os.path.exists(unpacked_name)
   with open(unpacked_name, 'r') as f:
     file_data = f.read()
   file_size = len(file_data)
   if round_up:
-    file_size_rounded_up = RoundUpTo4K(file_size)
+    file_size_rounded_up = common.RoundUpTo4K(file_size)
     file_data += '\0' * (file_size_rounded_up - file_size)
   return common.File(file_name, file_data)
 
@@ -79,33 +61,28 @@
 
   def CheckAllFiles(which):
     logging.info('Checking %s image.', which)
-    image = _GetImage(which, input_tmp)
+    image = common.GetSparseImage(which, input_tmp, input_zip)
     prefix = '/' + which
     for entry in image.file_map:
+      # Skip entries like '__NONZERO-0'.
       if not entry.startswith(prefix):
         continue
 
       # Read the blocks that the file resides. Note that it will contain the
       # bytes past the file length, which is expected to be padded with '\0's.
       ranges = image.file_map[entry]
+
+      incomplete = ranges.extra.get('incomplete', False)
+      if incomplete:
+        logging.warning('Skipping %s that has incomplete block list', entry)
+        continue
+
       blocks_sha1 = image.RangeSha1(ranges)
 
       # The filename under unpacked directory, such as SYSTEM/bin/sh.
       unpacked_name = os.path.join(
           input_tmp, which.upper(), entry[(len(prefix) + 1):])
       unpacked_file = _ReadFile(entry, unpacked_name, True)
-      file_size = unpacked_file.size
-
-      # block.map may contain less blocks, because mke2fs may skip allocating
-      # blocks if they contain all zeros. We can't reconstruct such a file from
-      # its block list. (Bug: 65213616)
-      if file_size > ranges.size() * 4096:
-        logging.warning(
-            'Skipping %s that has less blocks: file size %d-byte,'
-            ' ranges %s (%d-byte)', entry, file_size, ranges,
-            ranges.size() * 4096)
-        continue
-
       file_sha1 = unpacked_file.sha1
       assert blocks_sha1 == file_sha1, \
           'file: %s, range: %s, blocks_sha1: %s, file_sha1: %s' % (