Andreas Gampe | 225e1b0 | 2019-01-15 14:53:24 -0800 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2018 The Android Open Source Project |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
| 17 | #define LOG_TAG "apexd" |
| 18 | |
| 19 | #include "apexd_loop.h" |
| 20 | |
Martijn Coenen | 22613b7 | 2020-08-04 12:02:43 +0200 | [diff] [blame] | 21 | #include <mutex> |
| 22 | |
Andreas Gampe | 225e1b0 | 2019-01-15 14:53:24 -0800 | [diff] [blame] | 23 | #include <dirent.h> |
| 24 | #include <fcntl.h> |
| 25 | #include <linux/fs.h> |
| 26 | #include <linux/loop.h> |
Andreas Gampe | 225e1b0 | 2019-01-15 14:53:24 -0800 | [diff] [blame] | 27 | #include <sys/ioctl.h> |
| 28 | #include <sys/stat.h> |
| 29 | #include <sys/types.h> |
| 30 | #include <unistd.h> |
| 31 | |
| 32 | #include <android-base/file.h> |
| 33 | #include <android-base/logging.h> |
| 34 | #include <android-base/stringprintf.h> |
| 35 | #include <android-base/strings.h> |
| 36 | |
Jiyong Park | d8a82ce | 2019-02-25 02:21:18 +0900 | [diff] [blame] | 37 | #include "apexd_utils.h" |
Andreas Gampe | 225e1b0 | 2019-01-15 14:53:24 -0800 | [diff] [blame] | 38 | #include "string_log.h" |
| 39 | |
Mohammad Samiul Islam | bd6ab0f | 2019-06-20 15:55:27 +0100 | [diff] [blame] | 40 | using android::base::Error; |
| 41 | using android::base::Result; |
Andreas Gampe | 225e1b0 | 2019-01-15 14:53:24 -0800 | [diff] [blame] | 42 | using android::base::StartsWith; |
| 43 | using android::base::StringPrintf; |
| 44 | using android::base::unique_fd; |
| 45 | |
Martijn Coenen | 22613b7 | 2020-08-04 12:02:43 +0200 | [diff] [blame] | 46 | #ifndef LOOP_CONFIGURE |
| 47 | // These can be removed whenever we pull in the Linux v5.8 UAPI headers |
| 48 | struct loop_config { |
| 49 | __u32 fd; |
| 50 | __u32 block_size; |
| 51 | struct loop_info64 info; |
| 52 | __u64 __reserved[8]; |
| 53 | }; |
| 54 | #define LOOP_CONFIGURE 0x4C0A |
| 55 | #endif |
| 56 | |
Andreas Gampe | 225e1b0 | 2019-01-15 14:53:24 -0800 | [diff] [blame] | 57 | namespace android { |
| 58 | namespace apex { |
| 59 | namespace loop { |
| 60 | |
// Tag stored in lo_crypt_name of every loop device set up by this file; it is
// checked again on teardown so that only our own devices are released.
static constexpr const char* kApexLoopIdPrefix{"apex:"};

// Read-ahead window, in kB, written to the device's sysfs queue settings.
// 128 kB is what we currently use for /system as well.
static constexpr const char* kReadAheadKb{"128"};

// How many times opening a freshly allocated loop device node is attempted.
//
// TODO(b/122059364): Even though the kernel has created the loop device, we
// still depend on ueventd to run to actually create the device node in
// userspace. To solve this properly we should listen on the netlink socket
// for uevents, or use inotify. For now, this will have to do.
static constexpr size_t kLoopDeviceRetryAttempts{3};
| 72 | |
Andreas Gampe | 225e1b0 | 2019-01-15 14:53:24 -0800 | [diff] [blame] | 73 | void LoopbackDeviceUniqueFd::MaybeCloseBad() { |
| 74 | if (device_fd.get() != -1) { |
| 75 | // Disassociate any files. |
| 76 | if (ioctl(device_fd.get(), LOOP_CLR_FD) == -1) { |
| 77 | PLOG(ERROR) << "Unable to clear fd for loopback device"; |
| 78 | } |
| 79 | } |
| 80 | } |
| 81 | |
Mohammad Samiul Islam | bd6ab0f | 2019-06-20 15:55:27 +0100 | [diff] [blame] | 82 | Result<void> configureReadAhead(const std::string& device_path) { |
Andreas Gampe | 225e1b0 | 2019-01-15 14:53:24 -0800 | [diff] [blame] | 83 | auto pos = device_path.find("/dev/block/"); |
| 84 | if (pos != 0) { |
Mohammad Samiul Islam | bd6ab0f | 2019-06-20 15:55:27 +0100 | [diff] [blame] | 85 | return Error() << "Device path does not start with /dev/block."; |
Andreas Gampe | 225e1b0 | 2019-01-15 14:53:24 -0800 | [diff] [blame] | 86 | } |
Nikita Ioffe | 6bea4e5 | 2019-02-10 22:46:05 +0000 | [diff] [blame] | 87 | pos = device_path.find_last_of('/'); |
Andreas Gampe | 225e1b0 | 2019-01-15 14:53:24 -0800 | [diff] [blame] | 88 | std::string device_name = device_path.substr(pos + 1, std::string::npos); |
| 89 | |
| 90 | std::string sysfs_device = |
| 91 | StringPrintf("/sys/block/%s/queue/read_ahead_kb", device_name.c_str()); |
| 92 | unique_fd sysfs_fd(open(sysfs_device.c_str(), O_RDWR | O_CLOEXEC)); |
| 93 | if (sysfs_fd.get() == -1) { |
Mohammad Samiul Islam | bd6ab0f | 2019-06-20 15:55:27 +0100 | [diff] [blame] | 94 | return ErrnoError() << "Failed to open " << sysfs_device; |
Andreas Gampe | 225e1b0 | 2019-01-15 14:53:24 -0800 | [diff] [blame] | 95 | } |
| 96 | |
| 97 | int ret = TEMP_FAILURE_RETRY( |
| 98 | write(sysfs_fd.get(), kReadAheadKb, strlen(kReadAheadKb) + 1)); |
| 99 | if (ret < 0) { |
Mohammad Samiul Islam | bd6ab0f | 2019-06-20 15:55:27 +0100 | [diff] [blame] | 100 | return ErrnoError() << "Failed to write to " << sysfs_device; |
Andreas Gampe | 225e1b0 | 2019-01-15 14:53:24 -0800 | [diff] [blame] | 101 | } |
| 102 | |
Mohammad Samiul Islam | bd6ab0f | 2019-06-20 15:55:27 +0100 | [diff] [blame] | 103 | return {}; |
Andreas Gampe | 225e1b0 | 2019-01-15 14:53:24 -0800 | [diff] [blame] | 104 | } |
| 105 | |
Mohammad Samiul Islam | bd6ab0f | 2019-06-20 15:55:27 +0100 | [diff] [blame] | 106 | Result<void> preAllocateLoopDevices(size_t num) { |
| 107 | Result<void> loopReady = WaitForFile("/dev/loop-control", 20s); |
Bernie Innocenti | d04d5d0 | 2020-02-06 22:01:51 +0900 | [diff] [blame] | 108 | if (!loopReady.ok()) { |
Jiyong Park | d8a82ce | 2019-02-25 02:21:18 +0900 | [diff] [blame] | 109 | return loopReady; |
| 110 | } |
Jiyong Park | 4d0f832 | 2019-02-02 19:45:57 +0900 | [diff] [blame] | 111 | unique_fd ctl_fd( |
| 112 | TEMP_FAILURE_RETRY(open("/dev/loop-control", O_RDWR | O_CLOEXEC))); |
| 113 | if (ctl_fd.get() == -1) { |
Mohammad Samiul Islam | bd6ab0f | 2019-06-20 15:55:27 +0100 | [diff] [blame] | 114 | return ErrnoError() << "Failed to open loop-control"; |
Jiyong Park | 4d0f832 | 2019-02-02 19:45:57 +0900 | [diff] [blame] | 115 | } |
| 116 | |
Jooyung Han | eeafec4 | 2019-03-21 01:54:16 +0900 | [diff] [blame] | 117 | // Assumption: loop device ID [0..num) is valid. |
| 118 | // This is because pre-allocation happens during bootstrap. |
| 119 | // Anyway Kernel pre-allocated loop devices |
| 120 | // as many as CONFIG_BLK_DEV_LOOP_MIN_COUNT, |
| 121 | // Within the amount of kernel-pre-allocation, |
| 122 | // LOOP_CTL_ADD will fail with EEXIST |
| 123 | for (size_t id = 0ul; id < num; ++id) { |
| 124 | int ret = ioctl(ctl_fd.get(), LOOP_CTL_ADD, id); |
| 125 | if (ret < 0 && errno != EEXIST) { |
Mohammad Samiul Islam | bd6ab0f | 2019-06-20 15:55:27 +0100 | [diff] [blame] | 126 | return ErrnoError() << "Failed LOOP_CTL_ADD"; |
Jiyong Park | 4d0f832 | 2019-02-02 19:45:57 +0900 | [diff] [blame] | 127 | } |
| 128 | } |
Jooyung Han | eeafec4 | 2019-03-21 01:54:16 +0900 | [diff] [blame] | 129 | |
Jiyong Park | 4d0f832 | 2019-02-02 19:45:57 +0900 | [diff] [blame] | 130 | // Don't wait until the dev nodes are actually created, which |
| 131 | // will delay the boot. By simply returing here, the creation of the dev |
| 132 | // nodes will be done in parallel with other boot processes, and we |
| 133 | // just optimistally hope that they are all created when we actually |
| 134 | // access them for activating APEXes. If the dev nodes are not ready |
| 135 | // even then, we wait 50ms and warning message will be printed (see below |
| 136 | // createLoopDevice()). |
| 137 | LOG(INFO) << "Pre-allocated " << num << " loopback devices"; |
Mohammad Samiul Islam | bd6ab0f | 2019-06-20 15:55:27 +0100 | [diff] [blame] | 138 | return {}; |
Jiyong Park | 4d0f832 | 2019-02-02 19:45:57 +0900 | [diff] [blame] | 139 | } |
| 140 | |
Martijn Coenen | 22613b7 | 2020-08-04 12:02:43 +0200 | [diff] [blame] | 141 | Result<void> configureLoopDevice(const int device_fd, const std::string& target, |
| 142 | const int32_t imageOffset, |
| 143 | const size_t imageSize) { |
| 144 | static bool useLoopConfigure; |
| 145 | static std::once_flag onceFlag; |
| 146 | std::call_once(onceFlag, [&]() { |
| 147 | // LOOP_CONFIGURE is a new ioctl in Linux 5.8 (and backported in Android |
| 148 | // common) that allows atomically configuring a loop device. It is a lot |
| 149 | // faster than the traditional LOOP_SET_FD/LOOP_SET_STATUS64 combo, but |
| 150 | // it may not be available on updating devices, so try once before |
| 151 | // deciding. |
| 152 | struct loop_config config; |
| 153 | memset(&config, 0, sizeof(config)); |
| 154 | config.fd = -1; |
| 155 | if (ioctl(device_fd, LOOP_CONFIGURE, &config) == -1 && errno == EBADF) { |
| 156 | // If the IOCTL exists, it will fail with EBADF for the -1 fd |
| 157 | useLoopConfigure = true; |
| 158 | } |
| 159 | }); |
| 160 | |
| 161 | /* |
| 162 | * Using O_DIRECT will tell the kernel that we want to use Direct I/O |
| 163 | * on the underlying file, which we want to do to avoid double caching. |
| 164 | * Note that Direct I/O won't be enabled immediately, because the block |
| 165 | * size of the underlying block device may not match the default loop |
| 166 | * device block size (512); when we call LOOP_SET_BLOCK_SIZE below, the |
| 167 | * kernel driver will automatically enable Direct I/O when it sees that |
| 168 | * condition is now met. |
| 169 | */ |
| 170 | unique_fd target_fd(open(target.c_str(), O_RDONLY | O_CLOEXEC | O_DIRECT)); |
| 171 | if (target_fd.get() == -1) { |
| 172 | return ErrnoError() << "Failed to open " << target; |
| 173 | } |
| 174 | |
| 175 | struct loop_info64 li; |
| 176 | memset(&li, 0, sizeof(li)); |
| 177 | strlcpy((char*)li.lo_crypt_name, kApexLoopIdPrefix, LO_NAME_SIZE); |
| 178 | li.lo_offset = imageOffset; |
| 179 | li.lo_sizelimit = imageSize; |
| 180 | |
| 181 | if (useLoopConfigure) { |
| 182 | struct loop_config config; |
| 183 | memset(&config, 0, sizeof(config)); |
| 184 | li.lo_flags |= LO_FLAGS_DIRECT_IO; |
| 185 | config.fd = target_fd.get(); |
| 186 | config.info = li; |
| 187 | config.block_size = 4096; |
| 188 | |
| 189 | if (ioctl(device_fd, LOOP_CONFIGURE, &config) == -1) { |
| 190 | return ErrnoError() << "Failed to LOOP_CONFIGURE"; |
| 191 | } |
| 192 | |
| 193 | return {}; |
| 194 | } else { |
| 195 | if (ioctl(device_fd, LOOP_SET_FD, target_fd.get()) == -1) { |
| 196 | return ErrnoError() << "Failed to LOOP_SET_FD"; |
| 197 | } |
| 198 | |
| 199 | if (ioctl(device_fd, LOOP_SET_STATUS64, &li) == -1) { |
| 200 | return ErrnoError() << "Failed to LOOP_SET_STATUS64"; |
| 201 | } |
| 202 | |
| 203 | if (ioctl(device_fd, BLKFLSBUF, 0) == -1) { |
| 204 | // This works around a kernel bug where the following happens. |
| 205 | // 1) The device runs with a value of loop.max_part > 0 |
| 206 | // 2) As part of LOOP_SET_FD above, we do a partition scan, which loads |
| 207 | // the first 2 pages of the underlying file into the buffer cache |
| 208 | // 3) When we then change the offset with LOOP_SET_STATUS64, those pages |
| 209 | // are not invalidated from the cache. |
| 210 | // 4) When we try to mount an ext4 filesystem on the loop device, the ext4 |
| 211 | // code will try to find a superblock by reading 4k at offset 0; but, |
| 212 | // because we still have the old pages at offset 0 lying in the cache, |
| 213 | // those pages will be returned directly. However, those pages contain |
| 214 | // the data at offset 0 in the underlying file, not at the offset that |
| 215 | // we configured |
| 216 | // 5) the ext4 driver fails to find a superblock in the (wrong) data, and |
| 217 | // fails to mount the filesystem. |
| 218 | // |
| 219 | // To work around this, explicitly flush the block device, which will |
| 220 | // flush the buffer cache and make sure we actually read the data at the |
| 221 | // correct offset. |
| 222 | return ErrnoError() << "Failed to flush buffers on the loop device"; |
| 223 | } |
| 224 | |
| 225 | // Direct-IO requires the loop device to have the same block size as the |
| 226 | // underlying filesystem. |
| 227 | if (ioctl(device_fd, LOOP_SET_BLOCK_SIZE, 4096) == -1) { |
| 228 | PLOG(WARNING) << "Failed to LOOP_SET_BLOCK_SIZE"; |
| 229 | } |
| 230 | } |
| 231 | return {}; |
| 232 | } |
| 233 | |
Mohammad Samiul Islam | bd6ab0f | 2019-06-20 15:55:27 +0100 | [diff] [blame] | 234 | Result<LoopbackDeviceUniqueFd> createLoopDevice(const std::string& target, |
| 235 | const int32_t imageOffset, |
| 236 | const size_t imageSize) { |
Andreas Gampe | 225e1b0 | 2019-01-15 14:53:24 -0800 | [diff] [blame] | 237 | unique_fd ctl_fd(open("/dev/loop-control", O_RDWR | O_CLOEXEC)); |
| 238 | if (ctl_fd.get() == -1) { |
Mohammad Samiul Islam | bd6ab0f | 2019-06-20 15:55:27 +0100 | [diff] [blame] | 239 | return ErrnoError() << "Failed to open loop-control"; |
Andreas Gampe | 225e1b0 | 2019-01-15 14:53:24 -0800 | [diff] [blame] | 240 | } |
| 241 | |
| 242 | int num = ioctl(ctl_fd.get(), LOOP_CTL_GET_FREE); |
| 243 | if (num == -1) { |
Mohammad Samiul Islam | bd6ab0f | 2019-06-20 15:55:27 +0100 | [diff] [blame] | 244 | return ErrnoError() << "Failed LOOP_CTL_GET_FREE"; |
Andreas Gampe | 225e1b0 | 2019-01-15 14:53:24 -0800 | [diff] [blame] | 245 | } |
| 246 | |
| 247 | std::string device = StringPrintf("/dev/block/loop%d", num); |
| 248 | |
Andreas Gampe | d69625c | 2019-01-18 12:41:11 -0800 | [diff] [blame] | 249 | LoopbackDeviceUniqueFd device_fd; |
| 250 | { |
| 251 | // See comment on kLoopDeviceRetryAttempts. |
| 252 | unique_fd sysfs_fd; |
| 253 | for (size_t i = 0; i != kLoopDeviceRetryAttempts; ++i) { |
| 254 | sysfs_fd.reset(open(device.c_str(), O_RDWR | O_CLOEXEC)); |
| 255 | if (sysfs_fd.get() != -1) { |
| 256 | break; |
| 257 | } |
Jiyong Park | 4d0f832 | 2019-02-02 19:45:57 +0900 | [diff] [blame] | 258 | PLOG(WARNING) << "Loopback device " << device |
| 259 | << " not ready. Waiting 50ms..."; |
Andreas Gampe | d69625c | 2019-01-18 12:41:11 -0800 | [diff] [blame] | 260 | usleep(50000); |
| 261 | } |
| 262 | if (sysfs_fd.get() == -1) { |
Mohammad Samiul Islam | bd6ab0f | 2019-06-20 15:55:27 +0100 | [diff] [blame] | 263 | return ErrnoError() << "Failed to open " << device; |
Andreas Gampe | d69625c | 2019-01-18 12:41:11 -0800 | [diff] [blame] | 264 | } |
| 265 | device_fd = LoopbackDeviceUniqueFd(std::move(sysfs_fd), device); |
| 266 | CHECK_NE(device_fd.get(), -1); |
Andreas Gampe | 225e1b0 | 2019-01-15 14:53:24 -0800 | [diff] [blame] | 267 | } |
| 268 | |
Martijn Coenen | 22613b7 | 2020-08-04 12:02:43 +0200 | [diff] [blame] | 269 | Result<void> configureStatus = |
| 270 | configureLoopDevice(device_fd.get(), target, imageOffset, imageSize); |
| 271 | if (!configureStatus.ok()) { |
| 272 | return configureStatus.error(); |
Andreas Gampe | 225e1b0 | 2019-01-15 14:53:24 -0800 | [diff] [blame] | 273 | } |
| 274 | |
Mohammad Samiul Islam | bd6ab0f | 2019-06-20 15:55:27 +0100 | [diff] [blame] | 275 | Result<void> readAheadStatus = configureReadAhead(device); |
Bernie Innocenti | d04d5d0 | 2020-02-06 22:01:51 +0900 | [diff] [blame] | 276 | if (!readAheadStatus.ok()) { |
Mohammad Samiul Islam | bd6ab0f | 2019-06-20 15:55:27 +0100 | [diff] [blame] | 277 | return readAheadStatus.error(); |
Andreas Gampe | 225e1b0 | 2019-01-15 14:53:24 -0800 | [diff] [blame] | 278 | } |
Mohammad Samiul Islam | bd6ab0f | 2019-06-20 15:55:27 +0100 | [diff] [blame] | 279 | return device_fd; |
Andreas Gampe | 225e1b0 | 2019-01-15 14:53:24 -0800 | [diff] [blame] | 280 | } |
| 281 | |
Nikita Ioffe | 6bea4e5 | 2019-02-10 22:46:05 +0000 | [diff] [blame] | 282 | void DestroyLoopDevice(const std::string& path, const DestroyLoopFn& extra) { |
Andreas Gampe | 225e1b0 | 2019-01-15 14:53:24 -0800 | [diff] [blame] | 283 | unique_fd fd(open(path.c_str(), O_RDWR | O_CLOEXEC)); |
| 284 | if (fd.get() == -1) { |
| 285 | if (errno != ENOENT) { |
| 286 | PLOG(WARNING) << "Failed to open " << path; |
| 287 | } |
| 288 | return; |
| 289 | } |
| 290 | |
| 291 | struct loop_info64 li; |
| 292 | if (ioctl(fd.get(), LOOP_GET_STATUS64, &li) < 0) { |
| 293 | if (errno != ENXIO) { |
| 294 | PLOG(WARNING) << "Failed to LOOP_GET_STATUS64 " << path; |
| 295 | } |
| 296 | return; |
| 297 | } |
| 298 | |
| 299 | auto id = std::string((char*)li.lo_crypt_name); |
| 300 | if (StartsWith(id, kApexLoopIdPrefix)) { |
| 301 | extra(path, id); |
| 302 | |
| 303 | if (ioctl(fd.get(), LOOP_CLR_FD, 0) < 0) { |
| 304 | PLOG(WARNING) << "Failed to LOOP_CLR_FD " << path; |
| 305 | } |
| 306 | } |
| 307 | } |
| 308 | |
Andreas Gampe | 225e1b0 | 2019-01-15 14:53:24 -0800 | [diff] [blame] | 309 | } // namespace loop |
| 310 | } // namespace apex |
| 311 | } // namespace android |