Sami Tolvanen | c54a33d | 2015-06-26 14:28:31 +0100 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2015 The Android Open Source Project |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
| 17 | #include <fcntl.h> |
| 18 | #include <stdlib.h> |
| 19 | #include <sys/mman.h> |
| 20 | |
| 21 | extern "C" { |
| 22 | #include <fec.h> |
| 23 | } |
| 24 | |
| 25 | #include "fec_private.h" |
| 26 | |
| 27 | using rs_unique_ptr = std::unique_ptr<void, decltype(&free_rs_char)>; |
| 28 | |
| 29 | /* prints a hexdump of `data' using warn(...) */ |
| 30 | static void dump(const char *name, uint64_t value, const uint8_t *data, |
| 31 | size_t size) |
| 32 | { |
| 33 | const int bytes_per_line = 16; |
| 34 | char hex[bytes_per_line * 3 + 1]; |
| 35 | char prn[bytes_per_line + 1]; |
| 36 | |
| 37 | warn("%s (%" PRIu64 ") (%zu bytes):", name ? name : "", value, size); |
| 38 | |
| 39 | if (!data) { |
| 40 | warn(" (null)"); |
| 41 | return; |
| 42 | } |
| 43 | |
| 44 | for (size_t n = 0; n < size; n += bytes_per_line) { |
| 45 | memset(hex, 0, sizeof(hex)); |
| 46 | memset(prn, 0, sizeof(prn)); |
| 47 | |
| 48 | for (size_t m = 0; m < bytes_per_line; ++m) { |
| 49 | if (n + m < size) { |
George Burgess IV | ac4f7e7 | 2016-03-02 14:15:49 -0800 | [diff] [blame] | 50 | ptrdiff_t offset = &hex[m * 3] - hex; |
| 51 | snprintf(hex + offset, sizeof(hex) - offset, "%02x ", |
| 52 | data[n + m]); |
Sami Tolvanen | c54a33d | 2015-06-26 14:28:31 +0100 | [diff] [blame] | 53 | |
| 54 | if (isprint(data[n + m])) { |
| 55 | prn[m] = data[n + m]; |
| 56 | } else { |
| 57 | prn[m] = '.'; |
| 58 | } |
| 59 | } else { |
| 60 | strcpy(&hex[m * 3], " "); |
| 61 | } |
| 62 | } |
| 63 | |
| 64 | warn(" %04zu %s %s", n, hex, prn); |
| 65 | } |
| 66 | } |
| 67 | |
| 68 | /* checks if `offset' is within a corrupted block */ |
| 69 | static inline bool is_erasure(fec_handle *f, uint64_t offset, |
| 70 | const uint8_t *data) |
| 71 | { |
| 72 | if (unlikely(offset >= f->data_size)) { |
| 73 | return false; |
| 74 | } |
| 75 | |
| 76 | /* ideally, we would like to know if a specific byte on this block has |
| 77 | been corrupted, but knowing whether any of them is can be useful as |
| 78 | well, because often the entire block is corrupted */ |
| 79 | |
| 80 | uint64_t n = offset / FEC_BLOCKSIZE; |
| 81 | |
Tianjie Xu | 622e628 | 2019-12-02 12:55:33 -0800 | [diff] [blame] | 82 | return !f->hashtree().check_block_hash_with_index(n, data); |
Sami Tolvanen | c54a33d | 2015-06-26 14:28:31 +0100 | [diff] [blame] | 83 | } |
| 84 | |
| 85 | /* check if `offset' is within a block expected to contain zeros */ |
| 86 | static inline bool is_zero(fec_handle *f, uint64_t offset) |
| 87 | { |
Tianjie Xu | ef9dc36 | 2019-11-20 12:18:40 -0800 | [diff] [blame] | 88 | auto hashtree = f->hashtree(); |
Sami Tolvanen | c54a33d | 2015-06-26 14:28:31 +0100 | [diff] [blame] | 89 | |
Tianjie Xu | 622e628 | 2019-12-02 12:55:33 -0800 | [diff] [blame] | 90 | if (hashtree.hash_data.empty() || unlikely(offset >= f->data_size)) { |
Sami Tolvanen | c54a33d | 2015-06-26 14:28:31 +0100 | [diff] [blame] | 91 | return false; |
| 92 | } |
| 93 | |
| 94 | uint64_t hash_offset = (offset / FEC_BLOCKSIZE) * SHA256_DIGEST_LENGTH; |
| 95 | |
Tianjie Xu | 622e628 | 2019-12-02 12:55:33 -0800 | [diff] [blame] | 96 | if (unlikely(hash_offset > |
| 97 | hashtree.hash_data.size() - SHA256_DIGEST_LENGTH)) { |
Sami Tolvanen | c54a33d | 2015-06-26 14:28:31 +0100 | [diff] [blame] | 98 | return false; |
| 99 | } |
| 100 | |
Tianjie Xu | 622e628 | 2019-12-02 12:55:33 -0800 | [diff] [blame] | 101 | return !memcmp(hashtree.zero_hash.data(), &hashtree.hash_data[hash_offset], |
Tianjie Xu | ef9dc36 | 2019-11-20 12:18:40 -0800 | [diff] [blame] | 102 | SHA256_DIGEST_LENGTH); |
Sami Tolvanen | c54a33d | 2015-06-26 14:28:31 +0100 | [diff] [blame] | 103 | } |
| 104 | |
| 105 | /* reads and decodes a single block starting from `offset', returns the number |
| 106 | of bytes corrected in `errors' */ |
| 107 | static int __ecc_read(fec_handle *f, void *rs, uint8_t *dest, uint64_t offset, |
| 108 | bool use_erasures, uint8_t *ecc_data, size_t *errors) |
| 109 | { |
| 110 | check(offset % FEC_BLOCKSIZE == 0); |
| 111 | ecc_info *e = &f->ecc; |
| 112 | |
| 113 | /* reverse interleaving: calculate the RS block that includes the requested |
| 114 | offset */ |
| 115 | uint64_t rsb = offset - (offset / (e->rounds * FEC_BLOCKSIZE)) * |
| 116 | e->rounds * FEC_BLOCKSIZE; |
| 117 | int data_index = -1; |
| 118 | int erasures[e->rsn]; |
| 119 | int neras = 0; |
| 120 | |
| 121 | /* verity is required to check for erasures */ |
Tianjie Xu | 622e628 | 2019-12-02 12:55:33 -0800 | [diff] [blame] | 122 | check(!use_erasures || !f->hashtree().hash_data.empty()); |
Sami Tolvanen | c54a33d | 2015-06-26 14:28:31 +0100 | [diff] [blame] | 123 | |
| 124 | for (int i = 0; i < e->rsn; ++i) { |
| 125 | uint64_t interleaved = fec_ecc_interleave(rsb * e->rsn + i, e->rsn, |
| 126 | e->rounds); |
| 127 | |
| 128 | if (interleaved == offset) { |
| 129 | data_index = i; |
| 130 | } |
| 131 | |
Sami Tolvanen | e9677cc | 2015-12-17 13:36:03 +0000 | [diff] [blame] | 132 | /* to improve our chances of correcting IO errors, initialize the |
| 133 | buffer to zeros even if we are going to read to it later */ |
| 134 | uint8_t bbuf[FEC_BLOCKSIZE] = {0}; |
Sami Tolvanen | c54a33d | 2015-06-26 14:28:31 +0100 | [diff] [blame] | 135 | |
Sami Tolvanen | e9677cc | 2015-12-17 13:36:03 +0000 | [diff] [blame] | 136 | if (likely(interleaved < e->start) && !is_zero(f, interleaved)) { |
| 137 | /* copy raw data to reconstruct the RS block */ |
Tianjie Xu | ef9dc36 | 2019-11-20 12:18:40 -0800 | [diff] [blame] | 138 | if (!raw_pread(f->fd, bbuf, FEC_BLOCKSIZE, interleaved)) { |
Sami Tolvanen | e9677cc | 2015-12-17 13:36:03 +0000 | [diff] [blame] | 139 | warn("failed to read: %s", strerror(errno)); |
Sami Tolvanen | c54a33d | 2015-06-26 14:28:31 +0100 | [diff] [blame] | 140 | |
Sami Tolvanen | e9677cc | 2015-12-17 13:36:03 +0000 | [diff] [blame] | 141 | /* treat errors as corruption */ |
| 142 | if (use_erasures && neras <= e->roots) { |
| 143 | erasures[neras++] = i; |
| 144 | } |
| 145 | } else if (use_erasures && neras <= e->roots && |
Tianjie Xu | ef9dc36 | 2019-11-20 12:18:40 -0800 | [diff] [blame] | 146 | is_erasure(f, interleaved, bbuf)) { |
Sami Tolvanen | c54a33d | 2015-06-26 14:28:31 +0100 | [diff] [blame] | 147 | erasures[neras++] = i; |
| 148 | } |
| 149 | } |
| 150 | |
| 151 | for (int j = 0; j < FEC_BLOCKSIZE; ++j) { |
| 152 | ecc_data[j * FEC_RSM + i] = bbuf[j]; |
| 153 | } |
| 154 | } |
| 155 | |
| 156 | check(data_index >= 0); |
| 157 | |
| 158 | size_t nerrs = 0; |
| 159 | uint8_t copy[FEC_RSM]; |
| 160 | |
| 161 | for (int i = 0; i < FEC_BLOCKSIZE; ++i) { |
| 162 | /* copy parity data */ |
Tianjie Xu | ef9dc36 | 2019-11-20 12:18:40 -0800 | [diff] [blame] | 163 | if (!raw_pread(f->fd, &ecc_data[i * FEC_RSM + e->rsn], e->roots, |
| 164 | e->start + (i + rsb) * e->roots)) { |
Sami Tolvanen | c54a33d | 2015-06-26 14:28:31 +0100 | [diff] [blame] | 165 | error("failed to read ecc data: %s", strerror(errno)); |
| 166 | return -1; |
| 167 | } |
| 168 | |
| 169 | /* for debugging decoding failures, because decode_rs_char can mangle |
| 170 | ecc_data */ |
| 171 | if (unlikely(use_erasures)) { |
| 172 | memcpy(copy, &ecc_data[i * FEC_RSM], FEC_RSM); |
| 173 | } |
| 174 | |
| 175 | /* decode */ |
| 176 | int rc = decode_rs_char(rs, &ecc_data[i * FEC_RSM], erasures, neras); |
| 177 | |
| 178 | if (unlikely(rc < 0)) { |
| 179 | if (use_erasures) { |
| 180 | error("RS block %" PRIu64 ": decoding failed (%d erasures)", |
| 181 | rsb, neras); |
| 182 | dump("raw RS block", rsb, copy, FEC_RSM); |
Tianjie Xu | 622e628 | 2019-12-02 12:55:33 -0800 | [diff] [blame] | 183 | } else if (f->hashtree().hash_data.empty()) { |
Sami Tolvanen | c54a33d | 2015-06-26 14:28:31 +0100 | [diff] [blame] | 184 | warn("RS block %" PRIu64 ": decoding failed", rsb); |
| 185 | } else { |
| 186 | debug("RS block %" PRIu64 ": decoding failed", rsb); |
| 187 | } |
| 188 | |
| 189 | errno = EIO; |
| 190 | return -1; |
| 191 | } else if (unlikely(rc > 0)) { |
| 192 | check(rc <= (use_erasures ? e->roots : e->roots / 2)); |
| 193 | nerrs += rc; |
| 194 | } |
| 195 | |
| 196 | dest[i] = ecc_data[i * FEC_RSM + data_index]; |
| 197 | } |
| 198 | |
| 199 | if (nerrs) { |
| 200 | warn("RS block %" PRIu64 ": corrected %zu errors", rsb, nerrs); |
| 201 | *errors += nerrs; |
| 202 | } |
| 203 | |
| 204 | return FEC_BLOCKSIZE; |
| 205 | } |
| 206 | |
| 207 | /* initializes RS decoder and allocates memory for interleaving */ |
| 208 | static int ecc_init(fec_handle *f, rs_unique_ptr& rs, |
| 209 | std::unique_ptr<uint8_t[]>& ecc_data) |
| 210 | { |
| 211 | check(f); |
| 212 | |
| 213 | rs.reset(init_rs_char(FEC_PARAMS(f->ecc.roots))); |
| 214 | |
| 215 | if (unlikely(!rs)) { |
| 216 | error("failed to initialize RS"); |
| 217 | errno = ENOMEM; |
| 218 | return -1; |
| 219 | } |
| 220 | |
| 221 | ecc_data.reset(new (std::nothrow) uint8_t[FEC_RSM * FEC_BLOCKSIZE]); |
| 222 | |
| 223 | if (unlikely(!ecc_data)) { |
| 224 | error("failed to allocate ecc buffer"); |
| 225 | errno = ENOMEM; |
| 226 | return -1; |
| 227 | } |
| 228 | |
| 229 | return 0; |
| 230 | } |
| 231 | |
| 232 | /* reads `count' bytes from `offset' and corrects possible errors without |
| 233 | erasure detection, returning the number of corrected bytes in `errors' */ |
| 234 | static ssize_t ecc_read(fec_handle *f, uint8_t *dest, size_t count, |
| 235 | uint64_t offset, size_t *errors) |
| 236 | { |
| 237 | check(f); |
| 238 | check(dest); |
| 239 | check(offset < f->data_size); |
| 240 | check(offset + count <= f->data_size); |
| 241 | check(errors); |
| 242 | |
| 243 | debug("[%" PRIu64 ", %" PRIu64 ")", offset, offset + count); |
| 244 | |
| 245 | rs_unique_ptr rs(NULL, free_rs_char); |
| 246 | std::unique_ptr<uint8_t[]> ecc_data; |
| 247 | |
| 248 | if (ecc_init(f, rs, ecc_data) == -1) { |
| 249 | return -1; |
| 250 | } |
| 251 | |
| 252 | uint64_t curr = offset / FEC_BLOCKSIZE; |
| 253 | size_t coff = (size_t)(offset - curr * FEC_BLOCKSIZE); |
| 254 | size_t left = count; |
| 255 | |
| 256 | uint8_t data[FEC_BLOCKSIZE]; |
| 257 | |
| 258 | while (left > 0) { |
| 259 | /* there's no erasure detection without verity metadata */ |
| 260 | if (__ecc_read(f, rs.get(), data, curr * FEC_BLOCKSIZE, false, |
| 261 | ecc_data.get(), errors) == -1) { |
| 262 | return -1; |
| 263 | } |
| 264 | |
| 265 | size_t copy = FEC_BLOCKSIZE - coff; |
| 266 | |
| 267 | if (copy > left) { |
| 268 | copy = left; |
| 269 | } |
| 270 | |
| 271 | memcpy(dest, &data[coff], copy); |
| 272 | |
| 273 | dest += copy; |
| 274 | left -= copy; |
| 275 | coff = 0; |
| 276 | ++curr; |
| 277 | } |
| 278 | |
| 279 | return count; |
| 280 | } |
| 281 | |
| 282 | /* reads `count' bytes from `offset', corrects possible errors with |
| 283 | erasure detection, and verifies the integrity of read data using |
| 284 | verity hash tree; returns the number of corrections in `errors' */ |
| 285 | static ssize_t verity_read(fec_handle *f, uint8_t *dest, size_t count, |
| 286 | uint64_t offset, size_t *errors) |
| 287 | { |
| 288 | check(f); |
| 289 | check(dest); |
| 290 | check(offset < f->data_size); |
| 291 | check(offset + count <= f->data_size); |
Tianjie Xu | 622e628 | 2019-12-02 12:55:33 -0800 | [diff] [blame] | 292 | check(!f->hashtree().hash_data.empty()); |
Sami Tolvanen | c54a33d | 2015-06-26 14:28:31 +0100 | [diff] [blame] | 293 | check(errors); |
| 294 | |
| 295 | debug("[%" PRIu64 ", %" PRIu64 ")", offset, offset + count); |
| 296 | |
| 297 | rs_unique_ptr rs(NULL, free_rs_char); |
| 298 | std::unique_ptr<uint8_t[]> ecc_data; |
| 299 | |
| 300 | if (f->ecc.start && ecc_init(f, rs, ecc_data) == -1) { |
| 301 | return -1; |
| 302 | } |
| 303 | |
| 304 | uint64_t curr = offset / FEC_BLOCKSIZE; |
| 305 | size_t coff = (size_t)(offset - curr * FEC_BLOCKSIZE); |
| 306 | size_t left = count; |
| 307 | uint8_t data[FEC_BLOCKSIZE]; |
| 308 | |
Tianjie Xu | 622e628 | 2019-12-02 12:55:33 -0800 | [diff] [blame] | 309 | uint64_t max_hash_block = |
| 310 | (f->hashtree().hash_data.size() - SHA256_DIGEST_LENGTH) / |
| 311 | SHA256_DIGEST_LENGTH; |
Sami Tolvanen | c54a33d | 2015-06-26 14:28:31 +0100 | [diff] [blame] | 312 | |
| 313 | while (left > 0) { |
| 314 | check(curr <= max_hash_block); |
Sami Tolvanen | c54a33d | 2015-06-26 14:28:31 +0100 | [diff] [blame] | 315 | uint64_t curr_offset = curr * FEC_BLOCKSIZE; |
| 316 | |
| 317 | bool expect_zeros = is_zero(f, curr_offset); |
| 318 | |
| 319 | /* if we are in read-only mode and expect to read a zero block, |
| 320 | skip reading and just return zeros */ |
Sami Tolvanen | 9798825 | 2018-08-02 11:13:15 -0700 | [diff] [blame] | 321 | if ((f->mode & O_ACCMODE) == O_RDONLY && expect_zeros) { |
Sami Tolvanen | c54a33d | 2015-06-26 14:28:31 +0100 | [diff] [blame] | 322 | memset(data, 0, FEC_BLOCKSIZE); |
| 323 | goto valid; |
| 324 | } |
| 325 | |
| 326 | /* copy raw data without error correction */ |
Tianjie Xu | ef9dc36 | 2019-11-20 12:18:40 -0800 | [diff] [blame] | 327 | if (!raw_pread(f->fd, data, FEC_BLOCKSIZE, curr_offset)) { |
Kelvin Zhang | 66ca49d | 2022-12-06 09:57:20 -0800 | [diff] [blame] | 328 | if (errno == EIO) { |
| 329 | warn("I/O error encounter when reading, attempting to recover using fec"); |
| 330 | } else { |
| 331 | error("failed to read: %s", strerror(errno)); |
| 332 | return -1; |
| 333 | } |
Sami Tolvanen | c54a33d | 2015-06-26 14:28:31 +0100 | [diff] [blame] | 334 | } |
| 335 | |
Tianjie Xu | 622e628 | 2019-12-02 12:55:33 -0800 | [diff] [blame] | 336 | if (likely(f->hashtree().check_block_hash_with_index(curr, data))) { |
Sami Tolvanen | c54a33d | 2015-06-26 14:28:31 +0100 | [diff] [blame] | 337 | goto valid; |
| 338 | } |
| 339 | |
| 340 | /* we know the block is supposed to contain zeros, so return zeros |
| 341 | instead of trying to correct it */ |
| 342 | if (expect_zeros) { |
| 343 | memset(data, 0, FEC_BLOCKSIZE); |
| 344 | goto corrected; |
| 345 | } |
| 346 | |
| 347 | if (!f->ecc.start) { |
| 348 | /* fatal error without ecc */ |
| 349 | error("[%" PRIu64 ", %" PRIu64 "): corrupted block %" PRIu64, |
| 350 | offset, offset + count, curr); |
| 351 | return -1; |
| 352 | } else { |
| 353 | debug("[%" PRIu64 ", %" PRIu64 "): corrupted block %" PRIu64, |
| 354 | offset, offset + count, curr); |
| 355 | } |
| 356 | |
| 357 | /* try to correct without erasures first, because checking for |
| 358 | erasure locations is slower */ |
| 359 | if (__ecc_read(f, rs.get(), data, curr_offset, false, ecc_data.get(), |
Tianjie Xu | ef9dc36 | 2019-11-20 12:18:40 -0800 | [diff] [blame] | 360 | errors) == FEC_BLOCKSIZE && |
Tianjie Xu | 622e628 | 2019-12-02 12:55:33 -0800 | [diff] [blame] | 361 | f->hashtree().check_block_hash_with_index(curr, data)) { |
Sami Tolvanen | c54a33d | 2015-06-26 14:28:31 +0100 | [diff] [blame] | 362 | goto corrected; |
| 363 | } |
| 364 | |
| 365 | /* try to correct with erasures */ |
| 366 | if (__ecc_read(f, rs.get(), data, curr_offset, true, ecc_data.get(), |
Tianjie Xu | ef9dc36 | 2019-11-20 12:18:40 -0800 | [diff] [blame] | 367 | errors) == FEC_BLOCKSIZE && |
Tianjie Xu | 622e628 | 2019-12-02 12:55:33 -0800 | [diff] [blame] | 368 | f->hashtree().check_block_hash_with_index(curr, data)) { |
Sami Tolvanen | c54a33d | 2015-06-26 14:28:31 +0100 | [diff] [blame] | 369 | goto corrected; |
| 370 | } |
| 371 | |
| 372 | error("[%" PRIu64 ", %" PRIu64 "): corrupted block %" PRIu64 |
| 373 | " (offset %" PRIu64 ") cannot be recovered", |
| 374 | offset, offset + count, curr, curr_offset); |
| 375 | dump("decoded block", curr, data, FEC_BLOCKSIZE); |
| 376 | |
| 377 | errno = EIO; |
| 378 | return -1; |
| 379 | |
| 380 | corrected: |
| 381 | /* update the corrected block to the file if we are in r/w mode */ |
| 382 | if (f->mode & O_RDWR && |
Tianjie Xu | ef9dc36 | 2019-11-20 12:18:40 -0800 | [diff] [blame] | 383 | !raw_pwrite(f->fd, data, FEC_BLOCKSIZE, curr_offset)) { |
Sami Tolvanen | c54a33d | 2015-06-26 14:28:31 +0100 | [diff] [blame] | 384 | error("failed to write: %s", strerror(errno)); |
| 385 | return -1; |
| 386 | } |
| 387 | |
| 388 | valid: |
| 389 | size_t copy = FEC_BLOCKSIZE - coff; |
| 390 | |
| 391 | if (copy > left) { |
| 392 | copy = left; |
| 393 | } |
| 394 | |
| 395 | memcpy(dest, &data[coff], copy); |
| 396 | |
| 397 | dest += copy; |
| 398 | left -= copy; |
| 399 | coff = 0; |
| 400 | ++curr; |
| 401 | } |
| 402 | |
| 403 | return count; |
| 404 | } |
| 405 | |
| 406 | /* sets the internal file position to `offset' relative to `whence' */ |
| 407 | int fec_seek(struct fec_handle *f, int64_t offset, int whence) |
| 408 | { |
| 409 | check(f); |
| 410 | |
| 411 | if (whence == SEEK_SET) { |
| 412 | if (offset < 0) { |
| 413 | errno = EOVERFLOW; |
| 414 | return -1; |
| 415 | } |
| 416 | |
| 417 | f->pos = offset; |
| 418 | } else if (whence == SEEK_CUR) { |
| 419 | if (offset < 0 && f->pos < (uint64_t)-offset) { |
| 420 | errno = EOVERFLOW; |
| 421 | return -1; |
| 422 | } else if (offset > 0 && (uint64_t)offset > UINT64_MAX - f->pos) { |
| 423 | errno = EOVERFLOW; |
| 424 | return -1; |
| 425 | } |
| 426 | |
| 427 | f->pos += offset; |
| 428 | } else if (whence == SEEK_END) { |
| 429 | if (offset >= 0) { |
| 430 | errno = ENXIO; |
| 431 | return -1; |
| 432 | } else if ((uint64_t)-offset > f->size) { |
| 433 | errno = EOVERFLOW; |
| 434 | return -1; |
| 435 | } |
| 436 | |
| 437 | f->pos = f->size + offset; |
| 438 | } else { |
| 439 | errno = EINVAL; |
| 440 | return -1; |
| 441 | } |
| 442 | |
| 443 | return 0; |
| 444 | } |
| 445 | |
| 446 | /* reads up to `count' bytes starting from the internal file position using |
| 447 | error correction and integrity validation, if available */ |
| 448 | ssize_t fec_read(struct fec_handle *f, void *buf, size_t count) |
| 449 | { |
| 450 | ssize_t rc = fec_pread(f, buf, count, f->pos); |
| 451 | |
| 452 | if (rc > 0) { |
| 453 | check(f->pos < UINT64_MAX - rc); |
| 454 | f->pos += rc; |
| 455 | } |
| 456 | |
| 457 | return rc; |
| 458 | } |
| 459 | |
| 460 | /* for a file with size `max', returns the number of bytes we can read starting |
| 461 | from `offset', up to `count' bytes */ |
| 462 | static inline size_t get_max_count(uint64_t offset, size_t count, uint64_t max) |
| 463 | { |
| 464 | if (offset >= max) { |
| 465 | return 0; |
| 466 | } else if (offset > max - count) { |
| 467 | return (size_t)(max - offset); |
| 468 | } |
| 469 | |
| 470 | return count; |
| 471 | } |
| 472 | |
| 473 | /* reads `count' bytes from `f->fd' starting from `offset', and copies the |
| 474 | data to `buf' */ |
Tianjie Xu | ef9dc36 | 2019-11-20 12:18:40 -0800 | [diff] [blame] | 475 | bool raw_pread(int fd, void *buf, size_t count, uint64_t offset) { |
Sami Tolvanen | c54a33d | 2015-06-26 14:28:31 +0100 | [diff] [blame] | 476 | check(buf); |
| 477 | |
| 478 | uint8_t *p = (uint8_t *)buf; |
| 479 | size_t remaining = count; |
| 480 | |
| 481 | while (remaining > 0) { |
Tianjie Xu | ef9dc36 | 2019-11-20 12:18:40 -0800 | [diff] [blame] | 482 | ssize_t n = TEMP_FAILURE_RETRY(pread64(fd, p, remaining, offset)); |
Sami Tolvanen | c54a33d | 2015-06-26 14:28:31 +0100 | [diff] [blame] | 483 | |
| 484 | if (n <= 0) { |
| 485 | return false; |
| 486 | } |
| 487 | |
| 488 | p += n; |
| 489 | remaining -= n; |
| 490 | offset += n; |
| 491 | } |
| 492 | |
| 493 | return true; |
| 494 | } |
| 495 | |
| 496 | /* writes `count' bytes from `buf' to `f->fd' to a file position `offset' */ |
Tianjie Xu | ef9dc36 | 2019-11-20 12:18:40 -0800 | [diff] [blame] | 497 | bool raw_pwrite(int fd, const void *buf, size_t count, uint64_t offset) { |
Sami Tolvanen | c54a33d | 2015-06-26 14:28:31 +0100 | [diff] [blame] | 498 | check(buf); |
| 499 | |
| 500 | const uint8_t *p = (const uint8_t *)buf; |
| 501 | size_t remaining = count; |
| 502 | |
| 503 | while (remaining > 0) { |
Tianjie Xu | ef9dc36 | 2019-11-20 12:18:40 -0800 | [diff] [blame] | 504 | ssize_t n = TEMP_FAILURE_RETRY(pwrite64(fd, p, remaining, offset)); |
Sami Tolvanen | c54a33d | 2015-06-26 14:28:31 +0100 | [diff] [blame] | 505 | |
| 506 | if (n <= 0) { |
| 507 | return false; |
| 508 | } |
| 509 | |
| 510 | p += n; |
| 511 | remaining -= n; |
| 512 | offset += n; |
| 513 | } |
| 514 | |
| 515 | return true; |
| 516 | } |
| 517 | |
| 518 | /* reads up to `count' bytes starting from `offset' using error correction and |
| 519 | integrity validation, if available */ |
| 520 | ssize_t fec_pread(struct fec_handle *f, void *buf, size_t count, |
| 521 | uint64_t offset) |
| 522 | { |
| 523 | check(f); |
| 524 | check(buf); |
| 525 | |
| 526 | if (unlikely(offset > UINT64_MAX - count)) { |
| 527 | errno = EOVERFLOW; |
| 528 | return -1; |
| 529 | } |
| 530 | |
Tianjie Xu | 622e628 | 2019-12-02 12:55:33 -0800 | [diff] [blame] | 531 | if (!f->hashtree().hash_data.empty()) { |
Sami Tolvanen | c54a33d | 2015-06-26 14:28:31 +0100 | [diff] [blame] | 532 | return process(f, (uint8_t *)buf, |
Tianjie Xu | ef9dc36 | 2019-11-20 12:18:40 -0800 | [diff] [blame] | 533 | get_max_count(offset, count, f->data_size), offset, |
| 534 | verity_read); |
Sami Tolvanen | c54a33d | 2015-06-26 14:28:31 +0100 | [diff] [blame] | 535 | } else if (f->ecc.start) { |
| 536 | check(f->ecc.start < f->size); |
| 537 | |
| 538 | count = get_max_count(offset, count, f->data_size); |
| 539 | ssize_t rc = process(f, (uint8_t *)buf, count, offset, ecc_read); |
| 540 | |
| 541 | if (rc >= 0) { |
| 542 | return rc; |
| 543 | } |
| 544 | |
| 545 | /* return raw data if pure ecc read fails; due to interleaving |
| 546 | the specific blocks the caller wants may still be fine */ |
| 547 | } else { |
| 548 | count = get_max_count(offset, count, f->size); |
| 549 | } |
| 550 | |
Tianjie Xu | ef9dc36 | 2019-11-20 12:18:40 -0800 | [diff] [blame] | 551 | if (raw_pread(f->fd, buf, count, offset)) { |
Sami Tolvanen | c54a33d | 2015-06-26 14:28:31 +0100 | [diff] [blame] | 552 | return count; |
| 553 | } |
| 554 | |
| 555 | return -1; |
| 556 | } |