David Srbecky | 154c57f | 2018-06-03 12:00:27 +0100 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2018 The Android Open Source Project |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
| 17 | #include "xz_utils.h" |
| 18 | |
| 19 | #include <vector> |
| 20 | |
| 21 | #include "base/array_ref.h" |
| 22 | #include "dwarf/writer.h" |
| 23 | #include "base/leb128.h" |
| 24 | |
| 25 | // liblzma. |
| 26 | #include "7zCrc.h" |
| 27 | #include "XzCrc64.h" |
| 28 | #include "XzEnc.h" |
| 29 | |
| 30 | namespace art { |
| 31 | namespace debug { |
| 32 | |
| 33 | constexpr size_t kChunkSize = kPageSize; |
| 34 | |
| 35 | static void XzCompressChunk(ArrayRef<uint8_t> src, std::vector<uint8_t>* dst) { |
| 36 | // Configure the compression library. |
| 37 | CrcGenerateTable(); |
| 38 | Crc64GenerateTable(); |
| 39 | CLzma2EncProps lzma2Props; |
| 40 | Lzma2EncProps_Init(&lzma2Props); |
| 41 | lzma2Props.lzmaProps.level = 1; // Fast compression. |
| 42 | Lzma2EncProps_Normalize(&lzma2Props); |
| 43 | CXzProps props; |
| 44 | XzProps_Init(&props); |
| 45 | props.lzma2Props = lzma2Props; |
| 46 | // Implement the required interface for communication (written in C so no virtual methods). |
| 47 | struct XzCallbacks : public ISeqInStream, public ISeqOutStream, public ICompressProgress { |
| 48 | static SRes ReadImpl(const ISeqInStream* p, void* buf, size_t* size) { |
| 49 | auto* ctx = static_cast<XzCallbacks*>(const_cast<ISeqInStream*>(p)); |
| 50 | *size = std::min(*size, ctx->src_.size() - ctx->src_pos_); |
| 51 | memcpy(buf, ctx->src_.data() + ctx->src_pos_, *size); |
| 52 | ctx->src_pos_ += *size; |
| 53 | return SZ_OK; |
| 54 | } |
| 55 | static size_t WriteImpl(const ISeqOutStream* p, const void* buf, size_t size) { |
| 56 | auto* ctx = static_cast<const XzCallbacks*>(p); |
| 57 | const uint8_t* buffer = reinterpret_cast<const uint8_t*>(buf); |
| 58 | ctx->dst_->insert(ctx->dst_->end(), buffer, buffer + size); |
| 59 | return size; |
| 60 | } |
| 61 | static SRes ProgressImpl(const ICompressProgress* , UInt64, UInt64) { |
| 62 | return SZ_OK; |
| 63 | } |
| 64 | size_t src_pos_; |
| 65 | ArrayRef<uint8_t> src_; |
| 66 | std::vector<uint8_t>* dst_; |
| 67 | }; |
| 68 | XzCallbacks callbacks; |
| 69 | callbacks.Read = XzCallbacks::ReadImpl; |
| 70 | callbacks.Write = XzCallbacks::WriteImpl; |
| 71 | callbacks.Progress = XzCallbacks::ProgressImpl; |
| 72 | callbacks.src_pos_ = 0; |
| 73 | callbacks.src_ = src; |
| 74 | callbacks.dst_ = dst; |
| 75 | // Compress. |
| 76 | SRes res = Xz_Encode(&callbacks, &callbacks, &props, &callbacks); |
| 77 | CHECK_EQ(res, SZ_OK); |
| 78 | } |
| 79 | |
| 80 | // Compress data while splitting it to smaller chunks to enable random-access reads. |
| 81 | // The XZ file format supports this well, but the compression library does not. |
| 82 | // Therefore compress the chunks separately and then glue them together manually. |
| 83 | // |
| 84 | // The XZ file format is described here: https://tukaani.org/xz/xz-file-format.txt |
| 85 | // In short, the file format is: [header] [compressed_block]* [index] [footer] |
| 86 | // Where [index] is: [num_records] ([compressed_size] [uncompressed_size])* [crc32] |
| 87 | // |
| 88 | void XzCompress(ArrayRef<uint8_t> src, std::vector<uint8_t>* dst) { |
| 89 | uint8_t header[] = { 0xFD, '7', 'z', 'X', 'Z', 0, 0, 1, 0x69, 0x22, 0xDE, 0x36 }; |
| 90 | uint8_t footer[] = { 0, 1, 'Y', 'Z' }; |
| 91 | dst->insert(dst->end(), header, header + sizeof(header)); |
| 92 | std::vector<uint8_t> tmp; |
| 93 | std::vector<uint32_t> index; |
| 94 | for (size_t offset = 0; offset < src.size(); offset += kChunkSize) { |
| 95 | size_t size = std::min(src.size() - offset, kChunkSize); |
| 96 | tmp.clear(); |
| 97 | XzCompressChunk(src.SubArray(offset, size), &tmp); |
| 98 | DCHECK_EQ(memcmp(tmp.data(), header, sizeof(header)), 0); |
| 99 | DCHECK_EQ(memcmp(tmp.data() + tmp.size() - sizeof(footer), footer, sizeof(footer)), 0); |
| 100 | uint32_t* index_size = reinterpret_cast<uint32_t*>(tmp.data() + tmp.size() - 8); |
| 101 | DCHECK_ALIGNED(index_size, sizeof(uint32_t)); |
| 102 | size_t index_offset = tmp.size() - 16 - *index_size * 4; |
| 103 | const uint8_t* index_ptr = tmp.data() + index_offset; |
| 104 | uint8_t index_indicator = *(index_ptr++); |
| 105 | CHECK_EQ(index_indicator, 0); // Mark the start of index (as opposed to compressed block). |
| 106 | uint32_t num_records = DecodeUnsignedLeb128(&index_ptr); |
| 107 | for (uint32_t i = 0; i < num_records; i++) { |
| 108 | index.push_back(DecodeUnsignedLeb128(&index_ptr)); // Compressed size. |
| 109 | index.push_back(DecodeUnsignedLeb128(&index_ptr)); // Uncompressed size. |
| 110 | } |
| 111 | // Copy the raw compressed block(s) located between the header and index. |
| 112 | dst->insert(dst->end(), tmp.data() + sizeof(header), tmp.data() + index_offset); |
| 113 | } |
| 114 | |
| 115 | // Write the index. |
| 116 | uint32_t index_size_in_words; |
| 117 | { |
| 118 | tmp.clear(); |
| 119 | dwarf::Writer<> writer(&tmp); |
| 120 | writer.PushUint8(0); // Index indicator. |
| 121 | writer.PushUleb128(static_cast<uint32_t>(index.size()) / 2); // Record count. |
| 122 | for (uint32_t i : index) { |
| 123 | writer.PushUleb128(i); |
| 124 | } |
| 125 | writer.Pad(4); |
| 126 | index_size_in_words = writer.size() / sizeof(uint32_t); |
| 127 | writer.PushUint32(CrcCalc(tmp.data(), tmp.size())); |
| 128 | dst->insert(dst->end(), tmp.begin(), tmp.end()); |
| 129 | } |
| 130 | |
| 131 | // Write the footer. |
| 132 | { |
| 133 | tmp.clear(); |
| 134 | dwarf::Writer<> writer(&tmp); |
| 135 | writer.PushUint32(0); // CRC placeholder. |
| 136 | writer.PushUint32(index_size_in_words); |
| 137 | writer.PushData(footer, sizeof(footer)); |
| 138 | writer.UpdateUint32(0, CrcCalc(tmp.data() + 4, 6)); |
| 139 | dst->insert(dst->end(), tmp.begin(), tmp.end()); |
| 140 | } |
| 141 | } |
| 142 | |
| 143 | } // namespace debug |
| 144 | } // namespace art |