blob: a9e30a69701dc9da0da64528bce40a9c66199773 [file] [log] [blame]
David Srbecky154c57f2018-06-03 12:00:27 +01001/*
2 * Copyright (C) 2018 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
#include "xz_utils.h"

#include <string.h>

#include <algorithm>
#include <vector>

#include "base/array_ref.h"
#include "base/leb128.h"
#include "dwarf/writer.h"

// liblzma.
#include "7zCrc.h"
#include "XzCrc64.h"
#include "XzEnc.h"
29
30namespace art {
31namespace debug {
32
33constexpr size_t kChunkSize = kPageSize;
34
35static void XzCompressChunk(ArrayRef<uint8_t> src, std::vector<uint8_t>* dst) {
36 // Configure the compression library.
37 CrcGenerateTable();
38 Crc64GenerateTable();
39 CLzma2EncProps lzma2Props;
40 Lzma2EncProps_Init(&lzma2Props);
41 lzma2Props.lzmaProps.level = 1; // Fast compression.
42 Lzma2EncProps_Normalize(&lzma2Props);
43 CXzProps props;
44 XzProps_Init(&props);
45 props.lzma2Props = lzma2Props;
46 // Implement the required interface for communication (written in C so no virtual methods).
47 struct XzCallbacks : public ISeqInStream, public ISeqOutStream, public ICompressProgress {
48 static SRes ReadImpl(const ISeqInStream* p, void* buf, size_t* size) {
49 auto* ctx = static_cast<XzCallbacks*>(const_cast<ISeqInStream*>(p));
50 *size = std::min(*size, ctx->src_.size() - ctx->src_pos_);
51 memcpy(buf, ctx->src_.data() + ctx->src_pos_, *size);
52 ctx->src_pos_ += *size;
53 return SZ_OK;
54 }
55 static size_t WriteImpl(const ISeqOutStream* p, const void* buf, size_t size) {
56 auto* ctx = static_cast<const XzCallbacks*>(p);
57 const uint8_t* buffer = reinterpret_cast<const uint8_t*>(buf);
58 ctx->dst_->insert(ctx->dst_->end(), buffer, buffer + size);
59 return size;
60 }
61 static SRes ProgressImpl(const ICompressProgress* , UInt64, UInt64) {
62 return SZ_OK;
63 }
64 size_t src_pos_;
65 ArrayRef<uint8_t> src_;
66 std::vector<uint8_t>* dst_;
67 };
68 XzCallbacks callbacks;
69 callbacks.Read = XzCallbacks::ReadImpl;
70 callbacks.Write = XzCallbacks::WriteImpl;
71 callbacks.Progress = XzCallbacks::ProgressImpl;
72 callbacks.src_pos_ = 0;
73 callbacks.src_ = src;
74 callbacks.dst_ = dst;
75 // Compress.
76 SRes res = Xz_Encode(&callbacks, &callbacks, &props, &callbacks);
77 CHECK_EQ(res, SZ_OK);
78}
79
80// Compress data while splitting it to smaller chunks to enable random-access reads.
81// The XZ file format supports this well, but the compression library does not.
82// Therefore compress the chunks separately and then glue them together manually.
83//
84// The XZ file format is described here: https://tukaani.org/xz/xz-file-format.txt
85// In short, the file format is: [header] [compressed_block]* [index] [footer]
86// Where [index] is: [num_records] ([compressed_size] [uncompressed_size])* [crc32]
87//
88void XzCompress(ArrayRef<uint8_t> src, std::vector<uint8_t>* dst) {
89 uint8_t header[] = { 0xFD, '7', 'z', 'X', 'Z', 0, 0, 1, 0x69, 0x22, 0xDE, 0x36 };
90 uint8_t footer[] = { 0, 1, 'Y', 'Z' };
91 dst->insert(dst->end(), header, header + sizeof(header));
92 std::vector<uint8_t> tmp;
93 std::vector<uint32_t> index;
94 for (size_t offset = 0; offset < src.size(); offset += kChunkSize) {
95 size_t size = std::min(src.size() - offset, kChunkSize);
96 tmp.clear();
97 XzCompressChunk(src.SubArray(offset, size), &tmp);
98 DCHECK_EQ(memcmp(tmp.data(), header, sizeof(header)), 0);
99 DCHECK_EQ(memcmp(tmp.data() + tmp.size() - sizeof(footer), footer, sizeof(footer)), 0);
100 uint32_t* index_size = reinterpret_cast<uint32_t*>(tmp.data() + tmp.size() - 8);
101 DCHECK_ALIGNED(index_size, sizeof(uint32_t));
102 size_t index_offset = tmp.size() - 16 - *index_size * 4;
103 const uint8_t* index_ptr = tmp.data() + index_offset;
104 uint8_t index_indicator = *(index_ptr++);
105 CHECK_EQ(index_indicator, 0); // Mark the start of index (as opposed to compressed block).
106 uint32_t num_records = DecodeUnsignedLeb128(&index_ptr);
107 for (uint32_t i = 0; i < num_records; i++) {
108 index.push_back(DecodeUnsignedLeb128(&index_ptr)); // Compressed size.
109 index.push_back(DecodeUnsignedLeb128(&index_ptr)); // Uncompressed size.
110 }
111 // Copy the raw compressed block(s) located between the header and index.
112 dst->insert(dst->end(), tmp.data() + sizeof(header), tmp.data() + index_offset);
113 }
114
115 // Write the index.
116 uint32_t index_size_in_words;
117 {
118 tmp.clear();
119 dwarf::Writer<> writer(&tmp);
120 writer.PushUint8(0); // Index indicator.
121 writer.PushUleb128(static_cast<uint32_t>(index.size()) / 2); // Record count.
122 for (uint32_t i : index) {
123 writer.PushUleb128(i);
124 }
125 writer.Pad(4);
126 index_size_in_words = writer.size() / sizeof(uint32_t);
127 writer.PushUint32(CrcCalc(tmp.data(), tmp.size()));
128 dst->insert(dst->end(), tmp.begin(), tmp.end());
129 }
130
131 // Write the footer.
132 {
133 tmp.clear();
134 dwarf::Writer<> writer(&tmp);
135 writer.PushUint32(0); // CRC placeholder.
136 writer.PushUint32(index_size_in_words);
137 writer.PushData(footer, sizeof(footer));
138 writer.UpdateUint32(0, CrcCalc(tmp.data() + 4, 6));
139 dst->insert(dst->end(), tmp.begin(), tmp.end());
140 }
141}
142
143} // namespace debug
144} // namespace art