blob: f219b65378ff5d54931bc2a49088d7c423910d86 [file] [log] [blame]
/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
Adam Lesinski1ab598f2015-08-14 14:26:04 -070017#include "util/BigBuffer.h"
18#include "util/Maybe.h"
19#include "util/StringPiece.h"
20#include "util/Util.h"
Adam Lesinski6f6ceb72014-11-14 14:48:12 -080021
22#include <algorithm>
23#include <ostream>
24#include <string>
25#include <utils/Unicode.h>
26#include <vector>
27
28namespace aapt {
29namespace util {
30
// Namespace URI for which extractPackageFromNamespace() reports an empty package name.
constexpr char16_t const* kSchemaAuto = u"http://schemas.android.com/apk/res-auto";

// Namespace URI prefix; extractPackageFromNamespace() returns whatever follows it as the package.
constexpr char16_t const* kSchemaPrefix = u"http://schemas.android.com/apk/res/";
33
Adam Lesinski6f6ceb72014-11-14 14:48:12 -080034static std::vector<std::string> splitAndTransform(const StringPiece& str, char sep,
35 const std::function<char(char)>& f) {
36 std::vector<std::string> parts;
37 const StringPiece::const_iterator end = std::end(str);
38 StringPiece::const_iterator start = std::begin(str);
39 StringPiece::const_iterator current;
40 do {
41 current = std::find(start, end, sep);
42 parts.emplace_back(str.substr(start, current).toString());
43 if (f) {
44 std::string& part = parts.back();
45 std::transform(part.begin(), part.end(), part.begin(), f);
46 }
47 start = current + 1;
48 } while (current != end);
49 return parts;
50}
51
52std::vector<std::string> split(const StringPiece& str, char sep) {
53 return splitAndTransform(str, sep, nullptr);
54}
55
56std::vector<std::string> splitAndLowercase(const StringPiece& str, char sep) {
57 return splitAndTransform(str, sep, ::tolower);
58}
59
Adam Lesinski6f6ceb72014-11-14 14:48:12 -080060StringPiece16 trimWhitespace(const StringPiece16& str) {
61 if (str.size() == 0 || str.data() == nullptr) {
62 return str;
63 }
64
65 const char16_t* start = str.data();
66 const char16_t* end = str.data() + str.length();
67
68 while (start != end && util::isspace16(*start)) {
69 start++;
70 }
71
72 while (end != start && util::isspace16(*(end - 1))) {
73 end--;
74 }
75
76 return StringPiece16(start, end - start);
77}
78
79StringPiece16::const_iterator findNonAlphaNumericAndNotInSet(const StringPiece16& str,
80 const StringPiece16& allowedChars) {
81 const auto endIter = str.end();
82 for (auto iter = str.begin(); iter != endIter; ++iter) {
83 char16_t c = *iter;
84 if ((c >= u'a' && c <= u'z') ||
85 (c >= u'A' && c <= u'Z') ||
86 (c >= u'0' && c <= u'9')) {
87 continue;
88 }
89
90 bool match = false;
91 for (char16_t i : allowedChars) {
92 if (c == i) {
93 match = true;
94 break;
95 }
96 }
97
98 if (!match) {
99 return iter;
100 }
101 }
102 return endIter;
103}
104
Adam Lesinskia1ad4a82015-06-08 11:41:09 -0700105bool isJavaClassName(const StringPiece16& str) {
106 size_t pieces = 0;
107 for (const StringPiece16& piece : tokenize(str, u'.')) {
108 pieces++;
109 if (piece.empty()) {
110 return false;
111 }
112
113 // Can't have starting or trailing $ character.
114 if (piece.data()[0] == u'$' || piece.data()[piece.size() - 1] == u'$') {
115 return false;
116 }
117
118 if (findNonAlphaNumericAndNotInSet(piece, u"$_") != piece.end()) {
119 return false;
120 }
121 }
122 return pieces >= 2;
123}
124
Adam Lesinski1ab598f2015-08-14 14:26:04 -0700125bool isJavaPackageName(const StringPiece16& str) {
126 if (str.empty()) {
127 return false;
128 }
129
130 size_t pieces = 0;
131 for (const StringPiece16& piece : tokenize(str, u'.')) {
132 pieces++;
133 if (piece.empty()) {
134 return false;
135 }
136
137 if (piece.data()[0] == u'_' || piece.data()[piece.size() - 1] == u'_') {
138 return false;
139 }
140
141 if (findNonAlphaNumericAndNotInSet(piece, u"_") != piece.end()) {
142 return false;
143 }
144 }
145 return pieces >= 1;
146}
147
Adam Lesinskia1ad4a82015-06-08 11:41:09 -0700148Maybe<std::u16string> getFullyQualifiedClassName(const StringPiece16& package,
149 const StringPiece16& className) {
150 if (className.empty()) {
151 return {};
152 }
153
154 if (util::isJavaClassName(className)) {
155 return className.toString();
156 }
157
158 if (package.empty()) {
159 return {};
160 }
161
162 std::u16string result(package.data(), package.size());
163 if (className.data()[0] != u'.') {
164 result += u'.';
165 }
166 result.append(className.data(), className.size());
167 if (!isJavaClassName(result)) {
168 return {};
169 }
170 return result;
171}
172
Adam Lesinski6f6ceb72014-11-14 14:48:12 -0800173static Maybe<char16_t> parseUnicodeCodepoint(const char16_t** start, const char16_t* end) {
174 char16_t code = 0;
175 for (size_t i = 0; i < 4 && *start != end; i++, (*start)++) {
176 char16_t c = **start;
177 int a;
178 if (c >= '0' && c <= '9') {
179 a = c - '0';
180 } else if (c >= 'a' && c <= 'f') {
181 a = c - 'a' + 10;
182 } else if (c >= 'A' && c <= 'F') {
183 a = c - 'A' + 10;
184 } else {
185 return make_nothing<char16_t>();
186 }
187 code = (code << 4) | a;
188 }
189 return make_value(code);
190}
191
192StringBuilder& StringBuilder::append(const StringPiece16& str) {
193 if (!mError.empty()) {
194 return *this;
195 }
196
197 const char16_t* const end = str.end();
198 const char16_t* start = str.begin();
199 const char16_t* current = start;
200 while (current != end) {
Adam Lesinski90959882015-07-06 18:09:18 -0700201 if (mLastCharWasEscape) {
202 switch (*current) {
203 case u't':
204 mStr += u'\t';
205 break;
206 case u'n':
207 mStr += u'\n';
208 break;
209 case u'#':
210 mStr += u'#';
211 break;
212 case u'@':
213 mStr += u'@';
214 break;
215 case u'?':
216 mStr += u'?';
217 break;
218 case u'"':
219 mStr += u'"';
220 break;
221 case u'\'':
222 mStr += u'\'';
223 break;
224 case u'\\':
225 mStr += u'\\';
226 break;
227 case u'u': {
228 current++;
229 Maybe<char16_t> c = parseUnicodeCodepoint(&current, end);
230 if (!c) {
231 mError = "invalid unicode escape sequence";
232 return *this;
233 }
234 mStr += c.value();
235 current -= 1;
236 break;
237 }
238
239 default:
240 // Ignore.
241 break;
242 }
243 mLastCharWasEscape = false;
244 start = current + 1;
245 } else if (*current == u'"') {
Adam Lesinski6f6ceb72014-11-14 14:48:12 -0800246 if (!mQuote && mTrailingSpace) {
247 // We found an opening quote, and we have
248 // trailing space, so we should append that
249 // space now.
250 if (mTrailingSpace) {
251 // We had trailing whitespace, so
252 // replace with a single space.
253 if (!mStr.empty()) {
254 mStr += u' ';
255 }
256 mTrailingSpace = false;
257 }
258 }
259 mQuote = !mQuote;
260 mStr.append(start, current - start);
261 start = current + 1;
262 } else if (*current == u'\'' && !mQuote) {
263 // This should be escaped.
264 mError = "unescaped apostrophe";
265 return *this;
266 } else if (*current == u'\\') {
267 // This is an escape sequence, convert to the real value.
268 if (!mQuote && mTrailingSpace) {
269 // We had trailing whitespace, so
270 // replace with a single space.
271 if (!mStr.empty()) {
272 mStr += u' ';
273 }
274 mTrailingSpace = false;
275 }
276 mStr.append(start, current - start);
277 start = current + 1;
Adam Lesinski90959882015-07-06 18:09:18 -0700278 mLastCharWasEscape = true;
Adam Lesinski6f6ceb72014-11-14 14:48:12 -0800279 } else if (!mQuote) {
280 // This is not quoted text, so look for whitespace.
281 if (isspace16(*current)) {
282 // We found whitespace, see if we have seen some
283 // before.
284 if (!mTrailingSpace) {
285 // We didn't see a previous adjacent space,
286 // so mark that we did.
287 mTrailingSpace = true;
288 mStr.append(start, current - start);
289 }
290
291 // Keep skipping whitespace.
292 start = current + 1;
293 } else if (mTrailingSpace) {
294 // We saw trailing space before, so replace all
295 // that trailing space with one space.
296 if (!mStr.empty()) {
297 mStr += u' ';
298 }
299 mTrailingSpace = false;
300 }
301 }
302 current++;
303 }
304 mStr.append(start, end - start);
305 return *this;
306}
307
308std::u16string utf8ToUtf16(const StringPiece& utf8) {
309 ssize_t utf16Length = utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(utf8.data()),
310 utf8.length());
311 if (utf16Length <= 0) {
312 return {};
313 }
314
315 std::u16string utf16;
316 utf16.resize(utf16Length);
317 utf8_to_utf16(reinterpret_cast<const uint8_t*>(utf8.data()), utf8.length(), &*utf16.begin());
318 return utf16;
319}
320
321std::string utf16ToUtf8(const StringPiece16& utf16) {
322 ssize_t utf8Length = utf16_to_utf8_length(utf16.data(), utf16.length());
323 if (utf8Length <= 0) {
324 return {};
325 }
326
327 std::string utf8;
328 utf8.resize(utf8Length);
329 utf16_to_utf8(utf16.data(), utf16.length(), &*utf8.begin());
330 return utf8;
331}
332
333bool writeAll(std::ostream& out, const BigBuffer& buffer) {
334 for (const auto& b : buffer) {
335 if (!out.write(reinterpret_cast<const char*>(b.buffer.get()), b.size)) {
336 return false;
337 }
338 }
339 return true;
340}
341
342std::unique_ptr<uint8_t[]> copy(const BigBuffer& buffer) {
343 std::unique_ptr<uint8_t[]> data = std::unique_ptr<uint8_t[]>(new uint8_t[buffer.size()]);
344 uint8_t* p = data.get();
345 for (const auto& block : buffer) {
346 memcpy(p, block.buffer.get(), block.size);
347 p += block.size;
348 }
349 return data;
350}
351
Adam Lesinski24aad162015-04-24 19:19:30 -0700352Maybe<std::u16string> extractPackageFromNamespace(const std::u16string& namespaceUri) {
353 if (stringStartsWith<char16_t>(namespaceUri, kSchemaPrefix)) {
354 StringPiece16 schemaPrefix = kSchemaPrefix;
355 StringPiece16 package = namespaceUri;
356 return package.substr(schemaPrefix.size(), package.size() - schemaPrefix.size())
357 .toString();
358 } else if (namespaceUri == kSchemaAuto) {
359 return std::u16string();
360 }
361 return {};
362}
363
Adam Lesinski1ab598f2015-08-14 14:26:04 -0700364bool extractResFilePathParts(const StringPiece16& path, StringPiece16* outPrefix,
365 StringPiece16* outEntry, StringPiece16* outSuffix) {
366 if (!stringStartsWith<char16_t>(path, u"res/")) {
367 return false;
368 }
369
370 StringPiece16::const_iterator lastOccurence = path.end();
371 for (auto iter = path.begin() + StringPiece16(u"res/").size(); iter != path.end(); ++iter) {
372 if (*iter == u'/') {
373 lastOccurence = iter;
374 }
375 }
376
377 if (lastOccurence == path.end()) {
378 return false;
379 }
380
381 auto iter = std::find(lastOccurence, path.end(), u'.');
382 *outSuffix = StringPiece16(iter, path.end() - iter);
383 *outEntry = StringPiece16(lastOccurence + 1, iter - lastOccurence - 1);
384 *outPrefix = StringPiece16(path.begin(), lastOccurence - path.begin() + 1);
385 return true;
386}
387
Adam Lesinski6f6ceb72014-11-14 14:48:12 -0800388} // namespace util
389} // namespace aapt