blob: e743247be8a9a1ea57d2906e138ab3fccdcd2278 [file] [log] [blame]
Adam Lesinski6f6ceb72014-11-14 14:48:12 -08001/*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
Adam Lesinski1ab598f2015-08-14 14:26:04 -070017#include "util/BigBuffer.h"
18#include "util/Maybe.h"
19#include "util/StringPiece.h"
20#include "util/Util.h"
Adam Lesinski6f6ceb72014-11-14 14:48:12 -080021
22#include <algorithm>
23#include <ostream>
24#include <string>
25#include <utils/Unicode.h>
26#include <vector>
27
28namespace aapt {
29namespace util {
30
31static std::vector<std::string> splitAndTransform(const StringPiece& str, char sep,
32 const std::function<char(char)>& f) {
33 std::vector<std::string> parts;
34 const StringPiece::const_iterator end = std::end(str);
35 StringPiece::const_iterator start = std::begin(str);
36 StringPiece::const_iterator current;
37 do {
38 current = std::find(start, end, sep);
39 parts.emplace_back(str.substr(start, current).toString());
40 if (f) {
41 std::string& part = parts.back();
42 std::transform(part.begin(), part.end(), part.begin(), f);
43 }
44 start = current + 1;
45 } while (current != end);
46 return parts;
47}
48
49std::vector<std::string> split(const StringPiece& str, char sep) {
50 return splitAndTransform(str, sep, nullptr);
51}
52
53std::vector<std::string> splitAndLowercase(const StringPiece& str, char sep) {
54 return splitAndTransform(str, sep, ::tolower);
55}
56
Adam Lesinskid0f116b2016-07-08 15:00:32 -070057bool stringStartsWith(const StringPiece& str, const StringPiece& prefix) {
58 if (str.size() < prefix.size()) {
59 return false;
Adam Lesinski6f6ceb72014-11-14 14:48:12 -080060 }
Adam Lesinskid0f116b2016-07-08 15:00:32 -070061 return str.substr(0, prefix.size()) == prefix;
62}
Adam Lesinski6f6ceb72014-11-14 14:48:12 -080063
Adam Lesinskid0f116b2016-07-08 15:00:32 -070064bool stringEndsWith(const StringPiece& str, const StringPiece& suffix) {
65 if (str.size() < suffix.size()) {
66 return false;
Adam Lesinski6f6ceb72014-11-14 14:48:12 -080067 }
Adam Lesinskid0f116b2016-07-08 15:00:32 -070068 return str.substr(str.size() - suffix.size(), suffix.size()) == suffix;
Adam Lesinski6f6ceb72014-11-14 14:48:12 -080069}
70
Adam Lesinski3b4cd942015-10-30 16:31:42 -070071StringPiece trimWhitespace(const StringPiece& str) {
72 if (str.size() == 0 || str.data() == nullptr) {
73 return str;
74 }
75
76 const char* start = str.data();
77 const char* end = str.data() + str.length();
78
79 while (start != end && isspace(*start)) {
80 start++;
81 }
82
83 while (end != start && isspace(*(end - 1))) {
84 end--;
85 }
86
87 return StringPiece(start, end - start);
88}
89
Adam Lesinskid0f116b2016-07-08 15:00:32 -070090StringPiece::const_iterator findNonAlphaNumericAndNotInSet(const StringPiece& str,
91 const StringPiece& allowedChars) {
Adam Lesinski6f6ceb72014-11-14 14:48:12 -080092 const auto endIter = str.end();
93 for (auto iter = str.begin(); iter != endIter; ++iter) {
Adam Lesinskid0f116b2016-07-08 15:00:32 -070094 char c = *iter;
Adam Lesinski6f6ceb72014-11-14 14:48:12 -080095 if ((c >= u'a' && c <= u'z') ||
96 (c >= u'A' && c <= u'Z') ||
97 (c >= u'0' && c <= u'9')) {
98 continue;
99 }
100
101 bool match = false;
Adam Lesinskid0f116b2016-07-08 15:00:32 -0700102 for (char i : allowedChars) {
Adam Lesinski6f6ceb72014-11-14 14:48:12 -0800103 if (c == i) {
104 match = true;
105 break;
106 }
107 }
108
109 if (!match) {
110 return iter;
111 }
112 }
113 return endIter;
114}
115
Adam Lesinskid0f116b2016-07-08 15:00:32 -0700116bool isJavaClassName(const StringPiece& str) {
Adam Lesinskia1ad4a82015-06-08 11:41:09 -0700117 size_t pieces = 0;
Adam Lesinskid0f116b2016-07-08 15:00:32 -0700118 for (const StringPiece& piece : tokenize(str, '.')) {
Adam Lesinskia1ad4a82015-06-08 11:41:09 -0700119 pieces++;
120 if (piece.empty()) {
121 return false;
122 }
123
124 // Can't have starting or trailing $ character.
Adam Lesinskid0f116b2016-07-08 15:00:32 -0700125 if (piece.data()[0] == '$' || piece.data()[piece.size() - 1] == '$') {
Adam Lesinskia1ad4a82015-06-08 11:41:09 -0700126 return false;
127 }
128
Adam Lesinskid0f116b2016-07-08 15:00:32 -0700129 if (findNonAlphaNumericAndNotInSet(piece, "$_") != piece.end()) {
Adam Lesinskia1ad4a82015-06-08 11:41:09 -0700130 return false;
131 }
132 }
133 return pieces >= 2;
134}
135
Adam Lesinskid0f116b2016-07-08 15:00:32 -0700136bool isJavaPackageName(const StringPiece& str) {
Adam Lesinski1ab598f2015-08-14 14:26:04 -0700137 if (str.empty()) {
138 return false;
139 }
140
141 size_t pieces = 0;
Adam Lesinskid0f116b2016-07-08 15:00:32 -0700142 for (const StringPiece& piece : tokenize(str, '.')) {
Adam Lesinski1ab598f2015-08-14 14:26:04 -0700143 pieces++;
144 if (piece.empty()) {
145 return false;
146 }
147
Adam Lesinskid0f116b2016-07-08 15:00:32 -0700148 if (piece.data()[0] == '_' || piece.data()[piece.size() - 1] == '_') {
Adam Lesinski1ab598f2015-08-14 14:26:04 -0700149 return false;
150 }
151
Adam Lesinskid0f116b2016-07-08 15:00:32 -0700152 if (findNonAlphaNumericAndNotInSet(piece, "_") != piece.end()) {
Adam Lesinski1ab598f2015-08-14 14:26:04 -0700153 return false;
154 }
155 }
156 return pieces >= 1;
157}
158
Adam Lesinskid0f116b2016-07-08 15:00:32 -0700159Maybe<std::string> getFullyQualifiedClassName(const StringPiece& package,
160 const StringPiece& className) {
Adam Lesinskia1ad4a82015-06-08 11:41:09 -0700161 if (className.empty()) {
162 return {};
163 }
164
165 if (util::isJavaClassName(className)) {
166 return className.toString();
167 }
168
169 if (package.empty()) {
170 return {};
171 }
172
Adam Lesinskid0f116b2016-07-08 15:00:32 -0700173 std::string result(package.data(), package.size());
174 if (className.data()[0] != '.') {
175 result += '.';
Adam Lesinskia1ad4a82015-06-08 11:41:09 -0700176 }
Adam Lesinski52364f72016-01-11 13:10:24 -0800177
Adam Lesinskia1ad4a82015-06-08 11:41:09 -0700178 result.append(className.data(), className.size());
179 if (!isJavaClassName(result)) {
180 return {};
181 }
182 return result;
183}
184
/**
 * Returns the number of consecutive ASCII digits ('0'-'9') at the front of the
 * range [start, end).
 */
static size_t consumeDigits(const char* start, const char* end) {
    size_t count = 0;
    while (start + count != end) {
        const char ch = start[count];
        if (ch < '0' || ch > '9') {
            break;
        }
        ++count;
    }
    return count;
}
190
Adam Lesinskid0f116b2016-07-08 15:00:32 -0700191bool verifyJavaStringFormat(const StringPiece& str) {
192 const char* c = str.begin();
193 const char* const end = str.end();
Adam Lesinskib23f1e02015-11-03 12:24:17 -0800194
195 size_t argCount = 0;
196 bool nonpositional = false;
197 while (c != end) {
Adam Lesinskid0f116b2016-07-08 15:00:32 -0700198 if (*c == '%' && c + 1 < end) {
Adam Lesinskib23f1e02015-11-03 12:24:17 -0800199 c++;
200
Adam Lesinskid0f116b2016-07-08 15:00:32 -0700201 if (*c == '%') {
Adam Lesinskib23f1e02015-11-03 12:24:17 -0800202 c++;
203 continue;
204 }
205
206 argCount++;
207
208 size_t numDigits = consumeDigits(c, end);
209 if (numDigits > 0) {
210 c += numDigits;
Adam Lesinskid0f116b2016-07-08 15:00:32 -0700211 if (c != end && *c != '$') {
Adam Lesinskib23f1e02015-11-03 12:24:17 -0800212 // The digits were a size, but not a positional argument.
213 nonpositional = true;
214 }
Adam Lesinskid0f116b2016-07-08 15:00:32 -0700215 } else if (*c == '<') {
Adam Lesinskib23f1e02015-11-03 12:24:17 -0800216 // Reusing last argument, bad idea since positions can be moved around
217 // during translation.
218 nonpositional = true;
219
220 c++;
221
222 // Optionally we can have a $ after
Adam Lesinskid0f116b2016-07-08 15:00:32 -0700223 if (c != end && *c == '$') {
Adam Lesinskib23f1e02015-11-03 12:24:17 -0800224 c++;
225 }
226 } else {
227 nonpositional = true;
228 }
229
230 // Ignore size, width, flags, etc.
Adam Lesinskid0f116b2016-07-08 15:00:32 -0700231 while (c != end && (*c == '-' ||
232 *c == '#' ||
233 *c == '+' ||
234 *c == ' ' ||
235 *c == ',' ||
236 *c == '(' ||
237 (*c >= '0' && *c <= '9'))) {
Adam Lesinskib23f1e02015-11-03 12:24:17 -0800238 c++;
239 }
240
241 /*
242 * This is a shortcut to detect strings that are going to Time.format()
243 * instead of String.format()
244 *
245 * Comparison of String.format() and Time.format() args:
246 *
247 * String: ABC E GH ST X abcdefgh nost x
248 * Time: DEFGHKMS W Za d hkm s w yz
249 *
250 * Therefore we know it's definitely Time if we have:
251 * DFKMWZkmwyz
252 */
253 if (c != end) {
254 switch (*c) {
255 case 'D':
256 case 'F':
257 case 'K':
258 case 'M':
259 case 'W':
260 case 'Z':
261 case 'k':
262 case 'm':
263 case 'w':
264 case 'y':
265 case 'z':
266 return true;
267 }
268 }
269 }
270
271 if (c != end) {
272 c++;
273 }
274 }
275
276 if (argCount > 1 && nonpositional) {
277 // Multiple arguments were specified, but some or all were non positional. Translated
278 // strings may rearrange the order of the arguments, which will break the string.
279 return false;
280 }
281 return true;
282}
283
Adam Lesinskid0f116b2016-07-08 15:00:32 -0700284static Maybe<std::string> parseUnicodeCodepoint(const char** start, const char* end) {
285 char32_t code = 0;
Adam Lesinski6f6ceb72014-11-14 14:48:12 -0800286 for (size_t i = 0; i < 4 && *start != end; i++, (*start)++) {
Adam Lesinskid0f116b2016-07-08 15:00:32 -0700287 char c = **start;
288 char32_t a;
Adam Lesinski6f6ceb72014-11-14 14:48:12 -0800289 if (c >= '0' && c <= '9') {
290 a = c - '0';
291 } else if (c >= 'a' && c <= 'f') {
292 a = c - 'a' + 10;
293 } else if (c >= 'A' && c <= 'F') {
294 a = c - 'A' + 10;
295 } else {
Adam Lesinskid0f116b2016-07-08 15:00:32 -0700296 return {};
Adam Lesinski6f6ceb72014-11-14 14:48:12 -0800297 }
298 code = (code << 4) | a;
299 }
Adam Lesinskid0f116b2016-07-08 15:00:32 -0700300
301 ssize_t len = utf32_to_utf8_length(&code, 1);
302 if (len < 0) {
303 return {};
304 }
305
306 std::string resultUtf8;
307 resultUtf8.resize(len);
308 utf32_to_utf8(&code, 1, &*resultUtf8.begin(), len + 1);
309 return resultUtf8;
Adam Lesinski6f6ceb72014-11-14 14:48:12 -0800310}
311
Adam Lesinskid0f116b2016-07-08 15:00:32 -0700312StringBuilder& StringBuilder::append(const StringPiece& str) {
Adam Lesinski6f6ceb72014-11-14 14:48:12 -0800313 if (!mError.empty()) {
314 return *this;
315 }
316
Adam Lesinskid0f116b2016-07-08 15:00:32 -0700317 const char* const end = str.end();
318 const char* start = str.begin();
319 const char* current = start;
Adam Lesinski6f6ceb72014-11-14 14:48:12 -0800320 while (current != end) {
Adam Lesinski90959882015-07-06 18:09:18 -0700321 if (mLastCharWasEscape) {
322 switch (*current) {
Adam Lesinskid0f116b2016-07-08 15:00:32 -0700323 case 't':
324 mStr += '\t';
Adam Lesinski90959882015-07-06 18:09:18 -0700325 break;
Adam Lesinskid0f116b2016-07-08 15:00:32 -0700326 case 'n':
327 mStr += '\n';
Adam Lesinski90959882015-07-06 18:09:18 -0700328 break;
Adam Lesinskid0f116b2016-07-08 15:00:32 -0700329 case '#':
330 mStr += '#';
Adam Lesinski90959882015-07-06 18:09:18 -0700331 break;
Adam Lesinskid0f116b2016-07-08 15:00:32 -0700332 case '@':
333 mStr += '@';
Adam Lesinski90959882015-07-06 18:09:18 -0700334 break;
Adam Lesinskid0f116b2016-07-08 15:00:32 -0700335 case '?':
336 mStr += '?';
Adam Lesinski90959882015-07-06 18:09:18 -0700337 break;
Adam Lesinskid0f116b2016-07-08 15:00:32 -0700338 case '"':
339 mStr += '"';
Adam Lesinski90959882015-07-06 18:09:18 -0700340 break;
Adam Lesinskid0f116b2016-07-08 15:00:32 -0700341 case '\'':
342 mStr += '\'';
Adam Lesinski90959882015-07-06 18:09:18 -0700343 break;
Adam Lesinskid0f116b2016-07-08 15:00:32 -0700344 case '\\':
345 mStr += '\\';
Adam Lesinski90959882015-07-06 18:09:18 -0700346 break;
Adam Lesinskid0f116b2016-07-08 15:00:32 -0700347 case 'u': {
Adam Lesinski90959882015-07-06 18:09:18 -0700348 current++;
Adam Lesinskid0f116b2016-07-08 15:00:32 -0700349 Maybe<std::string> c = parseUnicodeCodepoint(&current, end);
Adam Lesinski90959882015-07-06 18:09:18 -0700350 if (!c) {
351 mError = "invalid unicode escape sequence";
352 return *this;
353 }
354 mStr += c.value();
355 current -= 1;
356 break;
357 }
358
359 default:
360 // Ignore.
361 break;
362 }
363 mLastCharWasEscape = false;
364 start = current + 1;
Adam Lesinskid0f116b2016-07-08 15:00:32 -0700365 } else if (*current == '"') {
Adam Lesinski6f6ceb72014-11-14 14:48:12 -0800366 if (!mQuote && mTrailingSpace) {
367 // We found an opening quote, and we have
368 // trailing space, so we should append that
369 // space now.
370 if (mTrailingSpace) {
371 // We had trailing whitespace, so
372 // replace with a single space.
373 if (!mStr.empty()) {
Adam Lesinskid0f116b2016-07-08 15:00:32 -0700374 mStr += ' ';
Adam Lesinski6f6ceb72014-11-14 14:48:12 -0800375 }
376 mTrailingSpace = false;
377 }
378 }
379 mQuote = !mQuote;
380 mStr.append(start, current - start);
381 start = current + 1;
Adam Lesinskid0f116b2016-07-08 15:00:32 -0700382 } else if (*current == '\'' && !mQuote) {
Adam Lesinski6f6ceb72014-11-14 14:48:12 -0800383 // This should be escaped.
384 mError = "unescaped apostrophe";
385 return *this;
Adam Lesinskid0f116b2016-07-08 15:00:32 -0700386 } else if (*current == '\\') {
Adam Lesinski6f6ceb72014-11-14 14:48:12 -0800387 // This is an escape sequence, convert to the real value.
388 if (!mQuote && mTrailingSpace) {
389 // We had trailing whitespace, so
390 // replace with a single space.
391 if (!mStr.empty()) {
Adam Lesinskid0f116b2016-07-08 15:00:32 -0700392 mStr += ' ';
Adam Lesinski6f6ceb72014-11-14 14:48:12 -0800393 }
394 mTrailingSpace = false;
395 }
396 mStr.append(start, current - start);
397 start = current + 1;
Adam Lesinski90959882015-07-06 18:09:18 -0700398 mLastCharWasEscape = true;
Adam Lesinski6f6ceb72014-11-14 14:48:12 -0800399 } else if (!mQuote) {
400 // This is not quoted text, so look for whitespace.
Adam Lesinskid0f116b2016-07-08 15:00:32 -0700401 if (isspace(*current)) {
Adam Lesinski6f6ceb72014-11-14 14:48:12 -0800402 // We found whitespace, see if we have seen some
403 // before.
404 if (!mTrailingSpace) {
405 // We didn't see a previous adjacent space,
406 // so mark that we did.
407 mTrailingSpace = true;
408 mStr.append(start, current - start);
409 }
410
411 // Keep skipping whitespace.
412 start = current + 1;
413 } else if (mTrailingSpace) {
414 // We saw trailing space before, so replace all
415 // that trailing space with one space.
416 if (!mStr.empty()) {
Adam Lesinskid0f116b2016-07-08 15:00:32 -0700417 mStr += ' ';
Adam Lesinski6f6ceb72014-11-14 14:48:12 -0800418 }
419 mTrailingSpace = false;
420 }
421 }
422 current++;
423 }
424 mStr.append(start, end - start);
425 return *this;
426}
427
428std::u16string utf8ToUtf16(const StringPiece& utf8) {
429 ssize_t utf16Length = utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(utf8.data()),
430 utf8.length());
431 if (utf16Length <= 0) {
432 return {};
433 }
434
435 std::u16string utf16;
436 utf16.resize(utf16Length);
Sergio Giro03b95c72016-07-21 14:44:07 +0100437 utf8_to_utf16(
438 reinterpret_cast<const uint8_t*>(utf8.data()),
439 utf8.length(),
440 &*utf16.begin(),
441 (size_t) utf16Length + 1);
Adam Lesinski6f6ceb72014-11-14 14:48:12 -0800442 return utf16;
443}
444
445std::string utf16ToUtf8(const StringPiece16& utf16) {
446 ssize_t utf8Length = utf16_to_utf8_length(utf16.data(), utf16.length());
447 if (utf8Length <= 0) {
448 return {};
449 }
450
451 std::string utf8;
452 utf8.resize(utf8Length);
Adam Lesinskid0f116b2016-07-08 15:00:32 -0700453 utf16_to_utf8(utf16.data(), utf16.length(), &*utf8.begin(), utf8Length + 1);
Adam Lesinski6f6ceb72014-11-14 14:48:12 -0800454 return utf8;
455}
456
457bool writeAll(std::ostream& out, const BigBuffer& buffer) {
458 for (const auto& b : buffer) {
459 if (!out.write(reinterpret_cast<const char*>(b.buffer.get()), b.size)) {
460 return false;
461 }
462 }
463 return true;
464}
465
466std::unique_ptr<uint8_t[]> copy(const BigBuffer& buffer) {
467 std::unique_ptr<uint8_t[]> data = std::unique_ptr<uint8_t[]>(new uint8_t[buffer.size()]);
468 uint8_t* p = data.get();
469 for (const auto& block : buffer) {
470 memcpy(p, block.buffer.get(), block.size);
471 p += block.size;
472 }
473 return data;
474}
475
Adam Lesinskid0f116b2016-07-08 15:00:32 -0700476typename Tokenizer::iterator& Tokenizer::iterator::operator++() {
477 const char* start = mToken.end();
478 const char* end = mStr.end();
479 if (start == end) {
480 mEnd = true;
481 mToken.assign(mToken.end(), 0);
482 return *this;
483 }
484
485 start += 1;
486 const char* current = start;
487 while (current != end) {
488 if (*current == mSeparator) {
489 mToken.assign(start, current - start);
490 return *this;
491 }
492 ++current;
493 }
494 mToken.assign(start, end - start);
495 return *this;
496}
497
498bool Tokenizer::iterator::operator==(const iterator& rhs) const {
499 // We check equality here a bit differently.
500 // We need to know that the addresses are the same.
501 return mToken.begin() == rhs.mToken.begin() && mToken.end() == rhs.mToken.end() &&
502 mEnd == rhs.mEnd;
503}
504
505bool Tokenizer::iterator::operator!=(const iterator& rhs) const {
506 return !(*this == rhs);
507}
508
509Tokenizer::iterator::iterator(StringPiece s, char sep, StringPiece tok, bool end) :
510 mStr(s), mSeparator(sep), mToken(tok), mEnd(end) {
511}
512
513Tokenizer::Tokenizer(StringPiece str, char sep) :
514 mBegin(++iterator(str, sep, StringPiece(str.begin() - 1, 0), false)),
515 mEnd(str, sep, StringPiece(str.end(), 0), true) {
516}
517
518bool extractResFilePathParts(const StringPiece& path, StringPiece* outPrefix,
519 StringPiece* outEntry, StringPiece* outSuffix) {
520 const StringPiece resPrefix("res/");
521 if (!stringStartsWith(path, resPrefix)) {
Adam Lesinski1ab598f2015-08-14 14:26:04 -0700522 return false;
523 }
524
Adam Lesinskid0f116b2016-07-08 15:00:32 -0700525 StringPiece::const_iterator lastOccurence = path.end();
526 for (auto iter = path.begin() + resPrefix.size(); iter != path.end(); ++iter) {
527 if (*iter == '/') {
Adam Lesinski1ab598f2015-08-14 14:26:04 -0700528 lastOccurence = iter;
529 }
530 }
531
532 if (lastOccurence == path.end()) {
533 return false;
534 }
535
Adam Lesinskid0f116b2016-07-08 15:00:32 -0700536 auto iter = std::find(lastOccurence, path.end(), '.');
537 *outSuffix = StringPiece(iter, path.end() - iter);
538 *outEntry = StringPiece(lastOccurence + 1, iter - lastOccurence - 1);
539 *outPrefix = StringPiece(path.begin(), lastOccurence - path.begin() + 1);
Adam Lesinski1ab598f2015-08-14 14:26:04 -0700540 return true;
541}
542
Adam Lesinskid0f116b2016-07-08 15:00:32 -0700543StringPiece16 getString16(const android::ResStringPool& pool, size_t idx) {
544 size_t len;
545 const char16_t* str = pool.stringAt(idx, &len);
546 if (str != nullptr) {
547 return StringPiece16(str, len);
548 }
549 return StringPiece16();
550}
551
552std::string getString(const android::ResStringPool& pool, size_t idx) {
553 size_t len;
554 const char* str = pool.string8At(idx, &len);
555 if (str != nullptr) {
556 return std::string(str, len);
557 }
558 return utf16ToUtf8(getString16(pool, idx));
559}
560
Adam Lesinski6f6ceb72014-11-14 14:48:12 -0800561} // namespace util
562} // namespace aapt