Blame - tools/aapt2/util/Util.cpp - platform_frameworks_base

blob: dfa92d79515435828efd876955473dbe7f07b8d6 [file] [log] [blame]

/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
				16
Adam Lesinski	ce5e56e	2016-10-21 17:56:45 -0700	[diff] [blame]	17	#include "util/Util.h"
Adam Lesinski	6f6ceb7	2014-11-14 14:48:12 -0800	[diff] [blame]	18
				19	#include <algorithm>
				20	#include <ostream>
				21	#include <string>
Adam Lesinski	6f6ceb7	2014-11-14 14:48:12 -0800	[diff] [blame]	22	#include <vector>
				23
Adam Lesinski	d5083f6	2017-01-16 15:07:21 -0800	[diff] [blame]	24	#include "androidfw/StringPiece.h"
Adam Lesinski	549e437	2017-06-27 18:39:07 -0700	[diff] [blame^]	25	#include "utils/Unicode.h"
Adam Lesinski	d5083f6	2017-01-16 15:07:21 -0800	[diff] [blame]	26
				27	#include "util/BigBuffer.h"
				28	#include "util/Maybe.h"
Adam Lesinski	549e437	2017-06-27 18:39:07 -0700	[diff] [blame^]	29	#include "util/Utf8Iterator.h"
Adam Lesinski	d5083f6	2017-01-16 15:07:21 -0800	[diff] [blame]	30
Adam Lesinski	549e437	2017-06-27 18:39:07 -0700	[diff] [blame^]	31	using ::android::StringPiece;
				32	using ::android::StringPiece16;
Adam Lesinski	d5083f6	2017-01-16 15:07:21 -0800	[diff] [blame]	33
Adam Lesinski	6f6ceb7	2014-11-14 14:48:12 -0800	[diff] [blame]	34	namespace aapt {
				35	namespace util {
				36
Adam Lesinski	ce5e56e	2016-10-21 17:56:45 -0700	[diff] [blame]	37	static std::vector<std::string> SplitAndTransform(
				38	const StringPiece& str, char sep, const std::function<char(char)>& f) {
				39	std::vector<std::string> parts;
				40	const StringPiece::const_iterator end = std::end(str);
				41	StringPiece::const_iterator start = std::begin(str);
				42	StringPiece::const_iterator current;
				43	do {
				44	current = std::find(start, end, sep);
Adam Lesinski	d5083f6	2017-01-16 15:07:21 -0800	[diff] [blame]	45	parts.emplace_back(str.substr(start, current).to_string());
Adam Lesinski	ce5e56e	2016-10-21 17:56:45 -0700	[diff] [blame]	46	if (f) {
				47	std::string& part = parts.back();
				48	std::transform(part.begin(), part.end(), part.begin(), f);
				49	}
				50	start = current + 1;
				51	} while (current != end);
				52	return parts;
Adam Lesinski	6f6ceb7	2014-11-14 14:48:12 -0800	[diff] [blame]	53	}
				54
Adam Lesinski	ce5e56e	2016-10-21 17:56:45 -0700	[diff] [blame]	55	std::vector<std::string> Split(const StringPiece& str, char sep) {
				56	return SplitAndTransform(str, sep, nullptr);
Adam Lesinski	6f6ceb7	2014-11-14 14:48:12 -0800	[diff] [blame]	57	}
				58
Adam Lesinski	ce5e56e	2016-10-21 17:56:45 -0700	[diff] [blame]	59	std::vector<std::string> SplitAndLowercase(const StringPiece& str, char sep) {
				60	return SplitAndTransform(str, sep, ::tolower);
Adam Lesinski	6f6ceb7	2014-11-14 14:48:12 -0800	[diff] [blame]	61	}
				62
Adam Lesinski	ce5e56e	2016-10-21 17:56:45 -0700	[diff] [blame]	63	bool StartsWith(const StringPiece& str, const StringPiece& prefix) {
				64	if (str.size() < prefix.size()) {
				65	return false;
				66	}
				67	return str.substr(0, prefix.size()) == prefix;
Adam Lesinski	d0f116b	2016-07-08 15:00:32 -0700	[diff] [blame]	68	}
Adam Lesinski	6f6ceb7	2014-11-14 14:48:12 -0800	[diff] [blame]	69
Adam Lesinski	ce5e56e	2016-10-21 17:56:45 -0700	[diff] [blame]	70	bool EndsWith(const StringPiece& str, const StringPiece& suffix) {
				71	if (str.size() < suffix.size()) {
				72	return false;
				73	}
				74	return str.substr(str.size() - suffix.size(), suffix.size()) == suffix;
Adam Lesinski	6f6ceb7	2014-11-14 14:48:12 -0800	[diff] [blame]	75	}
				76
Adam Lesinski	ce5e56e	2016-10-21 17:56:45 -0700	[diff] [blame]	77	StringPiece TrimWhitespace(const StringPiece& str) {
				78	if (str.size() == 0 \|\| str.data() == nullptr) {
				79	return str;
				80	}
Adam Lesinski	3b4cd94	2015-10-30 16:31:42 -0700	[diff] [blame]	81
Adam Lesinski	ce5e56e	2016-10-21 17:56:45 -0700	[diff] [blame]	82	const char* start = str.data();
				83	const char* end = str.data() + str.length();
Adam Lesinski	3b4cd94	2015-10-30 16:31:42 -0700	[diff] [blame]	84
Adam Lesinski	ce5e56e	2016-10-21 17:56:45 -0700	[diff] [blame]	85	while (start != end && isspace(*start)) {
				86	start++;
				87	}
Adam Lesinski	3b4cd94	2015-10-30 16:31:42 -0700	[diff] [blame]	88
Adam Lesinski	ce5e56e	2016-10-21 17:56:45 -0700	[diff] [blame]	89	while (end != start && isspace(*(end - 1))) {
				90	end--;
				91	}
Adam Lesinski	3b4cd94	2015-10-30 16:31:42 -0700	[diff] [blame]	92
Adam Lesinski	ce5e56e	2016-10-21 17:56:45 -0700	[diff] [blame]	93	return StringPiece(start, end - start);
Adam Lesinski	3b4cd94	2015-10-30 16:31:42 -0700	[diff] [blame]	94	}
				95
Adam Lesinski	ce5e56e	2016-10-21 17:56:45 -0700	[diff] [blame]	96	StringPiece::const_iterator FindNonAlphaNumericAndNotInSet(
				97	const StringPiece& str, const StringPiece& allowed_chars) {
				98	const auto end_iter = str.end();
				99	for (auto iter = str.begin(); iter != end_iter; ++iter) {
				100	char c = *iter;
				101	if ((c >= u'a' && c <= u'z') \|\| (c >= u'A' && c <= u'Z') \|\|
				102	(c >= u'0' && c <= u'9')) {
				103	continue;
Adam Lesinski	6f6ceb7	2014-11-14 14:48:12 -0800	[diff] [blame]	104	}
Adam Lesinski	ce5e56e	2016-10-21 17:56:45 -0700	[diff] [blame]	105
				106	bool match = false;
				107	for (char i : allowed_chars) {
				108	if (c == i) {
				109	match = true;
				110	break;
				111	}
				112	}
				113
				114	if (!match) {
				115	return iter;
				116	}
				117	}
				118	return end_iter;
Adam Lesinski	6f6ceb7	2014-11-14 14:48:12 -0800	[diff] [blame]	119	}
				120
Adam Lesinski	ce5e56e	2016-10-21 17:56:45 -0700	[diff] [blame]	121	bool IsJavaClassName(const StringPiece& str) {
				122	size_t pieces = 0;
				123	for (const StringPiece& piece : Tokenize(str, '.')) {
				124	pieces++;
				125	if (piece.empty()) {
				126	return false;
Adam Lesinski	a1ad4a8	2015-06-08 11:41:09 -0700	[diff] [blame]	127	}
Adam Lesinski	ce5e56e	2016-10-21 17:56:45 -0700	[diff] [blame]	128
				129	// Can't have starting or trailing $ character.
				130	if (piece.data()[0] == '$' \|\| piece.data()[piece.size() - 1] == '$') {
				131	return false;
				132	}
				133
				134	if (FindNonAlphaNumericAndNotInSet(piece, "$_") != piece.end()) {
				135	return false;
				136	}
				137	}
				138	return pieces >= 2;
Adam Lesinski	a1ad4a8	2015-06-08 11:41:09 -0700	[diff] [blame]	139	}
				140
Adam Lesinski	ce5e56e	2016-10-21 17:56:45 -0700	[diff] [blame]	141	bool IsJavaPackageName(const StringPiece& str) {
				142	if (str.empty()) {
				143	return false;
				144	}
				145
				146	size_t pieces = 0;
				147	for (const StringPiece& piece : Tokenize(str, '.')) {
				148	pieces++;
				149	if (piece.empty()) {
				150	return false;
Adam Lesinski	1ab598f	2015-08-14 14:26:04 -0700	[diff] [blame]	151	}
				152
Adam Lesinski	ce5e56e	2016-10-21 17:56:45 -0700	[diff] [blame]	153	if (piece.data()[0] == '_' \|\| piece.data()[piece.size() - 1] == '_') {
				154	return false;
Adam Lesinski	1ab598f	2015-08-14 14:26:04 -0700	[diff] [blame]	155	}
Adam Lesinski	ce5e56e	2016-10-21 17:56:45 -0700	[diff] [blame]	156
				157	if (FindNonAlphaNumericAndNotInSet(piece, "_") != piece.end()) {
				158	return false;
				159	}
				160	}
				161	return pieces >= 1;
Adam Lesinski	1ab598f	2015-08-14 14:26:04 -0700	[diff] [blame]	162	}
				163
Adam Lesinski	ce5e56e	2016-10-21 17:56:45 -0700	[diff] [blame]	164	Maybe<std::string> GetFullyQualifiedClassName(const StringPiece& package,
				165	const StringPiece& classname) {
				166	if (classname.empty()) {
				167	return {};
				168	}
Adam Lesinski	a1ad4a8	2015-06-08 11:41:09 -0700	[diff] [blame]	169
Adam Lesinski	ce5e56e	2016-10-21 17:56:45 -0700	[diff] [blame]	170	if (util::IsJavaClassName(classname)) {
Adam Lesinski	d5083f6	2017-01-16 15:07:21 -0800	[diff] [blame]	171	return classname.to_string();
Adam Lesinski	ce5e56e	2016-10-21 17:56:45 -0700	[diff] [blame]	172	}
Adam Lesinski	a1ad4a8	2015-06-08 11:41:09 -0700	[diff] [blame]	173
Adam Lesinski	ce5e56e	2016-10-21 17:56:45 -0700	[diff] [blame]	174	if (package.empty()) {
				175	return {};
				176	}
Adam Lesinski	a1ad4a8	2015-06-08 11:41:09 -0700	[diff] [blame]	177
Adam Lesinski	ce5e56e	2016-10-21 17:56:45 -0700	[diff] [blame]	178	std::string result(package.data(), package.size());
				179	if (classname.data()[0] != '.') {
				180	result += '.';
				181	}
Adam Lesinski	52364f7	2016-01-11 13:10:24 -0800	[diff] [blame]	182
Adam Lesinski	ce5e56e	2016-10-21 17:56:45 -0700	[diff] [blame]	183	result.append(classname.data(), classname.size());
				184	if (!IsJavaClassName(result)) {
				185	return {};
				186	}
				187	return result;
Adam Lesinski	a1ad4a8	2015-06-08 11:41:09 -0700	[diff] [blame]	188	}
				189
// Returns the number of consecutive ASCII digits at the beginning of the
// range [start, end).
static size_t ConsumeDigits(const char* start, const char* end) {
  const char* c = start;
  // Compare the pointed-to character, not the pointer itself.
  while (c != end && *c >= '0' && *c <= '9') {
    ++c;
  }
  return static_cast<size_t>(c - start);
}
				196
Adam Lesinski	ce5e56e	2016-10-21 17:56:45 -0700	[diff] [blame]	197	bool VerifyJavaStringFormat(const StringPiece& str) {
				198	const char* c = str.begin();
				199	const char* const end = str.end();
Adam Lesinski	b23f1e0	2015-11-03 12:24:17 -0800	[diff] [blame]	200
Adam Lesinski	ce5e56e	2016-10-21 17:56:45 -0700	[diff] [blame]	201	size_t arg_count = 0;
				202	bool nonpositional = false;
				203	while (c != end) {
				204	if (*c == '%' && c + 1 < end) {
				205	c++;
Adam Lesinski	b23f1e0	2015-11-03 12:24:17 -0800	[diff] [blame]	206
Adam Lesinski	b9f0548	2017-06-02 16:32:37 -0700	[diff] [blame]	207	if (c == '%' \|\| c == 'n') {
Adam Lesinski	ce5e56e	2016-10-21 17:56:45 -0700	[diff] [blame]	208	c++;
				209	continue;
				210	}
Adam Lesinski	b23f1e0	2015-11-03 12:24:17 -0800	[diff] [blame]	211
Adam Lesinski	ce5e56e	2016-10-21 17:56:45 -0700	[diff] [blame]	212	arg_count++;
Adam Lesinski	b23f1e0	2015-11-03 12:24:17 -0800	[diff] [blame]	213
Adam Lesinski	ce5e56e	2016-10-21 17:56:45 -0700	[diff] [blame]	214	size_t num_digits = ConsumeDigits(c, end);
				215	if (num_digits > 0) {
				216	c += num_digits;
				217	if (c != end && *c != '$') {
				218	// The digits were a size, but not a positional argument.
				219	nonpositional = true;
Adam Lesinski	b23f1e0	2015-11-03 12:24:17 -0800	[diff] [blame]	220	}
Adam Lesinski	ce5e56e	2016-10-21 17:56:45 -0700	[diff] [blame]	221	} else if (*c == '<') {
				222	// Reusing last argument, bad idea since positions can be moved around
				223	// during translation.
				224	nonpositional = true;
Adam Lesinski	b23f1e0	2015-11-03 12:24:17 -0800	[diff] [blame]	225
Adam Lesinski	ce5e56e	2016-10-21 17:56:45 -0700	[diff] [blame]	226	c++;
				227
				228	// Optionally we can have a $ after
				229	if (c != end && *c == '$') {
				230	c++;
Adam Lesinski	b23f1e0	2015-11-03 12:24:17 -0800	[diff] [blame]	231	}
Adam Lesinski	ce5e56e	2016-10-21 17:56:45 -0700	[diff] [blame]	232	} else {
				233	nonpositional = true;
				234	}
				235
				236	// Ignore size, width, flags, etc.
				237	while (c != end && (c == '-' \|\| c == '#' \|\| c == '+' \|\| c == ' ' \|\|
				238	c == ',' \|\| c == '(' \|\| (c >= '0' && c <= '9'))) {
				239	c++;
				240	}
				241
				242	/*
				243	* This is a shortcut to detect strings that are going to Time.format()
				244	* instead of String.format()
				245	*
				246	* Comparison of String.format() and Time.format() args:
				247	*
				248	* String: ABC E GH ST X abcdefgh nost x
				249	* Time: DEFGHKMS W Za d hkm s w yz
				250	*
				251	* Therefore we know it's definitely Time if we have:
				252	* DFKMWZkmwyz
				253	*/
				254	if (c != end) {
				255	switch (*c) {
				256	case 'D':
				257	case 'F':
				258	case 'K':
				259	case 'M':
				260	case 'W':
				261	case 'Z':
				262	case 'k':
				263	case 'm':
				264	case 'w':
				265	case 'y':
				266	case 'z':
				267	return true;
				268	}
				269	}
Adam Lesinski	b23f1e0	2015-11-03 12:24:17 -0800	[diff] [blame]	270	}
				271
Adam Lesinski	ce5e56e	2016-10-21 17:56:45 -0700	[diff] [blame]	272	if (c != end) {
				273	c++;
Adam Lesinski	b23f1e0	2015-11-03 12:24:17 -0800	[diff] [blame]	274	}
Adam Lesinski	ce5e56e	2016-10-21 17:56:45 -0700	[diff] [blame]	275	}
				276
				277	if (arg_count > 1 && nonpositional) {
				278	// Multiple arguments were specified, but some or all were non positional.
				279	// Translated
				280	// strings may rearrange the order of the arguments, which will break the
				281	// string.
				282	return false;
				283	}
				284	return true;
Adam Lesinski	b23f1e0	2015-11-03 12:24:17 -0800	[diff] [blame]	285	}
				286
Adam Lesinski	549e437	2017-06-27 18:39:07 -0700	[diff] [blame^]	287	static bool AppendCodepointToUtf8String(char32_t codepoint, std::string* output) {
				288	ssize_t len = utf32_to_utf8_length(&codepoint, 1);
				289	if (len < 0) {
				290	return false;
				291	}
				292
				293	const size_t start_append_pos = output->size();
				294
				295	// Make room for the next character.
				296	output->resize(output->size() + len);
				297
				298	char* dst = &*(output->begin() + start_append_pos);
				299	utf32_to_utf8(&codepoint, 1, dst, len + 1);
				300	return true;
				301	}
				302
				303	static bool AppendUnicodeCodepoint(Utf8Iterator* iter, std::string* output) {
Adam Lesinski	ce5e56e	2016-10-21 17:56:45 -0700	[diff] [blame]	304	char32_t code = 0;
Adam Lesinski	549e437	2017-06-27 18:39:07 -0700	[diff] [blame^]	305	for (size_t i = 0; i < 4 && iter->HasNext(); i++) {
				306	char32_t codepoint = iter->Next();
Adam Lesinski	ce5e56e	2016-10-21 17:56:45 -0700	[diff] [blame]	307	char32_t a;
Adam Lesinski	549e437	2017-06-27 18:39:07 -0700	[diff] [blame^]	308	if (codepoint >= U'0' && codepoint <= U'9') {
				309	a = codepoint - U'0';
				310	} else if (codepoint >= U'a' && codepoint <= U'f') {
				311	a = codepoint - U'a' + 10;
				312	} else if (codepoint >= U'A' && codepoint <= U'F') {
				313	a = codepoint - U'A' + 10;
Adam Lesinski	ce5e56e	2016-10-21 17:56:45 -0700	[diff] [blame]	314	} else {
				315	return {};
Adam Lesinski	6f6ceb7	2014-11-14 14:48:12 -0800	[diff] [blame]	316	}
Adam Lesinski	ce5e56e	2016-10-21 17:56:45 -0700	[diff] [blame]	317	code = (code << 4) \| a;
				318	}
Adam Lesinski	549e437	2017-06-27 18:39:07 -0700	[diff] [blame^]	319	return AppendCodepointToUtf8String(code, output);
				320	}
Adam Lesinski	d0f116b	2016-07-08 15:00:32 -0700	[diff] [blame]	321
// Returns true if `codepoint` fits in a single byte (<= 0xFF) and isspace()
// classifies that byte as whitespace. Larger codepoints are never whitespace.
static bool IsCodepointSpace(char32_t codepoint) {
  if (static_cast<uint32_t>(codepoint) & 0xffffff00u) {
    return false;
  }
  // isspace() is undefined for negative arguments other than EOF, so the
  // value (0x00-0xFF here) must go through unsigned char, not plain char.
  return isspace(static_cast<unsigned char>(codepoint)) != 0;
}
				328
Adam Lesinski	ce5e56e	2016-10-21 17:56:45 -0700	[diff] [blame]	329	StringBuilder& StringBuilder::Append(const StringPiece& str) {
				330	if (!error_.empty()) {
Adam Lesinski	6f6ceb7	2014-11-14 14:48:12 -0800	[diff] [blame]	331	return *this;
Adam Lesinski	ce5e56e	2016-10-21 17:56:45 -0700	[diff] [blame]	332	}
Adam Lesinski	6f6ceb7	2014-11-14 14:48:12 -0800	[diff] [blame]	333
Adam Lesinski	ce5e56e	2016-10-21 17:56:45 -0700	[diff] [blame]	334	// Where the new data will be appended to.
Adam Lesinski	549e437	2017-06-27 18:39:07 -0700	[diff] [blame^]	335	const size_t new_data_index = str_.size();
Adam Lesinski	6f6ceb7	2014-11-14 14:48:12 -0800	[diff] [blame]	336
Adam Lesinski	549e437	2017-06-27 18:39:07 -0700	[diff] [blame^]	337	Utf8Iterator iter(str);
				338	while (iter.HasNext()) {
				339	const char32_t codepoint = iter.Next();
				340
Adam Lesinski	ce5e56e	2016-10-21 17:56:45 -0700	[diff] [blame]	341	if (last_char_was_escape_) {
Adam Lesinski	549e437	2017-06-27 18:39:07 -0700	[diff] [blame^]	342	switch (codepoint) {
				343	case U't':
Adam Lesinski	ce5e56e	2016-10-21 17:56:45 -0700	[diff] [blame]	344	str_ += '\t';
				345	break;
Adam Lesinski	549e437	2017-06-27 18:39:07 -0700	[diff] [blame^]	346
				347	case U'n':
Adam Lesinski	ce5e56e	2016-10-21 17:56:45 -0700	[diff] [blame]	348	str_ += '\n';
				349	break;
Adam Lesinski	549e437	2017-06-27 18:39:07 -0700	[diff] [blame^]	350
				351	case U'#':
				352	case U'@':
				353	case U'?':
				354	case U'"':
				355	case U'\'':
				356	case U'\\':
				357	str_ += static_cast<char>(codepoint);
Adam Lesinski	ce5e56e	2016-10-21 17:56:45 -0700	[diff] [blame]	358	break;
Adam Lesinski	549e437	2017-06-27 18:39:07 -0700	[diff] [blame^]	359
				360	case U'u':
				361	if (!AppendUnicodeCodepoint(&iter, &str_)) {
Adam Lesinski	ce5e56e	2016-10-21 17:56:45 -0700	[diff] [blame]	362	error_ = "invalid unicode escape sequence";
				363	return *this;
				364	}
Adam Lesinski	ce5e56e	2016-10-21 17:56:45 -0700	[diff] [blame]	365	break;
Adam Lesinski	ce5e56e	2016-10-21 17:56:45 -0700	[diff] [blame]	366
				367	default:
Adam Lesinski	549e437	2017-06-27 18:39:07 -0700	[diff] [blame^]	368	// Ignore the escape character and just include the codepoint.
				369	AppendCodepointToUtf8String(codepoint, &str_);
Adam Lesinski	ce5e56e	2016-10-21 17:56:45 -0700	[diff] [blame]	370	break;
				371	}
				372	last_char_was_escape_ = false;
Adam Lesinski	549e437	2017-06-27 18:39:07 -0700	[diff] [blame^]	373
				374	} else if (codepoint == U'"') {
Adam Lesinski	ce5e56e	2016-10-21 17:56:45 -0700	[diff] [blame]	375	if (!quote_ && trailing_space_) {
				376	// We found an opening quote, and we have
				377	// trailing space, so we should append that
				378	// space now.
				379	if (trailing_space_) {
				380	// We had trailing whitespace, so
				381	// replace with a single space.
				382	if (!str_.empty()) {
				383	str_ += ' ';
				384	}
				385	trailing_space_ = false;
				386	}
				387	}
				388	quote_ = !quote_;
Adam Lesinski	549e437	2017-06-27 18:39:07 -0700	[diff] [blame^]	389
				390	} else if (codepoint == U'\'' && !quote_) {
Adam Lesinski	ce5e56e	2016-10-21 17:56:45 -0700	[diff] [blame]	391	// This should be escaped.
				392	error_ = "unescaped apostrophe";
				393	return *this;
Adam Lesinski	549e437	2017-06-27 18:39:07 -0700	[diff] [blame^]	394
				395	} else if (codepoint == U'\\') {
Adam Lesinski	ce5e56e	2016-10-21 17:56:45 -0700	[diff] [blame]	396	// This is an escape sequence, convert to the real value.
				397	if (!quote_ && trailing_space_) {
				398	// We had trailing whitespace, so
				399	// replace with a single space.
				400	if (!str_.empty()) {
				401	str_ += ' ';
				402	}
				403	trailing_space_ = false;
				404	}
Adam Lesinski	ce5e56e	2016-10-21 17:56:45 -0700	[diff] [blame]	405	last_char_was_escape_ = true;
Adam Lesinski	549e437	2017-06-27 18:39:07 -0700	[diff] [blame^]	406	} else {
				407	if (quote_) {
				408	// Quotes mean everything is taken, including whitespace.
				409	AppendCodepointToUtf8String(codepoint, &str_);
				410	} else {
				411	// This is not quoted text, so we will accumulate whitespace and only emit a single
				412	// character of whitespace if it is followed by a non-whitespace character.
				413	if (IsCodepointSpace(codepoint)) {
				414	// We found whitespace.
Adam Lesinski	ce5e56e	2016-10-21 17:56:45 -0700	[diff] [blame]	415	trailing_space_ = true;
Adam Lesinski	549e437	2017-06-27 18:39:07 -0700	[diff] [blame^]	416	} else {
				417	if (trailing_space_) {
				418	// We saw trailing space before, so replace all
				419	// that trailing space with one space.
				420	if (!str_.empty()) {
				421	str_ += ' ';
				422	}
				423	trailing_space_ = false;
				424	}
				425	AppendCodepointToUtf8String(codepoint, &str_);
Adam Lesinski	ce5e56e	2016-10-21 17:56:45 -0700	[diff] [blame]	426	}
Adam Lesinski	ce5e56e	2016-10-21 17:56:45 -0700	[diff] [blame]	427	}
Adam Lesinski	6f6ceb7	2014-11-14 14:48:12 -0800	[diff] [blame]	428	}
Adam Lesinski	ce5e56e	2016-10-21 17:56:45 -0700	[diff] [blame]	429	}
Adam Lesinski	ce5e56e	2016-10-21 17:56:45 -0700	[diff] [blame]	430
				431	// Accumulate the added string's UTF-16 length.
Adam Lesinski	549e437	2017-06-27 18:39:07 -0700	[diff] [blame^]	432	ssize_t len = utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(str_.data()) + new_data_index,
				433	str_.size() - new_data_index);
Adam Lesinski	ce5e56e	2016-10-21 17:56:45 -0700	[diff] [blame]	434	if (len < 0) {
				435	error_ = "invalid unicode code point";
				436	return *this;
				437	}
				438	utf16_len_ += len;
				439	return *this;
Adam Lesinski	6f6ceb7	2014-11-14 14:48:12 -0800	[diff] [blame]	440	}
				441
Adam Lesinski	ce5e56e	2016-10-21 17:56:45 -0700	[diff] [blame]	442	std::u16string Utf8ToUtf16(const StringPiece& utf8) {
				443	ssize_t utf16_length = utf8_to_utf16_length(
				444	reinterpret_cast<const uint8_t*>(utf8.data()), utf8.length());
				445	if (utf16_length <= 0) {
				446	return {};
				447	}
				448
				449	std::u16string utf16;
				450	utf16.resize(utf16_length);
				451	utf8_to_utf16(reinterpret_cast<const uint8_t*>(utf8.data()), utf8.length(),
				452	&*utf16.begin(), utf16_length + 1);
				453	return utf16;
				454	}
				455
				456	std::string Utf16ToUtf8(const StringPiece16& utf16) {
				457	ssize_t utf8_length = utf16_to_utf8_length(utf16.data(), utf16.length());
				458	if (utf8_length <= 0) {
				459	return {};
				460	}
				461
				462	std::string utf8;
				463	utf8.resize(utf8_length);
				464	utf16_to_utf8(utf16.data(), utf16.length(), &*utf8.begin(), utf8_length + 1);
				465	return utf8;
				466	}
				467
				468	bool WriteAll(std::ostream& out, const BigBuffer& buffer) {
				469	for (const auto& b : buffer) {
				470	if (!out.write(reinterpret_cast<const char*>(b.buffer.get()), b.size)) {
				471	return false;
Adam Lesinski	6f6ceb7	2014-11-14 14:48:12 -0800	[diff] [blame]	472	}
Adam Lesinski	ce5e56e	2016-10-21 17:56:45 -0700	[diff] [blame]	473	}
				474	return true;
				475	}
				476
				477	std::unique_ptr<uint8_t[]> Copy(const BigBuffer& buffer) {
				478	std::unique_ptr<uint8_t[]> data =
				479	std::unique_ptr<uint8_t[]>(new uint8_t[buffer.size()]);
				480	uint8_t* p = data.get();
				481	for (const auto& block : buffer) {
				482	memcpy(p, block.buffer.get(), block.size);
				483	p += block.size;
				484	}
				485	return data;
Adam Lesinski	6f6ceb7	2014-11-14 14:48:12 -0800	[diff] [blame]	486	}
				487
Adam Lesinski	d0f116b	2016-07-08 15:00:32 -0700	[diff] [blame]	488	typename Tokenizer::iterator& Tokenizer::iterator::operator++() {
Adam Lesinski	ce5e56e	2016-10-21 17:56:45 -0700	[diff] [blame]	489	const char* start = token_.end();
				490	const char* end = str_.end();
				491	if (start == end) {
				492	end_ = true;
				493	token_.assign(token_.end(), 0);
Adam Lesinski	d0f116b	2016-07-08 15:00:32 -0700	[diff] [blame]	494	return *this;
Adam Lesinski	ce5e56e	2016-10-21 17:56:45 -0700	[diff] [blame]	495	}
				496
				497	start += 1;
				498	const char* current = start;
				499	while (current != end) {
				500	if (*current == separator_) {
				501	token_.assign(start, current - start);
				502	return *this;
				503	}
				504	++current;
				505	}
				506	token_.assign(start, end - start);
				507	return *this;
Adam Lesinski	d0f116b	2016-07-08 15:00:32 -0700	[diff] [blame]	508	}
				509
				510	bool Tokenizer::iterator::operator==(const iterator& rhs) const {
Adam Lesinski	ce5e56e	2016-10-21 17:56:45 -0700	[diff] [blame]	511	// We check equality here a bit differently.
				512	// We need to know that the addresses are the same.
				513	return token_.begin() == rhs.token_.begin() &&
				514	token_.end() == rhs.token_.end() && end_ == rhs.end_;
Adam Lesinski	d0f116b	2016-07-08 15:00:32 -0700	[diff] [blame]	515	}
				516
				517	bool Tokenizer::iterator::operator!=(const iterator& rhs) const {
Adam Lesinski	ce5e56e	2016-10-21 17:56:45 -0700	[diff] [blame]	518	return !(*this == rhs);
Adam Lesinski	d0f116b	2016-07-08 15:00:32 -0700	[diff] [blame]	519	}
				520
Adam Lesinski	ce5e56e	2016-10-21 17:56:45 -0700	[diff] [blame]	521	Tokenizer::iterator::iterator(StringPiece s, char sep, StringPiece tok,
				522	bool end)
				523	: str_(s), separator_(sep), token_(tok), end_(end) {}
Adam Lesinski	d0f116b	2016-07-08 15:00:32 -0700	[diff] [blame]	524
Adam Lesinski	ce5e56e	2016-10-21 17:56:45 -0700	[diff] [blame]	525	Tokenizer::Tokenizer(StringPiece str, char sep)
				526	: begin_(++iterator(str, sep, StringPiece(str.begin() - 1, 0), false)),
				527	end_(str, sep, StringPiece(str.end(), 0), true) {}
Adam Lesinski	d0f116b	2016-07-08 15:00:32 -0700	[diff] [blame]	528
Adam Lesinski	ce5e56e	2016-10-21 17:56:45 -0700	[diff] [blame]	529	bool ExtractResFilePathParts(const StringPiece& path, StringPiece* out_prefix,
				530	StringPiece* out_entry, StringPiece* out_suffix) {
				531	const StringPiece res_prefix("res/");
				532	if (!StartsWith(path, res_prefix)) {
				533	return false;
				534	}
				535
				536	StringPiece::const_iterator last_occurence = path.end();
				537	for (auto iter = path.begin() + res_prefix.size(); iter != path.end();
				538	++iter) {
				539	if (*iter == '/') {
				540	last_occurence = iter;
Adam Lesinski	1ab598f	2015-08-14 14:26:04 -0700	[diff] [blame]	541	}
Adam Lesinski	ce5e56e	2016-10-21 17:56:45 -0700	[diff] [blame]	542	}
Adam Lesinski	1ab598f	2015-08-14 14:26:04 -0700	[diff] [blame]	543
Adam Lesinski	ce5e56e	2016-10-21 17:56:45 -0700	[diff] [blame]	544	if (last_occurence == path.end()) {
				545	return false;
				546	}
Adam Lesinski	1ab598f	2015-08-14 14:26:04 -0700	[diff] [blame]	547
Adam Lesinski	ce5e56e	2016-10-21 17:56:45 -0700	[diff] [blame]	548	auto iter = std::find(last_occurence, path.end(), '.');
				549	*out_suffix = StringPiece(iter, path.end() - iter);
				550	*out_entry = StringPiece(last_occurence + 1, iter - last_occurence - 1);
				551	*out_prefix = StringPiece(path.begin(), last_occurence - path.begin() + 1);
				552	return true;
Adam Lesinski	1ab598f	2015-08-14 14:26:04 -0700	[diff] [blame]	553	}
				554
Adam Lesinski	ce5e56e	2016-10-21 17:56:45 -0700	[diff] [blame]	555	StringPiece16 GetString16(const android::ResStringPool& pool, size_t idx) {
				556	size_t len;
				557	const char16_t* str = pool.stringAt(idx, &len);
				558	if (str != nullptr) {
				559	return StringPiece16(str, len);
				560	}
				561	return StringPiece16();
Adam Lesinski	d0f116b	2016-07-08 15:00:32 -0700	[diff] [blame]	562	}
				563
Adam Lesinski	ce5e56e	2016-10-21 17:56:45 -0700	[diff] [blame]	564	std::string GetString(const android::ResStringPool& pool, size_t idx) {
				565	size_t len;
				566	const char* str = pool.string8At(idx, &len);
				567	if (str != nullptr) {
				568	return std::string(str, len);
				569	}
				570	return Utf16ToUtf8(GetString16(pool, idx));
Adam Lesinski	d0f116b	2016-07-08 15:00:32 -0700	[diff] [blame]	571	}
				572
Adam Lesinski	ce5e56e	2016-10-21 17:56:45 -0700	[diff] [blame]	573	} // namespace util
				574	} // namespace aapt