Blame - tools/aapt2/util/Util.cpp - platform_frameworks_base

blob: 7b0c71d93bb5d09b8dac6c99242d86b582d7d51b [file] [log] [blame]

Adam Lesinski	6f6ceb7	2014-11-14 14:48:12 -0800	[diff] [blame]	1	/*
				2	* Copyright (C) 2015 The Android Open Source Project
				3	*
				4	* Licensed under the Apache License, Version 2.0 (the "License");
				5	* you may not use this file except in compliance with the License.
				6	* You may obtain a copy of the License at
				7	*
				8	* http://www.apache.org/licenses/LICENSE-2.0
				9	*
				10	* Unless required by applicable law or agreed to in writing, software
				11	* distributed under the License is distributed on an "AS IS" BASIS,
				12	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				13	* See the License for the specific language governing permissions and
				14	* limitations under the License.
				15	*/
				16
Adam Lesinski	1ab598f	2015-08-14 14:26:04 -0700	[diff] [blame]	17	#include "util/BigBuffer.h"
				18	#include "util/Maybe.h"
				19	#include "util/StringPiece.h"
				20	#include "util/Util.h"
Adam Lesinski	6f6ceb7	2014-11-14 14:48:12 -0800	[diff] [blame]	21
				22	#include <algorithm>
				23	#include <ostream>
				24	#include <string>
				25	#include <utils/Unicode.h>
				26	#include <vector>
				27
				28	namespace aapt {
				29	namespace util {
				30
				31	static std::vector<std::string> splitAndTransform(const StringPiece& str, char sep,
				32	const std::function<char(char)>& f) {
				33	std::vector<std::string> parts;
				34	const StringPiece::const_iterator end = std::end(str);
				35	StringPiece::const_iterator start = std::begin(str);
				36	StringPiece::const_iterator current;
				37	do {
				38	current = std::find(start, end, sep);
				39	parts.emplace_back(str.substr(start, current).toString());
				40	if (f) {
				41	std::string& part = parts.back();
				42	std::transform(part.begin(), part.end(), part.begin(), f);
				43	}
				44	start = current + 1;
				45	} while (current != end);
				46	return parts;
				47	}
				48
				49	std::vector<std::string> split(const StringPiece& str, char sep) {
				50	return splitAndTransform(str, sep, nullptr);
				51	}
				52
				53	std::vector<std::string> splitAndLowercase(const StringPiece& str, char sep) {
				54	return splitAndTransform(str, sep, ::tolower);
				55	}
				56
Adam Lesinski	6f6ceb7	2014-11-14 14:48:12 -0800	[diff] [blame]	57	StringPiece16 trimWhitespace(const StringPiece16& str) {
				58	if (str.size() == 0 \|\| str.data() == nullptr) {
				59	return str;
				60	}
				61
				62	const char16_t* start = str.data();
				63	const char16_t* end = str.data() + str.length();
				64
				65	while (start != end && util::isspace16(*start)) {
				66	start++;
				67	}
				68
				69	while (end != start && util::isspace16(*(end - 1))) {
				70	end--;
				71	}
				72
				73	return StringPiece16(start, end - start);
				74	}
				75
Adam Lesinski	3b4cd94	2015-10-30 16:31:42 -0700	[diff] [blame]	76	StringPiece trimWhitespace(const StringPiece& str) {
				77	if (str.size() == 0 \|\| str.data() == nullptr) {
				78	return str;
				79	}
				80
				81	const char* start = str.data();
				82	const char* end = str.data() + str.length();
				83
				84	while (start != end && isspace(*start)) {
				85	start++;
				86	}
				87
				88	while (end != start && isspace(*(end - 1))) {
				89	end--;
				90	}
				91
				92	return StringPiece(start, end - start);
				93	}
				94
Adam Lesinski	6f6ceb7	2014-11-14 14:48:12 -0800	[diff] [blame]	95	StringPiece16::const_iterator findNonAlphaNumericAndNotInSet(const StringPiece16& str,
				96	const StringPiece16& allowedChars) {
				97	const auto endIter = str.end();
				98	for (auto iter = str.begin(); iter != endIter; ++iter) {
				99	char16_t c = *iter;
				100	if ((c >= u'a' && c <= u'z') \|\|
				101	(c >= u'A' && c <= u'Z') \|\|
				102	(c >= u'0' && c <= u'9')) {
				103	continue;
				104	}
				105
				106	bool match = false;
				107	for (char16_t i : allowedChars) {
				108	if (c == i) {
				109	match = true;
				110	break;
				111	}
				112	}
				113
				114	if (!match) {
				115	return iter;
				116	}
				117	}
				118	return endIter;
				119	}
				120
Adam Lesinski	a1ad4a8	2015-06-08 11:41:09 -0700	[diff] [blame]	121	bool isJavaClassName(const StringPiece16& str) {
				122	size_t pieces = 0;
				123	for (const StringPiece16& piece : tokenize(str, u'.')) {
				124	pieces++;
				125	if (piece.empty()) {
				126	return false;
				127	}
				128
				129	// Can't have starting or trailing $ character.
				130	if (piece.data()[0] == u'$' \|\| piece.data()[piece.size() - 1] == u'$') {
				131	return false;
				132	}
				133
				134	if (findNonAlphaNumericAndNotInSet(piece, u"$_") != piece.end()) {
				135	return false;
				136	}
				137	}
				138	return pieces >= 2;
				139	}
				140
Adam Lesinski	1ab598f	2015-08-14 14:26:04 -0700	[diff] [blame]	141	bool isJavaPackageName(const StringPiece16& str) {
				142	if (str.empty()) {
				143	return false;
				144	}
				145
				146	size_t pieces = 0;
				147	for (const StringPiece16& piece : tokenize(str, u'.')) {
				148	pieces++;
				149	if (piece.empty()) {
				150	return false;
				151	}
				152
				153	if (piece.data()[0] == u'_' \|\| piece.data()[piece.size() - 1] == u'_') {
				154	return false;
				155	}
				156
				157	if (findNonAlphaNumericAndNotInSet(piece, u"_") != piece.end()) {
				158	return false;
				159	}
				160	}
				161	return pieces >= 1;
				162	}
				163
Adam Lesinski	a1ad4a8	2015-06-08 11:41:09 -0700	[diff] [blame]	164	Maybe<std::u16string> getFullyQualifiedClassName(const StringPiece16& package,
				165	const StringPiece16& className) {
				166	if (className.empty()) {
				167	return {};
				168	}
				169
				170	if (util::isJavaClassName(className)) {
				171	return className.toString();
				172	}
				173
				174	if (package.empty()) {
				175	return {};
				176	}
				177
Adam Lesinski	a1ad4a8	2015-06-08 11:41:09 -0700	[diff] [blame]	178	if (className.data()[0] != u'.') {
Adam Lesinski	52364f7	2016-01-11 13:10:24 -0800	[diff] [blame^]	179	return {};
Adam Lesinski	a1ad4a8	2015-06-08 11:41:09 -0700	[diff] [blame]	180	}
Adam Lesinski	52364f7	2016-01-11 13:10:24 -0800	[diff] [blame^]	181
				182	std::u16string result(package.data(), package.size());
Adam Lesinski	a1ad4a8	2015-06-08 11:41:09 -0700	[diff] [blame]	183	result.append(className.data(), className.size());
				184	if (!isJavaClassName(result)) {
				185	return {};
				186	}
				187	return result;
				188	}
				189
/**
 * Counts the run of ASCII decimal digits at the beginning of [start, end).
 *
 * @return the number of leading digits; 0 if the range is empty or does not
 *         begin with a digit.
 */
static size_t consumeDigits(const char16_t* start, const char16_t* end) {
    const char16_t* c = start;
    // Inspect the character being pointed at, not the pointer itself.
    while (c != end && *c >= u'0' && *c <= u'9') {
        ++c;
    }
    return static_cast<size_t>(c - start);
}
				195
				196	bool verifyJavaStringFormat(const StringPiece16& str) {
				197	const char16_t* c = str.begin();
				198	const char16_t* const end = str.end();
				199
				200	size_t argCount = 0;
				201	bool nonpositional = false;
				202	while (c != end) {
				203	if (*c == u'%' && c + 1 < end) {
				204	c++;
				205
				206	if (*c == u'%') {
				207	c++;
				208	continue;
				209	}
				210
				211	argCount++;
				212
				213	size_t numDigits = consumeDigits(c, end);
				214	if (numDigits > 0) {
				215	c += numDigits;
				216	if (c != end && *c != u'$') {
				217	// The digits were a size, but not a positional argument.
				218	nonpositional = true;
				219	}
				220	} else if (*c == u'<') {
				221	// Reusing last argument, bad idea since positions can be moved around
				222	// during translation.
				223	nonpositional = true;
				224
				225	c++;
				226
				227	// Optionally we can have a $ after
				228	if (c != end && *c == u'$') {
				229	c++;
				230	}
				231	} else {
				232	nonpositional = true;
				233	}
				234
				235	// Ignore size, width, flags, etc.
				236	while (c != end && (*c == u'-' \|\|
				237	*c == u'#' \|\|
				238	*c == u'+' \|\|
				239	*c == u' ' \|\|
				240	*c == u',' \|\|
				241	*c == u'(' \|\|
				242	(c >= u'0' && c <= '9'))) {
				243	c++;
				244	}
				245
				246	/*
				247	* This is a shortcut to detect strings that are going to Time.format()
				248	* instead of String.format()
				249	*
				250	* Comparison of String.format() and Time.format() args:
				251	*
				252	* String: ABC E GH ST X abcdefgh nost x
				253	* Time: DEFGHKMS W Za d hkm s w yz
				254	*
				255	* Therefore we know it's definitely Time if we have:
				256	* DFKMWZkmwyz
				257	*/
				258	if (c != end) {
				259	switch (*c) {
				260	case 'D':
				261	case 'F':
				262	case 'K':
				263	case 'M':
				264	case 'W':
				265	case 'Z':
				266	case 'k':
				267	case 'm':
				268	case 'w':
				269	case 'y':
				270	case 'z':
				271	return true;
				272	}
				273	}
				274	}
				275
				276	if (c != end) {
				277	c++;
				278	}
				279	}
				280
				281	if (argCount > 1 && nonpositional) {
				282	// Multiple arguments were specified, but some or all were non positional. Translated
				283	// strings may rearrange the order of the arguments, which will break the string.
				284	return false;
				285	}
				286	return true;
				287	}
				288
Adam Lesinski	6f6ceb7	2014-11-14 14:48:12 -0800	[diff] [blame]	289	static Maybe<char16_t> parseUnicodeCodepoint(const char16_t** start, const char16_t* end) {
				290	char16_t code = 0;
				291	for (size_t i = 0; i < 4 && start != end; i++, (start)++) {
				292	char16_t c = **start;
				293	int a;
				294	if (c >= '0' && c <= '9') {
				295	a = c - '0';
				296	} else if (c >= 'a' && c <= 'f') {
				297	a = c - 'a' + 10;
				298	} else if (c >= 'A' && c <= 'F') {
				299	a = c - 'A' + 10;
				300	} else {
				301	return make_nothing<char16_t>();
				302	}
				303	code = (code << 4) \| a;
				304	}
				305	return make_value(code);
				306	}
				307
				308	StringBuilder& StringBuilder::append(const StringPiece16& str) {
				309	if (!mError.empty()) {
				310	return *this;
				311	}
				312
				313	const char16_t* const end = str.end();
				314	const char16_t* start = str.begin();
				315	const char16_t* current = start;
				316	while (current != end) {
Adam Lesinski	9095988	2015-07-06 18:09:18 -0700	[diff] [blame]	317	if (mLastCharWasEscape) {
				318	switch (*current) {
				319	case u't':
				320	mStr += u'\t';
				321	break;
				322	case u'n':
				323	mStr += u'\n';
				324	break;
				325	case u'#':
				326	mStr += u'#';
				327	break;
				328	case u'@':
				329	mStr += u'@';
				330	break;
				331	case u'?':
				332	mStr += u'?';
				333	break;
				334	case u'"':
				335	mStr += u'"';
				336	break;
				337	case u'\'':
				338	mStr += u'\'';
				339	break;
				340	case u'\\':
				341	mStr += u'\\';
				342	break;
				343	case u'u': {
				344	current++;
				345	Maybe<char16_t> c = parseUnicodeCodepoint(&current, end);
				346	if (!c) {
				347	mError = "invalid unicode escape sequence";
				348	return *this;
				349	}
				350	mStr += c.value();
				351	current -= 1;
				352	break;
				353	}
				354
				355	default:
				356	// Ignore.
				357	break;
				358	}
				359	mLastCharWasEscape = false;
				360	start = current + 1;
				361	} else if (*current == u'"') {
Adam Lesinski	6f6ceb7	2014-11-14 14:48:12 -0800	[diff] [blame]	362	if (!mQuote && mTrailingSpace) {
				363	// We found an opening quote, and we have
				364	// trailing space, so we should append that
				365	// space now.
				366	if (mTrailingSpace) {
				367	// We had trailing whitespace, so
				368	// replace with a single space.
				369	if (!mStr.empty()) {
				370	mStr += u' ';
				371	}
				372	mTrailingSpace = false;
				373	}
				374	}
				375	mQuote = !mQuote;
				376	mStr.append(start, current - start);
				377	start = current + 1;
				378	} else if (*current == u'\'' && !mQuote) {
				379	// This should be escaped.
				380	mError = "unescaped apostrophe";
				381	return *this;
				382	} else if (*current == u'\\') {
				383	// This is an escape sequence, convert to the real value.
				384	if (!mQuote && mTrailingSpace) {
				385	// We had trailing whitespace, so
				386	// replace with a single space.
				387	if (!mStr.empty()) {
				388	mStr += u' ';
				389	}
				390	mTrailingSpace = false;
				391	}
				392	mStr.append(start, current - start);
				393	start = current + 1;
Adam Lesinski	9095988	2015-07-06 18:09:18 -0700	[diff] [blame]	394	mLastCharWasEscape = true;
Adam Lesinski	6f6ceb7	2014-11-14 14:48:12 -0800	[diff] [blame]	395	} else if (!mQuote) {
				396	// This is not quoted text, so look for whitespace.
				397	if (isspace16(*current)) {
				398	// We found whitespace, see if we have seen some
				399	// before.
				400	if (!mTrailingSpace) {
				401	// We didn't see a previous adjacent space,
				402	// so mark that we did.
				403	mTrailingSpace = true;
				404	mStr.append(start, current - start);
				405	}
				406
				407	// Keep skipping whitespace.
				408	start = current + 1;
				409	} else if (mTrailingSpace) {
				410	// We saw trailing space before, so replace all
				411	// that trailing space with one space.
				412	if (!mStr.empty()) {
				413	mStr += u' ';
				414	}
				415	mTrailingSpace = false;
				416	}
				417	}
				418	current++;
				419	}
				420	mStr.append(start, end - start);
				421	return *this;
				422	}
				423
				424	std::u16string utf8ToUtf16(const StringPiece& utf8) {
				425	ssize_t utf16Length = utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(utf8.data()),
				426	utf8.length());
				427	if (utf16Length <= 0) {
				428	return {};
				429	}
				430
				431	std::u16string utf16;
				432	utf16.resize(utf16Length);
				433	utf8_to_utf16(reinterpret_cast<const uint8_t>(utf8.data()), utf8.length(), &utf16.begin());
				434	return utf16;
				435	}
				436
				437	std::string utf16ToUtf8(const StringPiece16& utf16) {
				438	ssize_t utf8Length = utf16_to_utf8_length(utf16.data(), utf16.length());
				439	if (utf8Length <= 0) {
				440	return {};
				441	}
				442
				443	std::string utf8;
				444	utf8.resize(utf8Length);
				445	utf16_to_utf8(utf16.data(), utf16.length(), &*utf8.begin());
				446	return utf8;
				447	}
				448
				449	bool writeAll(std::ostream& out, const BigBuffer& buffer) {
				450	for (const auto& b : buffer) {
				451	if (!out.write(reinterpret_cast<const char*>(b.buffer.get()), b.size)) {
				452	return false;
				453	}
				454	}
				455	return true;
				456	}
				457
				458	std::unique_ptr<uint8_t[]> copy(const BigBuffer& buffer) {
				459	std::unique_ptr<uint8_t[]> data = std::unique_ptr<uint8_t[]>(new uint8_t[buffer.size()]);
				460	uint8_t* p = data.get();
				461	for (const auto& block : buffer) {
				462	memcpy(p, block.buffer.get(), block.size);
				463	p += block.size;
				464	}
				465	return data;
				466	}
				467
Adam Lesinski	1ab598f	2015-08-14 14:26:04 -0700	[diff] [blame]	468	bool extractResFilePathParts(const StringPiece16& path, StringPiece16* outPrefix,
				469	StringPiece16* outEntry, StringPiece16* outSuffix) {
				470	if (!stringStartsWith<char16_t>(path, u"res/")) {
				471	return false;
				472	}
				473
				474	StringPiece16::const_iterator lastOccurence = path.end();
				475	for (auto iter = path.begin() + StringPiece16(u"res/").size(); iter != path.end(); ++iter) {
				476	if (*iter == u'/') {
				477	lastOccurence = iter;
				478	}
				479	}
				480
				481	if (lastOccurence == path.end()) {
				482	return false;
				483	}
				484
				485	auto iter = std::find(lastOccurence, path.end(), u'.');
				486	*outSuffix = StringPiece16(iter, path.end() - iter);
				487	*outEntry = StringPiece16(lastOccurence + 1, iter - lastOccurence - 1);
				488	*outPrefix = StringPiece16(path.begin(), lastOccurence - path.begin() + 1);
				489	return true;
				490	}
				491
Adam Lesinski	6f6ceb7	2014-11-14 14:48:12 -0800	[diff] [blame]	492	} // namespace util
				493	} // namespace aapt