Blame - tools/aapt2/util/Util.cpp - platform_frameworks_base

blob: 59b838587a6acfffc89699f779d7b4d8e7238616 [file] [log] [blame]

Adam Lesinski	6f6ceb7	2014-11-14 14:48:12 -0800	[diff] [blame]	1	/*
				2	* Copyright (C) 2015 The Android Open Source Project
				3	*
				4	* Licensed under the Apache License, Version 2.0 (the "License");
				5	* you may not use this file except in compliance with the License.
				6	* You may obtain a copy of the License at
				7	*
				8	* http://www.apache.org/licenses/LICENSE-2.0
				9	*
				10	* Unless required by applicable law or agreed to in writing, software
				11	* distributed under the License is distributed on an "AS IS" BASIS,
				12	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				13	* See the License for the specific language governing permissions and
				14	* limitations under the License.
				15	*/
				16
Adam Lesinski	1ab598f	2015-08-14 14:26:04 -0700	[diff] [blame]	17	#include "util/BigBuffer.h"
				18	#include "util/Maybe.h"
				19	#include "util/StringPiece.h"
				20	#include "util/Util.h"
Adam Lesinski	6f6ceb7	2014-11-14 14:48:12 -0800	[diff] [blame]	21
				22	#include <algorithm>
				23	#include <ostream>
				24	#include <string>
				25	#include <utils/Unicode.h>
				26	#include <vector>
				27
				28	namespace aapt {
				29	namespace util {
				30
// Namespace URI that extractPackageFromNamespace() maps to an empty package name.
Adam Lesinski	24aad16	2015-04-24 19:19:30 -0700	[diff] [blame]	31	constexpr const char16_t* kSchemaAuto = u"http://schemas.android.com/apk/res-auto";
// Namespace URI prefix; extractPackageFromNamespace() returns the text that follows it.
				32	constexpr const char16_t* kSchemaPrefix = u"http://schemas.android.com/apk/res/";
				33
Adam Lesinski	6f6ceb7	2014-11-14 14:48:12 -0800	[diff] [blame]	34	static std::vector<std::string> splitAndTransform(const StringPiece& str, char sep,
				35	const std::function<char(char)>& f) {
				36	std::vector<std::string> parts;
				37	const StringPiece::const_iterator end = std::end(str);
				38	StringPiece::const_iterator start = std::begin(str);
				39	StringPiece::const_iterator current;
				40	do {
				41	current = std::find(start, end, sep);
				42	parts.emplace_back(str.substr(start, current).toString());
				43	if (f) {
				44	std::string& part = parts.back();
				45	std::transform(part.begin(), part.end(), part.begin(), f);
				46	}
				47	start = current + 1;
				48	} while (current != end);
				49	return parts;
				50	}
				51
				52	std::vector<std::string> split(const StringPiece& str, char sep) {
				53	return splitAndTransform(str, sep, nullptr);
				54	}
				55
				56	std::vector<std::string> splitAndLowercase(const StringPiece& str, char sep) {
				57	return splitAndTransform(str, sep, ::tolower);
				58	}
				59
Adam Lesinski	6f6ceb7	2014-11-14 14:48:12 -0800	[diff] [blame]	60	StringPiece16 trimWhitespace(const StringPiece16& str) {
				61	if (str.size() == 0 \|\| str.data() == nullptr) {
				62	return str;
				63	}
				64
				65	const char16_t* start = str.data();
				66	const char16_t* end = str.data() + str.length();
				67
				68	while (start != end && util::isspace16(*start)) {
				69	start++;
				70	}
				71
				72	while (end != start && util::isspace16(*(end - 1))) {
				73	end--;
				74	}
				75
				76	return StringPiece16(start, end - start);
				77	}
				78
Adam Lesinski	3b4cd94	2015-10-30 16:31:42 -0700	[diff] [blame]	79	StringPiece trimWhitespace(const StringPiece& str) {
				80	if (str.size() == 0 \|\| str.data() == nullptr) {
				81	return str;
				82	}
				83
				84	const char* start = str.data();
				85	const char* end = str.data() + str.length();
				86
				87	while (start != end && isspace(*start)) {
				88	start++;
				89	}
				90
				91	while (end != start && isspace(*(end - 1))) {
				92	end--;
				93	}
				94
				95	return StringPiece(start, end - start);
				96	}
				97
Adam Lesinski	6f6ceb7	2014-11-14 14:48:12 -0800	[diff] [blame]	98	StringPiece16::const_iterator findNonAlphaNumericAndNotInSet(const StringPiece16& str,
				99	const StringPiece16& allowedChars) {
				100	const auto endIter = str.end();
				101	for (auto iter = str.begin(); iter != endIter; ++iter) {
				102	char16_t c = *iter;
				103	if ((c >= u'a' && c <= u'z') \|\|
				104	(c >= u'A' && c <= u'Z') \|\|
				105	(c >= u'0' && c <= u'9')) {
				106	continue;
				107	}
				108
				109	bool match = false;
				110	for (char16_t i : allowedChars) {
				111	if (c == i) {
				112	match = true;
				113	break;
				114	}
				115	}
				116
				117	if (!match) {
				118	return iter;
				119	}
				120	}
				121	return endIter;
				122	}
				123
Adam Lesinski	a1ad4a8	2015-06-08 11:41:09 -0700	[diff] [blame]	124	bool isJavaClassName(const StringPiece16& str) {
				125	size_t pieces = 0;
				126	for (const StringPiece16& piece : tokenize(str, u'.')) {
				127	pieces++;
				128	if (piece.empty()) {
				129	return false;
				130	}
				131
				132	// Can't have starting or trailing $ character.
				133	if (piece.data()[0] == u'$' \|\| piece.data()[piece.size() - 1] == u'$') {
				134	return false;
				135	}
				136
				137	if (findNonAlphaNumericAndNotInSet(piece, u"$_") != piece.end()) {
				138	return false;
				139	}
				140	}
				141	return pieces >= 2;
				142	}
				143
Adam Lesinski	1ab598f	2015-08-14 14:26:04 -0700	[diff] [blame]	144	bool isJavaPackageName(const StringPiece16& str) {
				145	if (str.empty()) {
				146	return false;
				147	}
				148
				149	size_t pieces = 0;
				150	for (const StringPiece16& piece : tokenize(str, u'.')) {
				151	pieces++;
				152	if (piece.empty()) {
				153	return false;
				154	}
				155
				156	if (piece.data()[0] == u'_' \|\| piece.data()[piece.size() - 1] == u'_') {
				157	return false;
				158	}
				159
				160	if (findNonAlphaNumericAndNotInSet(piece, u"_") != piece.end()) {
				161	return false;
				162	}
				163	}
				164	return pieces >= 1;
				165	}
				166
Adam Lesinski	a1ad4a8	2015-06-08 11:41:09 -0700	[diff] [blame]	167	Maybe<std::u16string> getFullyQualifiedClassName(const StringPiece16& package,
				168	const StringPiece16& className) {
				169	if (className.empty()) {
				170	return {};
				171	}
				172
				173	if (util::isJavaClassName(className)) {
				174	return className.toString();
				175	}
				176
				177	if (package.empty()) {
				178	return {};
				179	}
				180
				181	std::u16string result(package.data(), package.size());
				182	if (className.data()[0] != u'.') {
				183	result += u'.';
				184	}
				185	result.append(className.data(), className.size());
				186	if (!isJavaClassName(result)) {
				187	return {};
				188	}
				189	return result;
				190	}
				191
/**
 * Counts the consecutive ASCII decimal digits at the front of [start, end).
 *
 * @return the number of leading digits; 0 if the range is empty or does not
 *         begin with a digit.
 */
static size_t consumeDigits(const char16_t* start, const char16_t* end) {
    const char16_t* c = start;
    // Compare the character that is pointed to, not the pointer itself.
    while (c != end && *c >= u'0' && *c <= u'9') {
        ++c;
    }
    return static_cast<size_t>(c - start);
}
				197
				198	bool verifyJavaStringFormat(const StringPiece16& str) {
				199	const char16_t* c = str.begin();
				200	const char16_t* const end = str.end();
				201
				202	size_t argCount = 0;
				203	bool nonpositional = false;
				204	while (c != end) {
				205	if (*c == u'%' && c + 1 < end) {
				206	c++;
				207
				208	if (*c == u'%') {
				209	c++;
				210	continue;
				211	}
				212
				213	argCount++;
				214
				215	size_t numDigits = consumeDigits(c, end);
				216	if (numDigits > 0) {
				217	c += numDigits;
				218	if (c != end && *c != u'$') {
				219	// The digits were a size, but not a positional argument.
				220	nonpositional = true;
				221	}
				222	} else if (*c == u'<') {
				223	// Reusing last argument, bad idea since positions can be moved around
				224	// during translation.
				225	nonpositional = true;
				226
				227	c++;
				228
				229	// Optionally we can have a $ after
				230	if (c != end && *c == u'$') {
				231	c++;
				232	}
				233	} else {
				234	nonpositional = true;
				235	}
				236
				237	// Ignore size, width, flags, etc.
				238	while (c != end && (*c == u'-' \|\|
				239	*c == u'#' \|\|
				240	*c == u'+' \|\|
				241	*c == u' ' \|\|
				242	*c == u',' \|\|
				243	*c == u'(' \|\|
				244	(c >= u'0' && c <= '9'))) {
				245	c++;
				246	}
				247
				248	/*
				249	* This is a shortcut to detect strings that are going to Time.format()
				250	* instead of String.format()
				251	*
				252	* Comparison of String.format() and Time.format() args:
				253	*
				254	* String: ABC E GH ST X abcdefgh nost x
				255	* Time: DEFGHKMS W Za d hkm s w yz
				256	*
				257	* Therefore we know it's definitely Time if we have:
				258	* DFKMWZkmwyz
				259	*/
				260	if (c != end) {
				261	switch (*c) {
				262	case 'D':
				263	case 'F':
				264	case 'K':
				265	case 'M':
				266	case 'W':
				267	case 'Z':
				268	case 'k':
				269	case 'm':
				270	case 'w':
				271	case 'y':
				272	case 'z':
				273	return true;
				274	}
				275	}
				276	}
				277
				278	if (c != end) {
				279	c++;
				280	}
				281	}
				282
				283	if (argCount > 1 && nonpositional) {
				284	// Multiple arguments were specified, but some or all were non positional. Translated
				285	// strings may rearrange the order of the arguments, which will break the string.
				286	return false;
				287	}
				288	return true;
				289	}
				290
Adam Lesinski	6f6ceb7	2014-11-14 14:48:12 -0800	[diff] [blame]	291	static Maybe<char16_t> parseUnicodeCodepoint(const char16_t** start, const char16_t* end) {
				292	char16_t code = 0;
				293	for (size_t i = 0; i < 4 && start != end; i++, (start)++) {
				294	char16_t c = **start;
				295	int a;
				296	if (c >= '0' && c <= '9') {
				297	a = c - '0';
				298	} else if (c >= 'a' && c <= 'f') {
				299	a = c - 'a' + 10;
				300	} else if (c >= 'A' && c <= 'F') {
				301	a = c - 'A' + 10;
				302	} else {
				303	return make_nothing<char16_t>();
				304	}
				305	code = (code << 4) \| a;
				306	}
				307	return make_value(code);
				308	}
				309
				310	StringBuilder& StringBuilder::append(const StringPiece16& str) {
				311	if (!mError.empty()) {
				312	return *this;
				313	}
				314
				315	const char16_t* const end = str.end();
				316	const char16_t* start = str.begin();
				317	const char16_t* current = start;
				318	while (current != end) {
Adam Lesinski	9095988	2015-07-06 18:09:18 -0700	[diff] [blame]	319	if (mLastCharWasEscape) {
				320	switch (*current) {
				321	case u't':
				322	mStr += u'\t';
				323	break;
				324	case u'n':
				325	mStr += u'\n';
				326	break;
				327	case u'#':
				328	mStr += u'#';
				329	break;
				330	case u'@':
				331	mStr += u'@';
				332	break;
				333	case u'?':
				334	mStr += u'?';
				335	break;
				336	case u'"':
				337	mStr += u'"';
				338	break;
				339	case u'\'':
				340	mStr += u'\'';
				341	break;
				342	case u'\\':
				343	mStr += u'\\';
				344	break;
				345	case u'u': {
				346	current++;
				347	Maybe<char16_t> c = parseUnicodeCodepoint(&current, end);
				348	if (!c) {
				349	mError = "invalid unicode escape sequence";
				350	return *this;
				351	}
				352	mStr += c.value();
				353	current -= 1;
				354	break;
				355	}
				356
				357	default:
				358	// Ignore.
				359	break;
				360	}
				361	mLastCharWasEscape = false;
				362	start = current + 1;
				363	} else if (*current == u'"') {
Adam Lesinski	6f6ceb7	2014-11-14 14:48:12 -0800	[diff] [blame]	364	if (!mQuote && mTrailingSpace) {
				365	// We found an opening quote, and we have
				366	// trailing space, so we should append that
				367	// space now.
				368	if (mTrailingSpace) {
				369	// We had trailing whitespace, so
				370	// replace with a single space.
				371	if (!mStr.empty()) {
				372	mStr += u' ';
				373	}
				374	mTrailingSpace = false;
				375	}
				376	}
				377	mQuote = !mQuote;
				378	mStr.append(start, current - start);
				379	start = current + 1;
				380	} else if (*current == u'\'' && !mQuote) {
				381	// This should be escaped.
				382	mError = "unescaped apostrophe";
				383	return *this;
				384	} else if (*current == u'\\') {
				385	// This is an escape sequence, convert to the real value.
				386	if (!mQuote && mTrailingSpace) {
				387	// We had trailing whitespace, so
				388	// replace with a single space.
				389	if (!mStr.empty()) {
				390	mStr += u' ';
				391	}
				392	mTrailingSpace = false;
				393	}
				394	mStr.append(start, current - start);
				395	start = current + 1;
Adam Lesinski	9095988	2015-07-06 18:09:18 -0700	[diff] [blame]	396	mLastCharWasEscape = true;
Adam Lesinski	6f6ceb7	2014-11-14 14:48:12 -0800	[diff] [blame]	397	} else if (!mQuote) {
				398	// This is not quoted text, so look for whitespace.
				399	if (isspace16(*current)) {
				400	// We found whitespace, see if we have seen some
				401	// before.
				402	if (!mTrailingSpace) {
				403	// We didn't see a previous adjacent space,
				404	// so mark that we did.
				405	mTrailingSpace = true;
				406	mStr.append(start, current - start);
				407	}
				408
				409	// Keep skipping whitespace.
				410	start = current + 1;
				411	} else if (mTrailingSpace) {
				412	// We saw trailing space before, so replace all
				413	// that trailing space with one space.
				414	if (!mStr.empty()) {
				415	mStr += u' ';
				416	}
				417	mTrailingSpace = false;
				418	}
				419	}
				420	current++;
				421	}
				422	mStr.append(start, end - start);
				423	return *this;
				424	}
				425
				426	std::u16string utf8ToUtf16(const StringPiece& utf8) {
				427	ssize_t utf16Length = utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(utf8.data()),
				428	utf8.length());
				429	if (utf16Length <= 0) {
				430	return {};
				431	}
				432
				433	std::u16string utf16;
				434	utf16.resize(utf16Length);
				435	utf8_to_utf16(reinterpret_cast<const uint8_t>(utf8.data()), utf8.length(), &utf16.begin());
				436	return utf16;
				437	}
				438
				439	std::string utf16ToUtf8(const StringPiece16& utf16) {
				440	ssize_t utf8Length = utf16_to_utf8_length(utf16.data(), utf16.length());
				441	if (utf8Length <= 0) {
				442	return {};
				443	}
				444
				445	std::string utf8;
				446	utf8.resize(utf8Length);
				447	utf16_to_utf8(utf16.data(), utf16.length(), &*utf8.begin());
				448	return utf8;
				449	}
				450
				451	bool writeAll(std::ostream& out, const BigBuffer& buffer) {
				452	for (const auto& b : buffer) {
				453	if (!out.write(reinterpret_cast<const char*>(b.buffer.get()), b.size)) {
				454	return false;
				455	}
				456	}
				457	return true;
				458	}
				459
				460	std::unique_ptr<uint8_t[]> copy(const BigBuffer& buffer) {
				461	std::unique_ptr<uint8_t[]> data = std::unique_ptr<uint8_t[]>(new uint8_t[buffer.size()]);
				462	uint8_t* p = data.get();
				463	for (const auto& block : buffer) {
				464	memcpy(p, block.buffer.get(), block.size);
				465	p += block.size;
				466	}
				467	return data;
				468	}
				469
Adam Lesinski	24aad16	2015-04-24 19:19:30 -0700	[diff] [blame]	470	Maybe<std::u16string> extractPackageFromNamespace(const std::u16string& namespaceUri) {
				471	if (stringStartsWith<char16_t>(namespaceUri, kSchemaPrefix)) {
				472	StringPiece16 schemaPrefix = kSchemaPrefix;
				473	StringPiece16 package = namespaceUri;
				474	return package.substr(schemaPrefix.size(), package.size() - schemaPrefix.size())
				475	.toString();
				476	} else if (namespaceUri == kSchemaAuto) {
				477	return std::u16string();
				478	}
				479	return {};
				480	}
				481
Adam Lesinski	1ab598f	2015-08-14 14:26:04 -0700	[diff] [blame]	482	bool extractResFilePathParts(const StringPiece16& path, StringPiece16* outPrefix,
				483	StringPiece16* outEntry, StringPiece16* outSuffix) {
				484	if (!stringStartsWith<char16_t>(path, u"res/")) {
				485	return false;
				486	}
				487
				488	StringPiece16::const_iterator lastOccurence = path.end();
				489	for (auto iter = path.begin() + StringPiece16(u"res/").size(); iter != path.end(); ++iter) {
				490	if (*iter == u'/') {
				491	lastOccurence = iter;
				492	}
				493	}
				494
				495	if (lastOccurence == path.end()) {
				496	return false;
				497	}
				498
				499	auto iter = std::find(lastOccurence, path.end(), u'.');
				500	*outSuffix = StringPiece16(iter, path.end() - iter);
				501	*outEntry = StringPiece16(lastOccurence + 1, iter - lastOccurence - 1);
				502	*outPrefix = StringPiece16(path.begin(), lastOccurence - path.begin() + 1);
				503	return true;
				504	}
				505
Adam Lesinski	6f6ceb7	2014-11-14 14:48:12 -0800	[diff] [blame]	506	} // namespace util
				507	} // namespace aapt