blob: c0c3ab883a427fd733ce0b5918c48f05f225cddc [file] [log] [blame]
Roozbeh Pournaderb927c552016-01-15 11:23:42 -08001/*
2 * Copyright (C) 2016 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include <cstdint>
18#include <cstdlib>
19#include <cstring>
20#include <string>
21#include <unordered_map>
22#include <unordered_set>
23
24#include <androidfw/LocaleData.h>
25
26namespace android {
27
28#include "LocaleDataTables.cpp"
29
// Packs a two-byte language code and a two-byte region code into one 32-bit
// key: the language bytes occupy the upper 16 bits and the region bytes the
// lower 16 bits. Exactly two bytes are read from each argument, so both
// pointers must reference at least two readable bytes. A region whose two
// bytes are both NUL leaves the lower half of the key zero.
//
// Each byte is widened to uint32_t before it is shifted. A bare uint8_t
// operand would be promoted to signed int, and shifting a byte >= 0x80 left
// by 24 would then overflow into the sign bit of that int.
inline uint32_t packLocale(const char* language, const char* region) {
    const uint32_t language_hi = static_cast<uint8_t>(language[0]);
    const uint32_t language_lo = static_cast<uint8_t>(language[1]);
    const uint32_t region_hi = static_cast<uint8_t>(region[0]);
    const uint32_t region_lo = static_cast<uint8_t>(region[1]);
    return (language_hi << 24u) | (language_lo << 16u) | (region_hi << 8u) | region_lo;
}
34
// Returns the packed locale with its region half (the low 16 bits) cleared,
// keeping only the language half (the high 16 bits).
inline uint32_t dropRegion(uint32_t packed_locale) {
    const uint32_t language_mask = 0xFFFF0000u;
    return packed_locale & language_mask;
}
38
// Tells whether the region half (the low 16 bits) of a packed locale holds
// anything other than zero.
inline bool hasRegion(uint32_t packed_locale) {
    const uint32_t region_mask = 0x0000FFFFu;
    const uint32_t region_bits = packed_locale & region_mask;
    return region_bits != 0u;
}
42
43const size_t SCRIPT_LENGTH = 4;
44const size_t SCRIPT_PARENTS_COUNT = sizeof(SCRIPT_PARENTS)/sizeof(SCRIPT_PARENTS[0]);
45const uint32_t PACKED_ROOT = 0; // to represent the root locale
46
47uint32_t findParent(uint32_t packed_locale, const char* script) {
48 if (hasRegion(packed_locale)) {
49 for (size_t i = 0; i < SCRIPT_PARENTS_COUNT; i++) {
50 if (memcmp(script, SCRIPT_PARENTS[i].script, SCRIPT_LENGTH) == 0) {
51 auto map = SCRIPT_PARENTS[i].map;
52 auto lookup_result = map->find(packed_locale);
53 if (lookup_result != map->end()) {
54 return lookup_result->second;
55 }
56 break;
57 }
58 }
59 return dropRegion(packed_locale);
60 }
61 return PACKED_ROOT;
62}
63
64// Find the ancestors of a locale, and fill 'out' with it (assumes out has enough
65// space). If any of the members of stop_list was seen, write it in the
66// output but stop afterwards.
67//
68// This also outputs the index of the last written ancestor in the stop_list
69// to stop_list_index, which will be -1 if it is not found in the stop_list.
70//
71// Returns the number of ancestors written in the output, which is always
72// at least one.
73size_t findAncestors(uint32_t* out, ssize_t* stop_list_index,
74 uint32_t packed_locale, const char* script,
75 const uint32_t* stop_list, size_t stop_set_length) {
76 uint32_t ancestor = packed_locale;
77 size_t count = 0;
78 do {
79 out[count++] = ancestor;
80 for (size_t i = 0; i < stop_set_length; i++) {
81 if (stop_list[i] == ancestor) {
82 *stop_list_index = (ssize_t) i;
83 return count;
84 }
85 }
86 ancestor = findParent(ancestor, script);
87 } while (ancestor != PACKED_ROOT);
88 *stop_list_index = (ssize_t) -1;
89 return count;
90}
91
92size_t findDistance(uint32_t supported,
93 const char* script,
94 const uint32_t* request_ancestors,
95 size_t request_ancestors_count) {
96 uint32_t supported_ancestors[MAX_PARENT_DEPTH+1];
97 ssize_t request_ancestors_index;
98 const size_t supported_ancestor_count = findAncestors(
99 supported_ancestors, &request_ancestors_index,
100 supported, script,
101 request_ancestors, request_ancestors_count);
102 // Since both locales share the same root, there will always be a shared
103 // ancestor, so the distance in the parent tree is the sum of the distance
104 // of 'supported' to the lowest common ancestor (number of ancestors
105 // written for 'supported' minus 1) plus the distance of 'request' to the
106 // lowest common ancestor (the index of the ancestor in request_ancestors).
107 return supported_ancestor_count + request_ancestors_index - 1;
108}
109
110inline bool isRepresentative(uint32_t language_and_region, const char* script) {
111 const uint64_t packed_locale = (
112 (((uint64_t) language_and_region) << 32u) |
113 (((uint64_t) script[0]) << 24u) |
114 (((uint64_t) script[1]) << 16u) |
115 (((uint64_t) script[2]) << 8u) |
116 ((uint64_t) script[3]));
117
118 return (REPRESENTATIVE_LOCALES.count(packed_locale) != 0);
119}
120
121int localeDataCompareRegions(
122 const char* left_region, const char* right_region,
123 const char* requested_language, const char* requested_script,
124 const char* requested_region) {
125
126 if (left_region[0] == right_region[0] && left_region[1] == right_region[1]) {
127 return 0;
128 }
129 const uint32_t left = packLocale(requested_language, left_region);
130 const uint32_t right = packLocale(requested_language, right_region);
131 const uint32_t request = packLocale(requested_language, requested_region);
132
133 uint32_t request_ancestors[MAX_PARENT_DEPTH+1];
134 ssize_t left_right_index;
135 // Find the parents of the request, but stop as soon as we saw left or right
136 const uint32_t left_and_right[] = {left, right};
137 const size_t ancestor_count = findAncestors(
138 request_ancestors, &left_right_index,
139 request, requested_script,
140 left_and_right, sizeof(left_and_right)/sizeof(left_and_right[0]));
141 if (left_right_index == 0) { // We saw left earlier
142 return 1;
143 }
144 if (left_right_index == 1) { // We saw right earlier
145 return -1;
146 }
147
148 // If we are here, neither left nor right are an ancestor of the
149 // request. This means that all the ancestors have been computed and
150 // the last ancestor is just the language by itself. We will use the
151 // distance in the parent tree for determining the better match.
152 const size_t left_distance = findDistance(
153 left, requested_script, request_ancestors, ancestor_count);
154 const size_t right_distance = findDistance(
155 right, requested_script, request_ancestors, ancestor_count);
156 if (left_distance != right_distance) {
157 return (int) right_distance - (int) left_distance; // smaller distance is better
158 }
159
160 // If we are here, left and right are equidistant from the request. We will
161 // try and see if any of them is a representative locale.
162 const bool left_is_representative = isRepresentative(left, requested_script);
163 const bool right_is_representative = isRepresentative(right, requested_script);
164 if (left_is_representative != right_is_representative) {
165 return (int) left_is_representative - (int) right_is_representative;
166 }
167
168 // We have no way of figuring out which locale is a better match. For
169 // the sake of stability, we consider the locale with the lower region
170 // code (in dictionary order) better, with two-letter codes before
171 // three-digit codes (since two-letter codes are more specific).
172 return (int64_t) right - (int64_t) left;
173}
174
175void localeDataComputeScript(char out[4], const char* language, const char* region) {
176 if (language[0] == '\0') {
177 memset(out, '\0', SCRIPT_LENGTH);
178 return;
179 }
180 uint32_t lookup_key = packLocale(language, region);
181 auto lookup_result = LIKELY_SCRIPTS.find(lookup_key);
182 if (lookup_result == LIKELY_SCRIPTS.end()) {
183 // We couldn't find the locale. Let's try without the region
184 if (region[0] != '\0') {
185 lookup_key = dropRegion(lookup_key);
186 lookup_result = LIKELY_SCRIPTS.find(lookup_key);
187 if (lookup_result != LIKELY_SCRIPTS.end()) {
188 memcpy(out, SCRIPT_CODES[lookup_result->second], SCRIPT_LENGTH);
189 return;
190 }
191 }
192 // We don't know anything about the locale
193 memset(out, '\0', SCRIPT_LENGTH);
194 return;
195 } else {
196 // We found the locale.
197 memcpy(out, SCRIPT_CODES[lookup_result->second], SCRIPT_LENGTH);
198 }
199}
200
201} // namespace android