blob: d03c811b29b742833ee86c93fdd93aecdaa5b9c1 [file] [log] [blame]
The Android Open Source Project54b6cfa2008-10-21 07:00:00 -07001#include "SourcePos.h"
2#include "ValuesFile.h"
3#include "XLIFFFile.h"
4#include "Perforce.h"
5#include "merge_res_and_xliff.h"
6#include "localize.h"
7#include "file_utils.h"
8#include "res_check.h"
9#include "xmb.h"
10
11#include <host/pseudolocalize.h>
12
13#include <stdarg.h>
14#include <sstream>
15#include <stdio.h>
16#include <string.h>
17
18using namespace std;
19
// Stream that log_printf() writes to; logging is off while this is NULL.
FILE* g_logFile = NULL;

// Entry point run by `localize --test`; declared here, not defined in this part of the file.
int test();
23
24int
25read_settings(const string& filename, map<string,Settings>* result, const string& rootDir)
26{
27 XMLNode* root = NodeHandler::ParseFile(filename, XMLNode::PRETTY);
28 if (root == NULL) {
29 SourcePos(filename, -1).Error("Error reading file.");
30 return 1;
31 }
32
33 // <configuration>
34 vector<XMLNode*> configNodes = root->GetElementsByName("", "configuration");
35 const size_t I = configNodes.size();
36 for (size_t i=0; i<I; i++) {
37 const XMLNode* configNode = configNodes[i];
38
39 Settings settings;
40 settings.id = configNode->GetAttribute("", "id", "");
41 if (settings.id == "") {
42 configNode->Position().Error("<configuration> needs an id attribute.");
43 delete root;
44 return 1;
45 }
46
47 settings.oldVersion = configNode->GetAttribute("", "old-cl", "");
48
49 settings.currentVersion = configNode->GetAttribute("", "new-cl", "");
50 if (settings.currentVersion == "") {
51 configNode->Position().Error("<configuration> needs a new-cl attribute.");
52 delete root;
53 return 1;
54 }
55
56 // <app>
57 vector<XMLNode*> appNodes = configNode->GetElementsByName("", "app");
58
59 const size_t J = appNodes.size();
60 for (size_t j=0; j<J; j++) {
61 const XMLNode* appNode = appNodes[j];
62
63 string dir = appNode->GetAttribute("", "dir", "");
64 if (dir == "") {
65 appNode->Position().Error("<app> needs a dir attribute.");
66 delete root;
67 return 1;
68 }
69
70 settings.apps.push_back(dir);
71 }
72
73 // <reject>
74 vector<XMLNode*> rejectNodes = configNode->GetElementsByName("", "reject");
75
76 const size_t K = rejectNodes.size();
77 for (size_t k=0; k<K; k++) {
78 const XMLNode* rejectNode = rejectNodes[k];
79
80 Reject reject;
81
82 reject.file = rejectNode->GetAttribute("", "file", "");
83 if (reject.file == "") {
84 rejectNode->Position().Error("<reject> needs a file attribute.");
85 delete root;
86 return 1;
87 }
88 string f = reject.file;
89 reject.file = rootDir;
90 reject.file += '/';
91 reject.file += f;
92
93 reject.name = rejectNode->GetAttribute("", "name", "");
94 if (reject.name == "") {
95 rejectNode->Position().Error("<reject> needs a name attribute.");
96 delete root;
97 return 1;
98 }
99
100 reject.comment = trim_string(rejectNode->CollapseTextContents());
101
102 settings.reject.push_back(reject);
103 }
104
105 (*result)[settings.id] = settings;
106 }
107
108 delete root;
109 return 0;
110}
111
112
113static void
114ValuesFile_to_XLIFFFile(const ValuesFile* values, XLIFFFile* xliff, const string& englishFilename)
115{
116 const set<StringResource>& strings = values->GetStrings();
117 for (set<StringResource>::const_iterator it=strings.begin(); it!=strings.end(); it++) {
118 StringResource res = *it;
119 res.file = englishFilename;
120 xliff->AddStringResource(res);
121 }
122}
123
124static bool
125contains_reject(const Settings& settings, const string& file, const TransUnit& tu)
126{
127 const string name = tu.id;
128 const vector<Reject>& reject = settings.reject;
129 const size_t I = reject.size();
130 for (size_t i=0; i<I; i++) {
131 const Reject& r = reject[i];
132 if (r.file == file && r.name == name) {
133 return true;
134 }
135 }
136 return false;
137}
138
139/**
140 * If it's been rejected, then we keep whatever info we have.
141 *
142 * Implements this truth table:
143 *
144 * S AT AS Keep
145 * -----------------------
146 * 0 0 0 0 (this case can't happen)
147 * 0 0 1 0 (it was there, never translated, and removed)
148 * 0 1 0 0 (somehow it got translated, but it was removed)
149 * 0 1 1 0 (it was removed after having been translated)
150 *
151 * 1 0 0 1 (it was just added)
152 * 1 0 1 1 (it was added, has been changed, but it never got translated)
153 * 1 1 0 1 (somehow it got translated, but we don't know based on what)
154 * 1 1 1 0/1 (it's in both. 0 if S=AS b/c there's no need to retranslate if they're
155 * the same. 1 if S!=AS because S changed, so it should be retranslated)
156 *
157 * The first four are cases where, whatever happened in the past, the string isn't there
158 * now, so it shouldn't be in the XLIFF file.
159 *
160 * For cases 4 and 5, the string has never been translated, so get it translated.
161 *
162 * For case 6, it's unclear where the translated version came from, so we're conservative
163 * and send it back for them to have another shot at.
164 *
165 * For case 7, we have some data. We have two choices. We could rely on the translator's
166 * translation memory or tools to notice that the strings haven't changed, and populate the
167 * <target> field themselves. Or if the string hasn't changed since last time, we can just
168 * not even tell them about it. As the project nears the end, it will be convenient to see
169 * the xliff files reducing in size, so we pick the latter. Obviously, if the string has
170 * changed, then we need to get it retranslated.
171 */
172bool
173keep_this_trans_unit(const string& file, const TransUnit& unit, void* cookie)
174{
175 const Settings* settings = reinterpret_cast<const Settings*>(cookie);
176
177 if (contains_reject(*settings, file, unit)) {
178 return true;
179 }
180
181 if (unit.source.id == "") {
182 return false;
183 }
184 if (unit.altTarget.id == "" || unit.altSource.id == "") {
185 return true;
186 }
187 return unit.source.value->ContentsToString(XLIFF_NAMESPACES)
188 != unit.altSource.value->ContentsToString(XLIFF_NAMESPACES);
189}
190
191int
192validate_config(const string& settingsFile, const map<string,Settings>& settings,
193 const string& config)
194{
195 if (settings.find(config) == settings.end()) {
196 SourcePos(settingsFile, -1).Error("settings file does not contain setting: %s\n",
197 config.c_str());
198 return 1;
199 }
200 return 0;
201}
202
203int
204validate_configs(const string& settingsFile, const map<string,Settings>& settings,
205 const vector<string>& configs)
206{
207 int err = 0;
208 for (size_t i=0; i<configs.size(); i++) {
209 string config = configs[i];
210 err |= validate_config(settingsFile, settings, config);
211 }
212 return err;
213}
214
215int
216select_files(vector<string> *resFiles, const string& config,
217 const map<string,Settings>& settings, const string& rootDir)
218{
219 int err;
220 vector<vector<string> > allResFiles;
221 vector<string> configs;
222 configs.push_back(config);
223 err = select_files(&allResFiles, configs, settings, rootDir);
224 if (err == 0) {
225 *resFiles = allResFiles[0];
226 }
227 return err;
228}
229
230int
231select_files(vector<vector<string> > *allResFiles, const vector<string>& configs,
232 const map<string,Settings>& settings, const string& rootDir)
233{
234 int err;
235 printf("Selecting files...");
236 fflush(stdout);
237
238 for (size_t i=0; i<configs.size(); i++) {
239 const string& config = configs[i];
240 const Settings& setting = settings.find(config)->second;
241
242 vector<string> resFiles;
243 err = Perforce::GetResourceFileNames(setting.currentVersion, rootDir,
244 setting.apps, &resFiles, true);
245 if (err != 0) {
246 fprintf(stderr, "error with perforce. bailing\n");
247 return err;
248 }
249
250 allResFiles->push_back(resFiles);
251 }
252 return 0;
253}
254
255static int
256do_export(const string& settingsFile, const string& rootDir, const string& outDir,
257 const string& targetLocale, const vector<string>& configs)
258{
259 bool success = true;
260 int err;
261
262 if (false) {
263 printf("settingsFile=%s\n", settingsFile.c_str());
264 printf("rootDir=%s\n", rootDir.c_str());
265 printf("outDir=%s\n", outDir.c_str());
266 for (size_t i=0; i<configs.size(); i++) {
267 printf("config[%zd]=%s\n", i, configs[i].c_str());
268 }
269 }
270
271 map<string,Settings> settings;
272 err = read_settings(settingsFile, &settings, rootDir);
273 if (err != 0) {
274 return err;
275 }
276
277 err = validate_configs(settingsFile, settings, configs);
278 if (err != 0) {
279 return err;
280 }
281
282 vector<vector<string> > allResFiles;
283 err = select_files(&allResFiles, configs, settings, rootDir);
284 if (err != 0) {
285 return err;
286 }
287
288 size_t totalFileCount = 0;
289 for (size_t i=0; i<allResFiles.size(); i++) {
290 totalFileCount += allResFiles[i].size();
291 }
292 totalFileCount *= 3; // we try all 3 versions of the file
293
294 size_t fileProgress = 0;
295 vector<Stats> stats;
296 vector<pair<string,XLIFFFile*> > xliffs;
297
298 for (size_t i=0; i<configs.size(); i++) {
299 const string& config = configs[i];
300 const Settings& setting = settings[config];
301
302 if (false) {
303 fprintf(stderr, "Configuration: %s (%zd of %zd)\n", config.c_str(), i+1,
304 configs.size());
305 fprintf(stderr, " Old CL: %s\n", setting.oldVersion.c_str());
306 fprintf(stderr, " Current CL: %s\n", setting.currentVersion.c_str());
307 }
308
309 Configuration english;
310 english.locale = "en_US";
311 Configuration translated;
312 translated.locale = targetLocale;
313 XLIFFFile* xliff = XLIFFFile::Create(english, translated, setting.currentVersion);
314
315 const vector<string>& resFiles = allResFiles[i];
316 const size_t J = resFiles.size();
317 for (size_t j=0; j<J; j++) {
318 string resFile = resFiles[j];
319
320 // parse the files into a ValuesFile
321 // pull out the strings and add them to the XLIFFFile
322
323 // current file
324 print_file_status(++fileProgress, totalFileCount);
325 ValuesFile* currentFile = get_values_file(resFile, english, CURRENT_VERSION,
326 setting.currentVersion, true);
327 if (currentFile != NULL) {
328 ValuesFile_to_XLIFFFile(currentFile, xliff, resFile);
329 //printf("currentFile=[%s]\n", currentFile->ToString().c_str());
330 } else {
331 fprintf(stderr, "error reading file %s@%s\n", resFile.c_str(),
332 setting.currentVersion.c_str());
333 success = false;
334 }
335
336 // old file
337 print_file_status(++fileProgress, totalFileCount);
338 ValuesFile* oldFile = get_values_file(resFile, english, OLD_VERSION,
339 setting.oldVersion, false);
340 if (oldFile != NULL) {
341 ValuesFile_to_XLIFFFile(oldFile, xliff, resFile);
342 //printf("oldFile=[%s]\n", oldFile->ToString().c_str());
343 }
344
345 // translated version
346 // (get the head of the tree for the most recent translation, but it's considered
347 // the old one because the "current" one hasn't been made yet, and this goes into
348 // the <alt-trans> tag if necessary
349 print_file_status(++fileProgress, totalFileCount);
350 string transFilename = translated_file_name(resFile, targetLocale);
351 ValuesFile* transFile = get_values_file(transFilename, translated, OLD_VERSION,
352 setting.currentVersion, false);
353 if (transFile != NULL) {
354 ValuesFile_to_XLIFFFile(transFile, xliff, resFile);
355 }
356
357 delete currentFile;
358 delete oldFile;
359 delete transFile;
360 }
361
362 Stats beforeFilterStats = xliff->GetStats(config);
363
364 // run through the XLIFFFile and strip out TransUnits that have identical
365 // old and current source values and are not in the reject list, or just
366 // old values and no source values
367 xliff->Filter(keep_this_trans_unit, (void*)&setting);
368
369 Stats afterFilterStats = xliff->GetStats(config);
370 afterFilterStats.totalStrings = beforeFilterStats.totalStrings;
371
372 // add the reject comments
373 for (vector<Reject>::const_iterator reject = setting.reject.begin();
374 reject != setting.reject.end(); reject++) {
375 TransUnit* tu = xliff->EditTransUnit(reject->file, reject->name);
376 tu->rejectComment = reject->comment;
377 }
378
379 // config-locale-current_cl.xliff
380 stringstream filename;
381 if (outDir != "") {
382 filename << outDir << '/';
383 }
384 filename << config << '-' << targetLocale << '-' << setting.currentVersion << ".xliff";
385 xliffs.push_back(pair<string,XLIFFFile*>(filename.str(), xliff));
386
387 stats.push_back(afterFilterStats);
388 }
389
390 // today is a good day to die
391 if (!success || SourcePos::HasErrors()) {
392 return 1;
393 }
394
395 // write the XLIFF files
396 printf("\nWriting %zd file%s...\n", xliffs.size(), xliffs.size() == 1 ? "" : "s");
397 for (vector<pair<string,XLIFFFile*> >::iterator it = xliffs.begin(); it != xliffs.end(); it++) {
398 const string& filename = it->first;
399 XLIFFFile* xliff = it->second;
400 string text = xliff->ToString();
401 write_to_file(filename, text);
402 }
403
404 // the stats
405 printf("\n"
406 " to without total\n"
407 " config files translate comments strings\n"
408 "-----------------------------------------------------------------------\n");
409 Stats totals;
410 totals.config = "total";
411 totals.files = 0;
412 totals.toBeTranslated = 0;
413 totals.noComments = 0;
414 totals.totalStrings = 0;
415 for (vector<Stats>::iterator it=stats.begin(); it!=stats.end(); it++) {
416 string cfg = it->config;
417 if (cfg.length() > 20) {
418 cfg.resize(20);
419 }
420 printf(" %-20s %-9zd %-9zd %-9zd %-19zd\n", cfg.c_str(), it->files,
421 it->toBeTranslated, it->noComments, it->totalStrings);
422 totals.files += it->files;
423 totals.toBeTranslated += it->toBeTranslated;
424 totals.noComments += it->noComments;
425 totals.totalStrings += it->totalStrings;
426 }
427 if (stats.size() > 1) {
428 printf("-----------------------------------------------------------------------\n"
429 " %-20s %-9zd %-9zd %-9zd %-19zd\n", totals.config.c_str(), totals.files,
430 totals.toBeTranslated, totals.noComments, totals.totalStrings);
431 }
432 printf("\n");
433 return 0;
434}
435
436struct PseudolocalizeSettings {
437 XLIFFFile* xliff;
438 bool expand;
439};
440
441
442string
443pseudolocalize_string(const string& source, const PseudolocalizeSettings* settings)
444{
445 return pseudolocalize_string(source);
446}
447
448static XMLNode*
449pseudolocalize_xml_node(const XMLNode* source, const PseudolocalizeSettings* settings)
450{
451 if (source->Type() == XMLNode::TEXT) {
452 return XMLNode::NewText(source->Position(), pseudolocalize_string(source->Text(), settings),
453 source->Pretty());
454 } else {
455 XMLNode* target;
456 if (source->Namespace() == XLIFF_XMLNS && source->Name() == "g") {
457 // XXX don't translate these
458 target = XMLNode::NewElement(source->Position(), source->Namespace(),
459 source->Name(), source->Attributes(), source->Pretty());
460 } else {
461 target = XMLNode::NewElement(source->Position(), source->Namespace(),
462 source->Name(), source->Attributes(), source->Pretty());
463 }
464
465 const vector<XMLNode*>& children = source->Children();
466 const size_t I = children.size();
467 for (size_t i=0; i<I; i++) {
468 target->EditChildren().push_back(pseudolocalize_xml_node(children[i], settings));
469 }
470
471 return target;
472 }
473}
474
475void
476pseudolocalize_trans_unit(const string&file, TransUnit* unit, void* cookie)
477{
478 const PseudolocalizeSettings* settings = (PseudolocalizeSettings*)cookie;
479
480 const StringResource& source = unit->source;
481 StringResource* target = &unit->target;
482 *target = source;
483
484 target->config = settings->xliff->TargetConfig();
485
486 delete target->value;
487 target->value = pseudolocalize_xml_node(source.value, settings);
488}
489
490int
491pseudolocalize_xliff(XLIFFFile* xliff, bool expand)
492{
493 PseudolocalizeSettings settings;
494
495 settings.xliff = xliff;
496 settings.expand = expand;
497 xliff->Map(pseudolocalize_trans_unit, &settings);
498 return 0;
499}
500
501static int
502do_pseudo(const string& infile, const string& outfile, bool expand)
503{
504 int err;
505
506 XLIFFFile* xliff = XLIFFFile::Parse(infile);
507 if (xliff == NULL) {
508 return 1;
509 }
510
511 pseudolocalize_xliff(xliff, expand);
512
513 err = write_to_file(outfile, xliff->ToString());
514
515 delete xliff;
516
517 return err;
518}
519
520void
521log_printf(const char *fmt, ...)
522{
523 int ret;
524 va_list ap;
525
526 if (g_logFile != NULL) {
527 va_start(ap, fmt);
528 ret = vfprintf(g_logFile, fmt, ap);
529 va_end(ap);
530 fflush(g_logFile);
531 }
532}
533
534void
535close_log_file()
536{
537 if (g_logFile != NULL) {
538 fclose(g_logFile);
539 }
540}
541
542void
543open_log_file(const char* file)
544{
545 g_logFile = fopen(file, "w");
546 printf("log file: %s -- %p\n", file, g_logFile);
547 atexit(close_log_file);
548}
549
/**
 * Print the command line help for all sub-commands to stderr.
 *
 * Always returns 1, so callers can write `return usage();`.
 */
static int
usage()
{
    // Contains no conversion specifications, so it is written verbatim.
    static const char kHelpText[] =
            "usage: localize export OPTIONS CONFIGS...\n"
            " REQUIRED OPTIONS\n"
            " --settings SETTINGS The settings file to use. See CONFIGS below.\n"
            " --root TREE_ROOT The location in Perforce of the files. e.g. //device\n"
            " --target LOCALE The target locale. See LOCALES below.\n"
            "\n"
            " OPTIONAL OPTIONS\n"
            " --out DIR Directory to put the output files. Defaults to the\n"
            " current directory if not supplied. Files are\n"
            " named as follows:\n"
            " CONFIG-LOCALE-CURRENT_CL.xliff\n"
            "\n"
            "\n"
            "usage: localize import XLIFF_FILE...\n"
            "\n"
            "Import a translated XLIFF file back into the tree.\n"
            "\n"
            "\n"
            "usage: localize xlb XMB_FILE VALUES_FILES...\n"
            "\n"
            "Read resource files from the tree file and write the corresponding XLB file\n"
            "\n"
            "Supply all of the android resource files (values files) to export after that.\n"
            "\n"
            "\n"
            "\n"
            "CONFIGS\n"
            "\n"
            "LOCALES\n"
            "Locales are specified in the form en_US They will be processed correctly\n"
            "to locate the resouce files in the tree.\n"
            "\n"
            "\n"
            "usage: localize pseudo OPTIONS INFILE [OUTFILE]\n"
            " OPTIONAL OPTIONS\n"
            " --big Pad strings so they get longer.\n"
            "\n"
            "Read INFILE, an XLIFF file, and output a pseudotranslated version of that file. If\n"
            "OUTFILE is specified, the results are written there; otherwise, the results are\n"
            "written back to INFILE.\n"
            "\n"
            "\n"
            "usage: localize rescheck FILES...\n"
            "\n"
            "Reads the base strings and prints warnings about bad resources from the given files.\n"
            "\n";

    fputs(kHelpText, stderr);
    return 1;
}
602
603int
604main(int argc, const char** argv)
605{
606 //open_log_file("log.txt");
607 //g_logFile = stdout;
608
609 if (argc == 2 && 0 == strcmp(argv[1], "--test")) {
610 return test();
611 }
612
613 if (argc < 2) {
614 return usage();
615 }
616
617 int index = 1;
618
619 if (0 == strcmp("export", argv[index])) {
620 string settingsFile;
621 string rootDir;
622 string outDir;
623 string baseLocale = "en";
624 string targetLocale;
625 string language, region;
626 vector<string> configs;
627
628 index++;
629 while (index < argc) {
630 if (0 == strcmp("--settings", argv[index])) {
631 settingsFile = argv[index+1];
632 index += 2;
633 }
634 else if (0 == strcmp("--root", argv[index])) {
635 rootDir = argv[index+1];
636 index += 2;
637 }
638 else if (0 == strcmp("--out", argv[index])) {
639 outDir = argv[index+1];
640 index += 2;
641 }
642 else if (0 == strcmp("--target", argv[index])) {
643 targetLocale = argv[index+1];
644 index += 2;
645 }
646 else if (argv[index][0] == '-') {
647 fprintf(stderr, "unknown argument %s\n", argv[index]);
648 return usage();
649 }
650 else {
651 break;
652 }
653 }
654 for (; index<argc; index++) {
655 configs.push_back(argv[index]);
656 }
657
658 if (settingsFile == "" || rootDir == "" || configs.size() == 0 || targetLocale == "") {
659 return usage();
660 }
661 if (!split_locale(targetLocale, &language, &region)) {
662 fprintf(stderr, "illegal --target locale: '%s'\n", targetLocale.c_str());
663 return usage();
664 }
665
666
667 return do_export(settingsFile, rootDir, outDir, targetLocale, configs);
668 }
669 else if (0 == strcmp("import", argv[index])) {
670 vector<string> xliffFilenames;
671
672 index++;
673 for (; index<argc; index++) {
674 xliffFilenames.push_back(argv[index]);
675 }
676
677 return do_merge(xliffFilenames);
678 }
679 else if (0 == strcmp("xlb", argv[index])) {
680 string outfile;
681 vector<string> resFiles;
682
683 index++;
684 if (argc < index+1) {
685 return usage();
686 }
687
688 outfile = argv[index];
689
690 index++;
691 for (; index<argc; index++) {
692 resFiles.push_back(argv[index]);
693 }
694
695 return do_xlb_export(outfile, resFiles);
696 }
697 else if (0 == strcmp("pseudo", argv[index])) {
698 string infile;
699 string outfile;
700 bool big = false;
701
702 index++;
703 while (index < argc) {
704 if (0 == strcmp("--big", argv[index])) {
705 big = true;
706 index += 1;
707 }
708 else if (argv[index][0] == '-') {
709 fprintf(stderr, "unknown argument %s\n", argv[index]);
710 return usage();
711 }
712 else {
713 break;
714 }
715 }
716
717 if (index == argc-1) {
718 infile = argv[index];
719 outfile = argv[index];
720 }
721 else if (index == argc-2) {
722 infile = argv[index];
723 outfile = argv[index+1];
724 }
725 else {
726 fprintf(stderr, "unknown argument %s\n", argv[index]);
727 return usage();
728 }
729
730 return do_pseudo(infile, outfile, big);
731 }
732 else if (0 == strcmp("rescheck", argv[index])) {
733 vector<string> files;
734
735 index++;
736 while (index < argc) {
737 if (argv[index][0] == '-') {
738 fprintf(stderr, "unknown argument %s\n", argv[index]);
739 return usage();
740 }
741 else {
742 break;
743 }
744 }
745 for (; index<argc; index++) {
746 files.push_back(argv[index]);
747 }
748
749 if (files.size() == 0) {
750 return usage();
751 }
752
753 return do_rescheck(files);
754 }
755 else {
756 return usage();
757 }
758
759 if (SourcePos::HasErrors()) {
760 SourcePos::PrintErrors(stderr);
761 return 1;
762 }
763
764 return 0;
765}
766