/[soft]/drakwizard/trunk/data/intltool-merge
ViewVC logotype

Contents of /drakwizard/trunk/data/intltool-merge

Parent Directory Parent Directory | Revision Log Revision Log


Revision 486 - (show annotations) (download)
Tue Feb 8 00:14:32 2011 UTC (13 years, 2 months ago) by dmorgan
File size: 16167 byte(s)
Import cleaned drakwizard
1 #!/usr/bin/perl -w
2
3 #
4 # The Intltool Message Merger
5 #
6 # Copyright (C) 2000, 2002 Free Software Foundation.
7 # Copyright (C) 2000, 2001 Eazel, Inc
8 #
9 # Intltool is free software; you can redistribute it and/or
10 # modify it under the terms of the GNU General Public License
11 # version 2 published by the Free Software Foundation.
12 #
13 # Intltool is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 # General Public License for more details.
17 #
18 # You should have received a copy of the GNU General Public License
19 # along with this program; if not, write to the Free Software
20 # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21 #
22 # As a special exception to the GNU General Public License, if you
23 # distribute this file as part of a program that contains a
24 # configuration script generated by Autoconf, you may include it under
25 # the same distribution terms that you use for the rest of that program.
26 #
27 # Authors: Maciej Stachowiak <mjs@noisehavoc.org>
28 # Kenneth Christiansen <kenneth@gnu.org>
29 # Darin Adler <darin@bentspoon.com>
30 #
31 # Proper XML UTF-8'ification written by Cyrille Chepelov <chepelov@calixo.net>
32 #
33
34 ## Release information
35 my $PROGRAM = "intltool-merge";
36 my $PACKAGE = "intltool";
37 my $VERSION = "0.22";
38
39 ## Loaded modules
40 use strict;
41 use Getopt::Long;
42
43 ## Scalars used by the option stuff
44 my $HELP_ARG = 0;
45 my $VERSION_ARG = 0;
46 my $BA_STYLE_ARG = 0;
47 my $XML_STYLE_ARG = 0;
48 my $KEYS_STYLE_ARG = 0;
49 my $DESKTOP_STYLE_ARG = 0;
50 my $SCHEMAS_STYLE_ARG = 0;
51 my $QUIET_ARG = 0;
52 my $PASS_THROUGH_ARG = 0;
53 my $UTF8_ARG = 0;
54 my $cache_file;
55
56 ## Handle options
57 GetOptions
58 (
59 "help" => \$HELP_ARG,
60 "version" => \$VERSION_ARG,
61 "quiet|q" => \$QUIET_ARG,
62 "oaf-style|o" => \$BA_STYLE_ARG, ## for compatibility
63 "ba-style|b" => \$BA_STYLE_ARG,
64 "xml-style|x" => \$XML_STYLE_ARG,
65 "keys-style|k" => \$KEYS_STYLE_ARG,
66 "desktop-style|d" => \$DESKTOP_STYLE_ARG,
67 "schemas-style|s" => \$SCHEMAS_STYLE_ARG,
68 "pass-through|p" => \$PASS_THROUGH_ARG,
69 "utf8|u" => \$UTF8_ARG,
70 "cache|c=s" => \$cache_file
71 ) or &error;
72
73 my $PO_DIR;
74 my $FILE;
75 my $OUTFILE;
76
77 my %po_files_by_lang = ();
78 my %translations = ();
79
80 # Use this instead of \w for XML files to handle more possible characters.
81 my $w = "[-A-Za-z0-9._:]";
82
83 # XML quoted string contents
84 my $q = "[^\\\"]*";
85
86 ## Check for options.
87
88 if ($VERSION_ARG) {
89 &print_version;
90 } elsif ($HELP_ARG) {
91 &print_help;
92 } elsif ($BA_STYLE_ARG && @ARGV > 2) {
93 &preparation;
94 &print_message;
95 &ba_merge_translations;
96 &finalize;
97 } elsif ($XML_STYLE_ARG && @ARGV > 2) {
98 &utf8_sanity_check;
99 &preparation;
100 &print_message;
101 &xml_merge_translations;
102 &finalize;
103 } elsif ($KEYS_STYLE_ARG && @ARGV > 2) {
104 &utf8_sanity_check;
105 &preparation;
106 &print_message;
107 &keys_merge_translations;
108 &finalize;
109 } elsif ($DESKTOP_STYLE_ARG && @ARGV > 2) {
110 &preparation;
111 &print_message;
112 &desktop_merge_translations;
113 &finalize;
114 } elsif ($SCHEMAS_STYLE_ARG && @ARGV > 2) {
115 &preparation;
116 &print_message;
117 &schemas_merge_translations;
118 &finalize;
119 } else {
120 &print_help;
121 }
122
123 exit;
124
125 ## Sub for printing release information
126 sub print_version
127 {
128 print "${PROGRAM} (${PACKAGE}) ${VERSION}\n";
129 print "Written by Maciej Stachowiak, Darin Adler and Kenneth Christiansen.\n\n";
130 print "Copyright (C) 2000-2002 Free Software Foundation, Inc.\n";
131 print "Copyright (C) 2000-2001 Eazel, Inc.\n";
132 print "This is free software; see the source for copying conditions. There is NO\n";
133 print "warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n";
134 exit;
135 }
136
137 ## Sub for printing usage information
138 sub print_help
139 {
140 print "Usage: ${PROGRAM} [OPTIONS] PO_DIRECTORY FILENAME OUTPUT_FILE\n";
141 print "Generates an output file that includes translated versions of some attributes,\n";
142 print "from an untranslated source and a po directory that includes translations.\n\n";
143 print " -b, --ba-style includes translations in the bonobo-activation style\n";
144 print " -d, --desktop-style includes translations in the desktop style\n";
145 print " -k, --keys-style includes translations in the keys style\n";
146 print " -s, --schemas-style includes translations in the schemas style\n";
147 print " -x, --xml-style includes translations in the standard xml style\n";
148 print " -u, --utf8 convert all strings to UTF-8 before merging\n";
149 print " -p, --pass-through use strings as found in .po files, without\n";
150 print " conversion (STRONGLY unrecommended with -x)\n";
151 print " -q, --quiet suppress most messages\n";
152 print " --help display this help and exit\n";
153 print " --version output version information and exit\n";
154 print "\nReport bugs to bugzilla.gnome.org, module intltool, or contact us through \n";
155 print "<xml-i18n-tools-list\@gnome.org>.\n";
156 exit;
157 }
158
159
160 ## Sub for printing error messages
161 sub print_error
162 {
163 print "Try `${PROGRAM} --help' for more information.\n";
164 exit;
165 }
166
167
168 sub print_message
169 {
170 print "Merging translations into $OUTFILE.\n" unless $QUIET_ARG;
171 }
172
173
174 sub preparation
175 {
176 $PO_DIR = $ARGV[0];
177 $FILE = $ARGV[1];
178 $OUTFILE = $ARGV[2];
179
180 &gather_po_files;
181 &get_translation_database;
182 }
183
184 # General-purpose code for looking up translations in .po files
185
186 sub po_file2lang
187 {
188 my ($tmp) = @_;
189 $tmp =~ s/^.*\/(.*)\.po$/$1/;
190 return $tmp;
191 }
192
193 sub gather_po_files
194 {
195 for my $po_file (glob "$PO_DIR/*.po") {
196 $po_files_by_lang{po_file2lang($po_file)} = $po_file;
197 }
198 }
199
200 sub get_po_encoding
201 {
202 my ($in_po_file) = @_;
203 my $encoding = "";
204
205 open IN_PO_FILE, $in_po_file or die;
206 while (<IN_PO_FILE>) {
207 ## example: "Content-Type: text/plain; charset=ISO-8859-1\n"
208 if (/Content-Type\:.*charset=([-a-zA-Z0-9]+)\\n/) {
209 $encoding = $1;
210 last;
211 }
212 }
213 close IN_PO_FILE;
214
215 if (!$encoding) {
216 print "Warning: no encoding found in $in_po_file. Assuming ISO-8859-1\n";
217 $encoding = "ISO-8859-1";
218 }
219 return $encoding
220 }
221
222 sub utf8_sanity_check
223 {
224 if (!$UTF8_ARG) {
225 if (!$PASS_THROUGH_ARG) {
226 $PASS_THROUGH_ARG="1";
227 }
228 }
229 }
230
231 sub get_translation_database
232 {
233 if ($cache_file) {
234 &get_cached_translation_database;
235 } else {
236 &create_translation_database;
237 }
238 }
239
240 sub get_newest_po_age
241 {
242 my $newest_age;
243
244 foreach my $file (values %po_files_by_lang) {
245 my $file_age = -M $file;
246 $newest_age = $file_age if !$newest_age || $file_age < $newest_age;
247 }
248
249 return $newest_age;
250 }
251
252 sub create_cache
253 {
254 print "Generating and caching the translation database\n" unless $QUIET_ARG;
255
256 &create_translation_database;
257
258 open CACHE, ">$cache_file" || die;
259 print CACHE join "\x01", %translations;
260 close CACHE;
261 }
262
263 sub load_cache
264 {
265 print "Found cached translation database\n" unless $QUIET_ARG;
266
267 my $contents;
268 open CACHE, "<$cache_file" || die;
269 {
270 local $/;
271 $contents = <CACHE>;
272 }
273 close CACHE;
274 %translations = split "\x01", $contents;
275 }
276
277 sub get_cached_translation_database
278 {
279 my $cache_file_age = -M $cache_file;
280 if (defined $cache_file_age) {
281 if ($cache_file_age <= &get_newest_po_age) {
282 &load_cache;
283 return;
284 }
285 print "Found too-old cached translation database\n" unless $QUIET_ARG;
286 }
287
288 &create_cache;
289 }
290
291 sub create_translation_database
292 {
293 for my $lang (keys %po_files_by_lang) {
294 my $po_file = $po_files_by_lang{$lang};
295
296 if ($UTF8_ARG) {
297 my $encoding = get_po_encoding ($po_file);
298 if (lc $encoding eq "utf-8") {
299 open PO_FILE, "<$po_file";
300 } else {
301 my $iconv = $ENV{"INTLTOOL_ICONV"} || "iconv";
302 open PO_FILE, "$iconv -f $encoding -t UTF-8 $po_file|";
303 }
304 } else {
305 open PO_FILE, "<$po_file";
306 }
307
308 my $nextfuzzy = 0;
309 my $inmsgid = 0;
310 my $inmsgstr = 0;
311 my $msgid = "";
312 my $msgstr = "";
313 while (<PO_FILE>) {
314 $nextfuzzy = 1 if /^#, fuzzy/;
315 if (/^msgid "((\\.|[^\\])*)"/ ) {
316 $translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr;
317 $msgid = "";
318 $msgstr = "";
319
320 if ($nextfuzzy) {
321 $inmsgid = 0;
322 } else {
323 $msgid = unescape_po_string($1);
324 $inmsgid = 1;
325 }
326 $inmsgstr = 0;
327 $nextfuzzy = 0;
328 }
329 if (/^msgstr "((\\.|[^\\])*)"/) {
330 $msgstr = unescape_po_string($1);
331 $inmsgstr = 1;
332 $inmsgid = 0;
333 }
334 if (/^"((\\.|[^\\])*)"/) {
335 $msgid .= unescape_po_string($1) if $inmsgid;
336 $msgstr .= unescape_po_string($1) if $inmsgstr;
337 }
338 }
339 $translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr;
340 }
341 }
342
343 sub finalize
344 {
345 }
346
347 sub unescape_one_sequence
348 {
349 my ($sequence) = @_;
350
351 return "\\" if $sequence eq "\\\\";
352 return "\"" if $sequence eq "\\\"";
353
354 # gettext also handles \n, \t, \b, \r, \f, \v, \a, \xxx (octal),
355 # \xXX (hex) and has a comment saying they want to handle \u and \U.
356
357 return $sequence;
358 }
359
360 sub unescape_po_string
361 {
362 my ($string) = @_;
363
364 $string =~ s/(\\.)/unescape_one_sequence($1)/eg;
365
366 return $string;
367 }
368
369 sub entity_decode
370 {
371 local ($_) = @_;
372
373 s/&apos;/'/g; # '
374 s/&quot;/"/g; # "
375 s/&amp;/&/g;
376
377 return $_;
378 }
379
380 sub entity_encode
381 {
382 my ($pre_encoded) = @_;
383
384 my @list_of_chars = unpack ('C*', $pre_encoded);
385
386 if ($PASS_THROUGH_ARG) {
387 return join ('', map (&entity_encode_int_even_high_bit, @list_of_chars));
388 } else {
389 return join ('', map (&entity_encode_int_minimalist, @list_of_chars));
390 }
391 }
392
393 sub entity_encode_int_minimalist
394 {
395 return "&quot;" if $_ == 34;
396 return "&amp;" if $_ == 38;
397 return "&apos;" if $_ == 39;
398 return chr $_;
399 }
400
401 sub entity_encode_int_even_high_bit
402 {
403 if ($_ > 127 || $_ == 34 || $_ == 38 || $_ == 39) {
404 # the ($_ > 127) should probably be removed
405 return "&#" . $_ . ";";
406 } else {
407 return chr $_;
408 }
409 }
410
411 sub entity_encoded_translation
412 {
413 my ($lang, $string) = @_;
414
415 my $translation = $translations{$lang, $string};
416 return $string if !$translation;
417 return entity_encode ($translation);
418 }
419
420 ## XML (bonobo-activation specific) merge code
421
422 sub ba_merge_translations
423 {
424 my $source;
425
426 {
427 local $/; # slurp mode
428 open INPUT, "<$FILE" or die "can't open $FILE: $!";
429 $source = <INPUT>;
430 close INPUT;
431 }
432
433 open OUTPUT, ">$OUTFILE" or die "can't open $OUTFILE: $!";
434
435 while ($source =~ s|^(.*?)([ \t]*<\s*$w+\s+($w+\s*=\s*"$q"\s*)+/?>)([ \t]*\n)?||s) {
436 print OUTPUT $1;
437
438 my $node = $2 . "\n";
439
440 my @strings = ();
441 $_ = $node;
442 while (s/(\s)_($w+\s*=\s*"($q)")/$1$2/s) {
443 push @strings, entity_decode($3);
444 }
445 print OUTPUT;
446
447 my %langs;
448 for my $string (@strings) {
449 for my $lang (keys %po_files_by_lang) {
450 $langs{$lang} = 1 if $translations{$lang, $string};
451 }
452 }
453
454 for my $lang (sort keys %langs) {
455 $_ = $node;
456 s/(\sname\s*=\s*)"($q)"/$1"$2-$lang"/s;
457 s/(\s)_($w+\s*=\s*")($q)"/$1 . $2 . entity_encoded_translation($lang, $3) . '"'/seg;
458 print OUTPUT;
459 }
460 }
461
462 print OUTPUT $source;
463
464 close OUTPUT;
465 }
466
467
468 ## XML (non-bonobo-activation) merge code
469
470 sub xml_merge_translations
471 {
472 my $source;
473
474 {
475 local $/; # slurp mode
476 open INPUT, "<$FILE" or die "can't open $FILE: $!";
477 $source = <INPUT>;
478 close INPUT;
479 }
480
481 open OUTPUT, ">$OUTFILE" or die;
482
483 # FIXME: support attribute translations
484
485 # Empty nodes never need translation, so unmark all of them.
486 # For example, <_foo/> is just replaced by <foo/>.
487 $source =~ s|<\s*_($w+)\s*/>|<$1/>|g;
488
489 # Support for <_foo>blah</_foo> style translations.
490 while ($source =~ s|^(.*?)([ \t]*)<\s*_($w+)\s*>(.*?)<\s*/_\3\s*>([ \t]*\n)?||s) {
491 print OUTPUT $1;
492
493 my $spaces = $2;
494 my $tag = $3;
495 my $string = $4;
496
497 print OUTPUT "$spaces<$tag>$string</$tag>\n";
498
499 $string =~ s/\s+/ /g;
500 $string =~ s/^ //;
501 $string =~ s/ $//;
502 $string = entity_decode($string);
503
504 for my $lang (sort keys %po_files_by_lang) {
505 my $translation = $translations{$lang, $string};
506 next if !$translation;
507 $translation = entity_encode($translation);
508 print OUTPUT "$spaces<$tag xml:lang=\"$lang\">$translation</$tag>\n";
509 }
510 }
511
512 print OUTPUT $source;
513
514 close OUTPUT;
515 }
516
517 sub keys_merge_translations
518 {
519 open INPUT, "<${FILE}" or die;
520 open OUTPUT, ">${OUTFILE}" or die;
521
522 while (<INPUT>) {
523 if (s/^(\s*)_(\w+=(.*))/$1$2/) {
524 my $string = $3;
525
526 print OUTPUT;
527
528 my $non_translated_line = $_;
529
530 for my $lang (sort keys %po_files_by_lang) {
531 my $translation = $translations{$lang, $string};
532 next if !$translation;
533
534 $_ = $non_translated_line;
535 s/(\w+)=.*/[$lang]$1=$translation/;
536 print OUTPUT;
537 }
538 } else {
539 print OUTPUT;
540 }
541 }
542
543 close OUTPUT;
544 close INPUT;
545 }
546
547 sub desktop_merge_translations
548 {
549 open INPUT, "<${FILE}" or die;
550 open OUTPUT, ">${OUTFILE}" or die;
551
552 while (<INPUT>) {
553 if (s/^(\s*)_(\w+=(.*))/$1$2/) {
554 my $string = $3;
555
556 print OUTPUT;
557
558 my $non_translated_line = $_;
559
560 for my $lang (sort keys %po_files_by_lang) {
561 my $translation = $translations{$lang, $string};
562 next if !$translation;
563
564 $_ = $non_translated_line;
565 s/(\w+)=.*/${1}[$lang]=$translation/;
566 print OUTPUT;
567 }
568 } else {
569 print OUTPUT;
570 }
571 }
572
573 close OUTPUT;
574 close INPUT;
575 }
576
577 sub schemas_merge_translations
578 {
579 my $source;
580
581 {
582 local $/; # slurp mode
583 open INPUT, "<$FILE" or die "can't open $FILE: $!";
584 $source = <INPUT>;
585 close INPUT;
586 }
587
588 open OUTPUT, ">$OUTFILE" or die;
589
590 # FIXME: support attribute translations
591
592 # Empty nodes never need translation, so unmark all of them.
593 # For example, <_foo/> is just replaced by <foo/>.
594 $source =~ s|<\s*_($w+)\s*/>|<$1/>|g;
595
596 # Support for <_foo>blah</_foo> style translations.
597
598 my $regex_start = "^(.*?)([ \t]*)<locale name=\"C\">";
599 my $regex_short = "([ \t\n]*)<short>(.*?)</short>";
600 my $regex_long = "([ \t\n]*)<long>(.*?)</long>";
601 my $regex_end = "([ \t\n]*)</locale>";
602
603 while ($source =~ s|$regex_start$regex_short$regex_long$regex_end||s) {
604 print OUTPUT $1;
605
606 my $locale_start_spaces = $2;
607 my $locale_end_spaces = $7;
608 my $short_spaces = $3;
609 my $short_string = $4;
610 my $long_spaces = $5;
611 my $long_string = $6;
612
613 # English first
614
615 print OUTPUT "$locale_start_spaces<locale name=\"C\">";
616 print OUTPUT "$short_spaces<short>$short_string</short>";
617 print OUTPUT "$long_spaces<long>$long_string</long>";
618 print OUTPUT "$locale_end_spaces</locale>";
619
620 $short_string =~ s/\s+/ /g;
621 $short_string =~ s/^ //;
622 $short_string =~ s/ $//;
623 $short_string = entity_decode($short_string);
624
625 $long_string =~ s/\s+/ /g;
626 $long_string =~ s/^ //;
627 $long_string =~ s/ $//;
628 $long_string = entity_decode($long_string);
629
630 for my $lang (sort keys %po_files_by_lang) {
631 my $short_translation = $translations{$lang, $short_string};
632 my $long_translation = $translations{$lang, $long_string};
633
634 next if (!$short_translation && !$long_translation);
635
636 print OUTPUT "\n$locale_start_spaces<locale name=\"$lang\">";
637
638 if ($short_translation)
639 {
640 $short_translation = entity_encode($short_translation);
641 print OUTPUT "$short_spaces<short>$short_translation</short>";
642 }
643
644 if ($long_translation)
645 {
646 $long_translation = entity_encode($long_translation);
647 print OUTPUT "$long_spaces<long>$long_translation</long>";
648 }
649
650 print OUTPUT "$locale_end_spaces</locale>";
651 }
652 }
653
654 print OUTPUT $source;
655
656 close OUTPUT;
657 }

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.30