Contents of /drakwizard/trunk/data/intltool-merge

#!/usr/bin/perl -w

#
#  The Intltool Message Merger
#
#  Copyright (C) 2000, 2002 Free Software Foundation.
#  Copyright (C) 2000, 2001 Eazel, Inc
#
#  Intltool is free software; you can redistribute it and/or
#  modify it under the terms of the GNU General Public License 
#  version 2 published by the Free Software Foundation.
#
#  Intltool is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
#  General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
#  As a special exception to the GNU General Public License, if you
#  distribute this file as part of a program that contains a
#  configuration script generated by Autoconf, you may include it under
#  the same distribution terms that you use for the rest of that program.
#
#  Authors:  Maciej Stachowiak <mjs@noisehavoc.org>
#            Kenneth Christiansen <kenneth@gnu.org>
#            Darin Adler <darin@bentspoon.com>
#
#  Proper XML UTF-8'ification written by Cyrille Chepelov <chepelov@calixo.net>
#

## Release information
my $PROGRAM = "intltool-merge";
my $PACKAGE = "intltool";
my $VERSION = "0.22";

## Loaded modules
use strict; 
use Getopt::Long;

## Scalars used by the option stuff
my $HELP_ARG = 0;
my $VERSION_ARG = 0;
my $BA_STYLE_ARG = 0;
my $XML_STYLE_ARG = 0;
my $KEYS_STYLE_ARG = 0;
my $DESKTOP_STYLE_ARG = 0;
my $SCHEMAS_STYLE_ARG = 0;
my $QUIET_ARG = 0;
my $PASS_THROUGH_ARG = 0;
my $UTF8_ARG = 0;
my $cache_file;

## Handle options
GetOptions 
(
 "help" => \$HELP_ARG,
 "version" => \$VERSION_ARG,
 "quiet|q" => \$QUIET_ARG,
 "oaf-style|o" => \$BA_STYLE_ARG, ## for compatibility
 "ba-style|b" => \$BA_STYLE_ARG,
 "xml-style|x" => \$XML_STYLE_ARG,
 "keys-style|k" => \$KEYS_STYLE_ARG,
 "desktop-style|d" => \$DESKTOP_STYLE_ARG,
 "schemas-style|s" => \$SCHEMAS_STYLE_ARG,
 "pass-through|p" => \$PASS_THROUGH_ARG,
 "utf8|u" => \$UTF8_ARG,
 "cache|c=s" => \$cache_file
 ) or &error;

my $PO_DIR;
my $FILE;
my $OUTFILE;

my %po_files_by_lang = ();
my %translations = ();

# Use this instead of \w for XML files to handle more possible characters.
my $w = "[-A-Za-z0-9._:]";

# XML quoted string contents
my $q = "[^\\\"]*";

## Check for options. 

if ($VERSION_ARG) {
        &print_version;
} elsif ($HELP_ARG) {
        &print_help;
} elsif ($BA_STYLE_ARG && @ARGV > 2) {
        &preparation;
        &print_message;
        &ba_merge_translations;
        &finalize;
} elsif ($XML_STYLE_ARG && @ARGV > 2) {
        &utf8_sanity_check;
        &preparation;
        &print_message;
        &xml_merge_translations;
        &finalize;
} elsif ($KEYS_STYLE_ARG && @ARGV > 2) {
        &utf8_sanity_check;
        &preparation;
        &print_message;
        &keys_merge_translations;
        &finalize;
} elsif ($DESKTOP_STYLE_ARG && @ARGV > 2) {
        &preparation;
        &print_message;
        &desktop_merge_translations;
        &finalize;
} elsif ($SCHEMAS_STYLE_ARG && @ARGV > 2) {
        &preparation;
        &print_message;
        &schemas_merge_translations;
        &finalize;
} else {
        &print_help;
}

exit;

## Sub for printing release information
sub print_version
{
    print "${PROGRAM} (${PACKAGE}) ${VERSION}\n";
    print "Written by Maciej Stachowiak, Darin Adler and Kenneth Christiansen.\n\n";
    print "Copyright (C) 2000-2002 Free Software Foundation, Inc.\n";
    print "Copyright (C) 2000-2001 Eazel, Inc.\n";
    print "This is free software; see the source for copying conditions.  There is NO\n";
    print "warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n";
    exit;
}

## Sub for printing usage information
sub print_help
{
    print "Usage: ${PROGRAM} [OPTIONS] PO_DIRECTORY FILENAME OUTPUT_FILE\n";
    print "Generates an output file that includes translated versions of some attributes,\n";
    print "from an untranslated source and a po directory that includes translations.\n\n";
    print "  -b, --ba-style         includes translations in the bonobo-activation style\n";
    print "  -d, --desktop-style    includes translations in the desktop style\n";
    print "  -k, --keys-style       includes translations in the keys style\n";
    print "  -s, --schemas-style    includes translations in the schemas style\n";
    print "  -x, --xml-style        includes translations in the standard xml style\n";
    print "  -u, --utf8             convert all strings to UTF-8 before merging\n";
    print "  -p, --pass-through     use strings as found in .po files, without\n";
    print "                         conversion (STRONGLY unrecommended with -x)\n";
    print "  -q, --quiet            suppress most messages\n";
    print "      --help             display this help and exit\n";
    print "      --version          output version information and exit\n";
    print "\nReport bugs to bugzilla.gnome.org, module intltool, or contact us through \n";
    print "<xml-i18n-tools-list\@gnome.org>.\n";
    exit;
}


## Sub for printing error messages
sub print_error
{
    print "Try `${PROGRAM} --help' for more information.\n";
    exit;
}


sub print_message 
{
    print "Merging translations into $OUTFILE.\n" unless $QUIET_ARG;
}


sub preparation 
{
    $PO_DIR = $ARGV[0];
    $FILE = $ARGV[1];
    $OUTFILE = $ARGV[2];

    &gather_po_files;
    &get_translation_database;
}

# General-purpose code for looking up translations in .po files

sub po_file2lang
{
    my ($tmp) = @_; 
    $tmp =~ s/^.*\/(.*)\.po$/$1/; 
    return $tmp; 
}

sub gather_po_files
{
    for my $po_file (glob "$PO_DIR/*.po") {
        $po_files_by_lang{po_file2lang($po_file)} = $po_file;
    }
}

sub get_po_encoding
{
    my ($in_po_file) = @_;
    my $encoding = "";

    open IN_PO_FILE, $in_po_file or die;
    while (<IN_PO_FILE>) {
        ## example: "Content-Type: text/plain; charset=ISO-8859-1\n"
        if (/Content-Type\:.*charset=([-a-zA-Z0-9]+)\\n/) {
            $encoding = $1; 
            last;
        }
    }
    close IN_PO_FILE;

    if (!$encoding) {
        print "Warning: no encoding found in $in_po_file. Assuming ISO-8859-1\n";
        $encoding = "ISO-8859-1";
    }
    return $encoding
}

sub utf8_sanity_check 
{
    if (!$UTF8_ARG) {
        if (!$PASS_THROUGH_ARG) {
            $PASS_THROUGH_ARG="1";
        }
    }
}

sub get_translation_database
{
    if ($cache_file) {
        &get_cached_translation_database;
    } else {
        &create_translation_database;
    }
}

sub get_newest_po_age
{
    my $newest_age;

    foreach my $file (values %po_files_by_lang) {
        my $file_age = -M $file;
        $newest_age = $file_age if !$newest_age || $file_age < $newest_age;
    }

    return $newest_age;
}

sub create_cache
{
    print "Generating and caching the translation database\n" unless $QUIET_ARG;

    &create_translation_database;

    open CACHE, ">$cache_file" || die;
    print CACHE join "\x01", %translations;
    close CACHE;
}

sub load_cache 
{
    print "Found cached translation database\n" unless $QUIET_ARG;

    my $contents;
    open CACHE, "<$cache_file" || die;
    {
        local $/;
        $contents = <CACHE>;
    }
    close CACHE;
    %translations = split "\x01", $contents;
}

sub get_cached_translation_database
{
    my $cache_file_age = -M $cache_file;
    if (defined $cache_file_age) {
        if ($cache_file_age <= &get_newest_po_age) {
            &load_cache;
            return;
        }
        print "Found too-old cached translation database\n" unless $QUIET_ARG;
    }

    &create_cache;
}

sub create_translation_database
{
    for my $lang (keys %po_files_by_lang) {
        my $po_file = $po_files_by_lang{$lang};

        if ($UTF8_ARG) {
            my $encoding = get_po_encoding ($po_file);
            if (lc $encoding eq "utf-8") {
                open PO_FILE, "<$po_file";      
            } else {
                my $iconv = $ENV{"INTLTOOL_ICONV"} || "iconv";
                open PO_FILE, "$iconv -f $encoding -t UTF-8 $po_file|"; 
            }
        } else {
            open PO_FILE, "<$po_file";  
        }

        my $nextfuzzy = 0;
        my $inmsgid = 0;
        my $inmsgstr = 0;
        my $msgid = "";
        my $msgstr = "";
        while (<PO_FILE>) {
            $nextfuzzy = 1 if /^#, fuzzy/;
            if (/^msgid "((\\.|[^\\])*)"/ ) {
                $translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr;
                $msgid = "";
                $msgstr = "";

                if ($nextfuzzy) {
                    $inmsgid = 0;
                } else {
                    $msgid = unescape_po_string($1);
                    $inmsgid = 1;
                }
                $inmsgstr = 0;
                $nextfuzzy = 0;
            }
            if (/^msgstr "((\\.|[^\\])*)"/) {
                $msgstr = unescape_po_string($1);
                $inmsgstr = 1;
                $inmsgid = 0;
            }
            if (/^"((\\.|[^\\])*)"/) {
                $msgid .= unescape_po_string($1) if $inmsgid;
                $msgstr .= unescape_po_string($1) if $inmsgstr;
            }
        }
        $translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr;
    }
}

sub finalize
{
}

sub unescape_one_sequence
{
    my ($sequence) = @_;

    return "\\" if $sequence eq "\\\\";
    return "\"" if $sequence eq "\\\"";

    # gettext also handles \n, \t, \b, \r, \f, \v, \a, \xxx (octal),
    # \xXX (hex) and has a comment saying they want to handle \u and \U.

    return $sequence;
}

sub unescape_po_string
{
    my ($string) = @_;

    $string =~ s/(\\.)/unescape_one_sequence($1)/eg;

    return $string;
}

sub entity_decode
{
    local ($_) = @_;

    s/&apos;/'/g; # '
    s/&quot;/"/g; # "
    s/&amp;/&/g;

    return $_;
}

sub entity_encode
{
    my ($pre_encoded) = @_;

    my @list_of_chars = unpack ('C*', $pre_encoded);

    if ($PASS_THROUGH_ARG) {
        return join ('', map (&entity_encode_int_even_high_bit, @list_of_chars));
    } else {
        return join ('', map (&entity_encode_int_minimalist, @list_of_chars));
    }
}

sub entity_encode_int_minimalist
{
    return "&quot;" if $_ == 34;
    return "&amp;" if $_ == 38;
    return "&apos;" if $_ == 39;
    return chr $_;
}

sub entity_encode_int_even_high_bit
{
    if ($_ > 127 || $_ == 34 || $_ == 38 || $_ == 39) {
        # the ($_ > 127) should probably be removed
        return "&#" . $_ . ";"; 
    } else {
        return chr $_;
    }
}

sub entity_encoded_translation
{
    my ($lang, $string) = @_;

    my $translation = $translations{$lang, $string};
    return $string if !$translation;
    return entity_encode ($translation);
}

## XML (bonobo-activation specific) merge code

sub ba_merge_translations
{
    my $source;

    {
       local $/; # slurp mode
       open INPUT, "<$FILE" or die "can't open $FILE: $!";
       $source = <INPUT>;
       close INPUT;
    }

    open OUTPUT, ">$OUTFILE" or die "can't open $OUTFILE: $!";

    while ($source =~ s|^(.*?)([ \t]*<\s*$w+\s+($w+\s*=\s*"$q"\s*)+/?>)([ \t]*\n)?||s) {
        print OUTPUT $1;

        my $node = $2 . "\n";

        my @strings = ();
        $_ = $node;
        while (s/(\s)_($w+\s*=\s*"($q)")/$1$2/s) {
             push @strings, entity_decode($3);
        }
        print OUTPUT;

        my %langs;
        for my $string (@strings) {
            for my $lang (keys %po_files_by_lang) {
                $langs{$lang} = 1 if $translations{$lang, $string};
            }
        }
        
        for my $lang (sort keys %langs) {
            $_ = $node;
            s/(\sname\s*=\s*)"($q)"/$1"$2-$lang"/s;
            s/(\s)_($w+\s*=\s*")($q)"/$1 . $2 . entity_encoded_translation($lang, $3) . '"'/seg;
            print OUTPUT;
        }
    }

    print OUTPUT $source;

    close OUTPUT;
}


## XML (non-bonobo-activation) merge code

sub xml_merge_translations
{
    my $source;

    {
       local $/; # slurp mode
       open INPUT, "<$FILE" or die "can't open $FILE: $!";
       $source = <INPUT>;
       close INPUT;
    }

    open OUTPUT, ">$OUTFILE" or die;

    # FIXME: support attribute translations

    # Empty nodes never need translation, so unmark all of them.
    # For example, <_foo/> is just replaced by <foo/>.
    $source =~ s|<\s*_($w+)\s*/>|<$1/>|g;

    # Support for <_foo>blah</_foo> style translations.
    while ($source =~ s|^(.*?)([ \t]*)<\s*_($w+)\s*>(.*?)<\s*/_\3\s*>([ \t]*\n)?||s) {
        print OUTPUT $1;

        my $spaces = $2;
        my $tag = $3;
        my $string = $4;

        print OUTPUT "$spaces<$tag>$string</$tag>\n";

        $string =~ s/\s+/ /g;
        $string =~ s/^ //;
        $string =~ s/ $//;
        $string = entity_decode($string);

        for my $lang (sort keys %po_files_by_lang) {
            my $translation = $translations{$lang, $string};
            next if !$translation;
            $translation = entity_encode($translation);
            print OUTPUT "$spaces<$tag xml:lang=\"$lang\">$translation</$tag>\n";
        }
    }

    print OUTPUT $source;

    close OUTPUT;
}

sub keys_merge_translations
{
    open INPUT, "<${FILE}" or die;
    open OUTPUT, ">${OUTFILE}" or die;

    while (<INPUT>) {
        if (s/^(\s*)_(\w+=(.*))/$1$2/)  {
            my $string = $3;

            print OUTPUT;

            my $non_translated_line = $_;

            for my $lang (sort keys %po_files_by_lang) {
                my $translation = $translations{$lang, $string};
                next if !$translation;

                $_ = $non_translated_line;
                s/(\w+)=.*/[$lang]$1=$translation/;
                print OUTPUT;
            }
        } else {
            print OUTPUT;
        }
    }

    close OUTPUT;
    close INPUT;
}

sub desktop_merge_translations
{
    open INPUT, "<${FILE}" or die;
    open OUTPUT, ">${OUTFILE}" or die;

    while (<INPUT>) {
        if (s/^(\s*)_(\w+=(.*))/$1$2/)  {
            my $string = $3;

            print OUTPUT;

            my $non_translated_line = $_;

            for my $lang (sort keys %po_files_by_lang) {
                my $translation = $translations{$lang, $string};
                next if !$translation;

                $_ = $non_translated_line;
                s/(\w+)=.*/${1}[$lang]=$translation/;
                print OUTPUT;
            }
        } else {
            print OUTPUT;
        }
    }

    close OUTPUT;
    close INPUT;
}

sub schemas_merge_translations
{
    my $source;

    {
       local $/; # slurp mode
       open INPUT, "<$FILE" or die "can't open $FILE: $!";
       $source = <INPUT>;
       close INPUT;
    }

    open OUTPUT, ">$OUTFILE" or die;

    # FIXME: support attribute translations

    # Empty nodes never need translation, so unmark all of them.
    # For example, <_foo/> is just replaced by <foo/>.
    $source =~ s|<\s*_($w+)\s*/>|<$1/>|g;

    # Support for <_foo>blah</_foo> style translations.

    my $regex_start = "^(.*?)([ \t]*)<locale name=\"C\">";
    my $regex_short = "([ \t\n]*)<short>(.*?)</short>";
    my $regex_long  = "([ \t\n]*)<long>(.*?)</long>";
    my $regex_end   = "([ \t\n]*)</locale>";

    while ($source =~ s|$regex_start$regex_short$regex_long$regex_end||s) {
        print OUTPUT $1;

        my $locale_start_spaces = $2;
        my $locale_end_spaces = $7;
        my $short_spaces = $3;
        my $short_string = $4;
        my $long_spaces = $5;
        my $long_string = $6;

        # English first

        print OUTPUT "$locale_start_spaces<locale name=\"C\">";
        print OUTPUT "$short_spaces<short>$short_string</short>";
        print OUTPUT "$long_spaces<long>$long_string</long>";
        print OUTPUT "$locale_end_spaces</locale>";

        $short_string =~ s/\s+/ /g;
        $short_string =~ s/^ //;
        $short_string =~ s/ $//;
        $short_string = entity_decode($short_string);

        $long_string =~ s/\s+/ /g;
        $long_string =~ s/^ //;
        $long_string =~ s/ $//;
        $long_string = entity_decode($long_string);

        for my $lang (sort keys %po_files_by_lang) {
            my $short_translation = $translations{$lang, $short_string};
            my $long_translation  = $translations{$lang, $long_string};

            next if (!$short_translation && !$long_translation);

            print OUTPUT "\n$locale_start_spaces<locale name=\"$lang\">";

            if ($short_translation)
            {
                $short_translation = entity_encode($short_translation);
                print OUTPUT "$short_spaces<short>$short_translation</short>";
            }

            if ($long_translation)
            {
                $long_translation = entity_encode($long_translation);
                print OUTPUT "$long_spaces<long>$long_translation</long>";
            }       

            print OUTPUT "$locale_end_spaces</locale>";
        }
    }

    print OUTPUT $source;

    close OUTPUT;
}
1	#!/usr/bin/perl -w
2
3	#
4	# The Intltool Message Merger
5	#
6	# Copyright (C) 2000, 2002 Free Software Foundation.
7	# Copyright (C) 2000, 2001 Eazel, Inc
8	#
9	# Intltool is free software; you can redistribute it and/or
10	# modify it under the terms of the GNU General Public License
11	# version 2 published by the Free Software Foundation.
12	#
13	# Intltool is distributed in the hope that it will be useful,
14	# but WITHOUT ANY WARRANTY; without even the implied warranty of
15	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16	# General Public License for more details.
17	#
18	# You should have received a copy of the GNU General Public License
19	# along with this program; if not, write to the Free Software
20	# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21	#
22	# As a special exception to the GNU General Public License, if you
23	# distribute this file as part of a program that contains a
24	# configuration script generated by Autoconf, you may include it under
25	# the same distribution terms that you use for the rest of that program.
26	#
27	# Authors: Maciej Stachowiak <mjs@noisehavoc.org>
28	# Kenneth Christiansen <kenneth@gnu.org>
29	# Darin Adler <darin@bentspoon.com>
30	#
31	# Proper XML UTF-8'ification written by Cyrille Chepelov <chepelov@calixo.net>
32	#
33
34	## Release information
35	my $PROGRAM = "intltool-merge";
36	my $PACKAGE = "intltool";
37	my $VERSION = "0.22";
38
39	## Loaded modules
40	use strict;
41	use Getopt::Long;
42
43	## Scalars used by the option stuff
44	my $HELP_ARG = 0;
45	my $VERSION_ARG = 0;
46	my $BA_STYLE_ARG = 0;
47	my $XML_STYLE_ARG = 0;
48	my $KEYS_STYLE_ARG = 0;
49	my $DESKTOP_STYLE_ARG = 0;
50	my $SCHEMAS_STYLE_ARG = 0;
51	my $QUIET_ARG = 0;
52	my $PASS_THROUGH_ARG = 0;
53	my $UTF8_ARG = 0;
54	my $cache_file;
55
56	## Handle options
57	GetOptions
58	(
59	"help" => \$HELP_ARG,
60	"version" => \$VERSION_ARG,
61	"quiet\|q" => \$QUIET_ARG,
62	"oaf-style\|o" => \$BA_STYLE_ARG, ## for compatibility
63	"ba-style\|b" => \$BA_STYLE_ARG,
64	"xml-style\|x" => \$XML_STYLE_ARG,
65	"keys-style\|k" => \$KEYS_STYLE_ARG,
66	"desktop-style\|d" => \$DESKTOP_STYLE_ARG,
67	"schemas-style\|s" => \$SCHEMAS_STYLE_ARG,
68	"pass-through\|p" => \$PASS_THROUGH_ARG,
69	"utf8\|u" => \$UTF8_ARG,
70	"cache\|c=s" => \$cache_file
71	) or &error;
72
73	my $PO_DIR;
74	my $FILE;
75	my $OUTFILE;
76
77	my %po_files_by_lang = ();
78	my %translations = ();
79
80	# Use this instead of \w for XML files to handle more possible characters.
81	my $w = "[-A-Za-z0-9._:]";
82
83	# XML quoted string contents
84	my $q = "[^\\\"]*";
85
86	## Check for options.
87
88	if ($VERSION_ARG) {
89	&print_version;
90	} elsif ($HELP_ARG) {
91	&print_help;
92	} elsif ($BA_STYLE_ARG && @ARGV > 2) {
93	&preparation;
94	&print_message;
95	&ba_merge_translations;
96	&finalize;
97	} elsif ($XML_STYLE_ARG && @ARGV > 2) {
98	&utf8_sanity_check;
99	&preparation;
100	&print_message;
101	&xml_merge_translations;
102	&finalize;
103	} elsif ($KEYS_STYLE_ARG && @ARGV > 2) {
104	&utf8_sanity_check;
105	&preparation;
106	&print_message;
107	&keys_merge_translations;
108	&finalize;
109	} elsif ($DESKTOP_STYLE_ARG && @ARGV > 2) {
110	&preparation;
111	&print_message;
112	&desktop_merge_translations;
113	&finalize;
114	} elsif ($SCHEMAS_STYLE_ARG && @ARGV > 2) {
115	&preparation;
116	&print_message;
117	&schemas_merge_translations;
118	&finalize;
119	} else {
120	&print_help;
121	}
122
123	exit;
124
125	## Sub for printing release information
126	sub print_version
127	{
128	print "${PROGRAM} (${PACKAGE}) ${VERSION}\n";
129	print "Written by Maciej Stachowiak, Darin Adler and Kenneth Christiansen.\n\n";
130	print "Copyright (C) 2000-2002 Free Software Foundation, Inc.\n";
131	print "Copyright (C) 2000-2001 Eazel, Inc.\n";
132	print "This is free software; see the source for copying conditions. There is NO\n";
133	print "warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n";
134	exit;
135	}
136
137	## Sub for printing usage information
138	sub print_help
139	{
140	print "Usage: ${PROGRAM} [OPTIONS] PO_DIRECTORY FILENAME OUTPUT_FILE\n";
141	print "Generates an output file that includes translated versions of some attributes,\n";
142	print "from an untranslated source and a po directory that includes translations.\n\n";
143	print " -b, --ba-style includes translations in the bonobo-activation style\n";
144	print " -d, --desktop-style includes translations in the desktop style\n";
145	print " -k, --keys-style includes translations in the keys style\n";
146	print " -s, --schemas-style includes translations in the schemas style\n";
147	print " -x, --xml-style includes translations in the standard xml style\n";
148	print " -u, --utf8 convert all strings to UTF-8 before merging\n";
149	print " -p, --pass-through use strings as found in .po files, without\n";
150	print " conversion (STRONGLY unrecommended with -x)\n";
151	print " -q, --quiet suppress most messages\n";
152	print " --help display this help and exit\n";
153	print " --version output version information and exit\n";
154	print "\nReport bugs to bugzilla.gnome.org, module intltool, or contact us through \n";
155	print "<xml-i18n-tools-list\@gnome.org>.\n";
156	exit;
157	}
158
159
160	## Sub for printing error messages
161	sub print_error
162	{
163	print "Try `${PROGRAM} --help' for more information.\n";
164	exit;
165	}
166
167
168	sub print_message
169	{
170	print "Merging translations into $OUTFILE.\n" unless $QUIET_ARG;
171	}
172
173
174	sub preparation
175	{
176	$PO_DIR = $ARGV[0];
177	$FILE = $ARGV[1];
178	$OUTFILE = $ARGV[2];
179
180	&gather_po_files;
181	&get_translation_database;
182	}
183
184	# General-purpose code for looking up translations in .po files
185
186	sub po_file2lang
187	{
188	my ($tmp) = @_;
189	$tmp =~ s/^.\/(.)\.po$/$1/;
190	return $tmp;
191	}
192
193	sub gather_po_files
194	{
195	for my $po_file (glob "$PO_DIR/*.po") {
196	$po_files_by_lang{po_file2lang($po_file)} = $po_file;
197	}
198	}
199
200	sub get_po_encoding
201	{
202	my ($in_po_file) = @_;
203	my $encoding = "";
204
205	open IN_PO_FILE, $in_po_file or die;
206	while (<IN_PO_FILE>) {
207	## example: "Content-Type: text/plain; charset=ISO-8859-1\n"
208	if (/Content-Type\:.*charset=([-a-zA-Z0-9]+)\\n/) {
209	$encoding = $1;
210	last;
211	}
212	}
213	close IN_PO_FILE;
214
215	if (!$encoding) {
216	print "Warning: no encoding found in $in_po_file. Assuming ISO-8859-1\n";
217	$encoding = "ISO-8859-1";
218	}
219	return $encoding
220	}
221
222	sub utf8_sanity_check
223	{
224	if (!$UTF8_ARG) {
225	if (!$PASS_THROUGH_ARG) {
226	$PASS_THROUGH_ARG="1";
227	}
228	}
229	}
230
231	sub get_translation_database
232	{
233	if ($cache_file) {
234	&get_cached_translation_database;
235	} else {
236	&create_translation_database;
237	}
238	}
239
240	sub get_newest_po_age
241	{
242	my $newest_age;
243
244	foreach my $file (values %po_files_by_lang) {
245	my $file_age = -M $file;
246	$newest_age = $file_age if !$newest_age \|\| $file_age < $newest_age;
247	}
248
249	return $newest_age;
250	}
251
252	sub create_cache
253	{
254	print "Generating and caching the translation database\n" unless $QUIET_ARG;
255
256	&create_translation_database;
257
258	open CACHE, ">$cache_file" \|\| die;
259	print CACHE join "\x01", %translations;
260	close CACHE;
261	}
262
263	sub load_cache
264	{
265	print "Found cached translation database\n" unless $QUIET_ARG;
266
267	my $contents;
268	open CACHE, "<$cache_file" \|\| die;
269	{
270	local $/;
271	$contents = <CACHE>;
272	}
273	close CACHE;
274	%translations = split "\x01", $contents;
275	}
276
277	sub get_cached_translation_database
278	{
279	my $cache_file_age = -M $cache_file;
280	if (defined $cache_file_age) {
281	if ($cache_file_age <= &get_newest_po_age) {
282	&load_cache;
283	return;
284	}
285	print "Found too-old cached translation database\n" unless $QUIET_ARG;
286	}
287
288	&create_cache;
289	}
290
291	sub create_translation_database
292	{
293	for my $lang (keys %po_files_by_lang) {
294	my $po_file = $po_files_by_lang{$lang};
295
296	if ($UTF8_ARG) {
297	my $encoding = get_po_encoding ($po_file);
298	if (lc $encoding eq "utf-8") {
299	open PO_FILE, "<$po_file";
300	} else {
301	my $iconv = $ENV{"INTLTOOL_ICONV"} \|\| "iconv";
302	open PO_FILE, "$iconv -f $encoding -t UTF-8 $po_file\|";
303	}
304	} else {
305	open PO_FILE, "<$po_file";
306	}
307
308	my $nextfuzzy = 0;
309	my $inmsgid = 0;
310	my $inmsgstr = 0;
311	my $msgid = "";
312	my $msgstr = "";
313	while (<PO_FILE>) {
314	$nextfuzzy = 1 if /^#, fuzzy/;
315	if (/^msgid "((\\.\|[^\\])*)"/ ) {
316	$translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr;
317	$msgid = "";
318	$msgstr = "";
319
320	if ($nextfuzzy) {
321	$inmsgid = 0;
322	} else {
323	$msgid = unescape_po_string($1);
324	$inmsgid = 1;
325	}
326	$inmsgstr = 0;
327	$nextfuzzy = 0;
328	}
329	if (/^msgstr "((\\.\|[^\\])*)"/) {
330	$msgstr = unescape_po_string($1);
331	$inmsgstr = 1;
332	$inmsgid = 0;
333	}
334	if (/^"((\\.\|[^\\])*)"/) {
335	$msgid .= unescape_po_string($1) if $inmsgid;
336	$msgstr .= unescape_po_string($1) if $inmsgstr;
337	}
338	}
339	$translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr;
340	}
341	}
342
343	sub finalize
344	{
345	}
346
347	sub unescape_one_sequence
348	{
349	my ($sequence) = @_;
350
351	return "\\" if $sequence eq "\\\\";
352	return "\"" if $sequence eq "\\\"";
353
354	# gettext also handles \n, \t, \b, \r, \f, \v, \a, \xxx (octal),
355	# \xXX (hex) and has a comment saying they want to handle \u and \U.
356
357	return $sequence;
358	}
359
360	sub unescape_po_string
361	{
362	my ($string) = @_;
363
364	$string =~ s/(\\.)/unescape_one_sequence($1)/eg;
365
366	return $string;
367	}
368
369	sub entity_decode
370	{
371	local ($_) = @_;
372
373	s/'/'/g; # '
374	s/"/"/g; # "
375	s/&/&/g;
376
377	return $_;
378	}
379
380	sub entity_encode
381	{
382	my ($pre_encoded) = @_;
383
384	my @list_of_chars = unpack ('C*', $pre_encoded);
385
386	if ($PASS_THROUGH_ARG) {
387	return join ('', map (&entity_encode_int_even_high_bit, @list_of_chars));
388	} else {
389	return join ('', map (&entity_encode_int_minimalist, @list_of_chars));
390	}
391	}
392
393	sub entity_encode_int_minimalist
394	{
395	return """ if $_ == 34;
396	return "&" if $_ == 38;
397	return "'" if $_ == 39;
398	return chr $_;
399	}
400
401	sub entity_encode_int_even_high_bit
402	{
403	if ($_ > 127 \|\| $_ == 34 \|\| $_ == 38 \|\| $_ == 39) {
404	# the ($_ > 127) should probably be removed
405	return "&#" . $_ . ";";
406	} else {
407	return chr $_;
408	}
409	}
410
411	sub entity_encoded_translation
412	{
413	my ($lang, $string) = @_;
414
415	my $translation = $translations{$lang, $string};
416	return $string if !$translation;
417	return entity_encode ($translation);
418	}
419
420	## XML (bonobo-activation specific) merge code
421
422	sub ba_merge_translations
423	{
424	my $source;
425
426	{
427	local $/; # slurp mode
428	open INPUT, "<$FILE" or die "can't open $FILE: $!";
429	$source = <INPUT>;
430	close INPUT;
431	}
432
433	open OUTPUT, ">$OUTFILE" or die "can't open $OUTFILE: $!";
434
435	while ($source =~ s\|^(.?)([ \t]<\s$w+\s+($w+\s=\s"$q"\s)+/?>)([ \t]*\n)?\|\|s) {
436	print OUTPUT $1;
437
438	my $node = $2 . "\n";
439
440	my @strings = ();
441	$_ = $node;
442	while (s/(\s)_($w+\s=\s"($q)")/$1$2/s) {
443	push @strings, entity_decode($3);
444	}
445	print OUTPUT;
446
447	my %langs;
448	for my $string (@strings) {
449	for my $lang (keys %po_files_by_lang) {
450	$langs{$lang} = 1 if $translations{$lang, $string};
451	}
452	}
453
454	for my $lang (sort keys %langs) {
455	$_ = $node;
456	s/(\sname\s=\s)"($q)"/$1"$2-$lang"/s;
457	s/(\s)_($w+\s=\s")($q)"/$1 . $2 . entity_encoded_translation($lang, $3) . '"'/seg;
458	print OUTPUT;
459	}
460	}
461
462	print OUTPUT $source;
463
464	close OUTPUT;
465	}
466
467
468	## XML (non-bonobo-activation) merge code
469
470	sub xml_merge_translations
471	{
472	my $source;
473
474	{
475	local $/; # slurp mode
476	open INPUT, "<$FILE" or die "can't open $FILE: $!";
477	$source = <INPUT>;
478	close INPUT;
479	}
480
481	open OUTPUT, ">$OUTFILE" or die;
482
483	# FIXME: support attribute translations
484
485	# Empty nodes never need translation, so unmark all of them.
486	# For example, <_foo/> is just replaced by <foo/>.
487	$source =~ s\|<\s_($w+)\s/>\|<$1/>\|g;
488
489	# Support for <_foo>blah</_foo> style translations.
490	while ($source =~ s\|^(.?)([ \t])<\s_($w+)\s>(.?)<\s/_\3\s>([ \t]\n)?\|\|s) {
491	print OUTPUT $1;
492
493	my $spaces = $2;
494	my $tag = $3;
495	my $string = $4;
496
497	print OUTPUT "$spaces<$tag>$string</$tag>\n";
498
499	$string =~ s/\s+/ /g;
500	$string =~ s/^ //;
501	$string =~ s/ $//;
502	$string = entity_decode($string);
503
504	for my $lang (sort keys %po_files_by_lang) {
505	my $translation = $translations{$lang, $string};
506	next if !$translation;
507	$translation = entity_encode($translation);
508	print OUTPUT "$spaces<$tag xml:lang=\"$lang\">$translation</$tag>\n";
509	}
510	}
511
512	print OUTPUT $source;
513
514	close OUTPUT;
515	}
516
517	sub keys_merge_translations
518	{
519	open INPUT, "<${FILE}" or die;
520	open OUTPUT, ">${OUTFILE}" or die;
521
522	while (<INPUT>) {
523	if (s/^(\s)_(\w+=(.))/$1$2/) {
524	my $string = $3;
525
526	print OUTPUT;
527
528	my $non_translated_line = $_;
529
530	for my $lang (sort keys %po_files_by_lang) {
531	my $translation = $translations{$lang, $string};
532	next if !$translation;
533
534	$_ = $non_translated_line;
535	s/(\w+)=.*/[$lang]$1=$translation/;
536	print OUTPUT;
537	}
538	} else {
539	print OUTPUT;
540	}
541	}
542
543	close OUTPUT;
544	close INPUT;
545	}
546
547	sub desktop_merge_translations
548	{
549	open INPUT, "<${FILE}" or die;
550	open OUTPUT, ">${OUTFILE}" or die;
551
552	while (<INPUT>) {
553	if (s/^(\s)_(\w+=(.))/$1$2/) {
554	my $string = $3;
555
556	print OUTPUT;
557
558	my $non_translated_line = $_;
559
560	for my $lang (sort keys %po_files_by_lang) {
561	my $translation = $translations{$lang, $string};
562	next if !$translation;
563
564	$_ = $non_translated_line;
565	s/(\w+)=.*/${1}[$lang]=$translation/;
566	print OUTPUT;
567	}
568	} else {
569	print OUTPUT;
570	}
571	}
572
573	close OUTPUT;
574	close INPUT;
575	}
576
577	sub schemas_merge_translations
578	{
579	my $source;
580
581	{
582	local $/; # slurp mode
583	open INPUT, "<$FILE" or die "can't open $FILE: $!";
584	$source = <INPUT>;
585	close INPUT;
586	}
587
588	open OUTPUT, ">$OUTFILE" or die;
589
590	# FIXME: support attribute translations
591
592	# Empty nodes never need translation, so unmark all of them.
593	# For example, <_foo/> is just replaced by <foo/>.
594	$source =~ s\|<\s_($w+)\s/>\|<$1/>\|g;
595
596	# Support for <_foo>blah</_foo> style translations.
597
598	my $regex_start = "^(.?)([ \t])<locale name=\"C\">";
599	my $regex_short = "([ \t\n])<short>(.?)</short>";
600	my $regex_long = "([ \t\n])<long>(.?)</long>";
601	my $regex_end = "([ \t\n]*)</locale>";
602
603	while ($source =~ s\|$regex_start$regex_short$regex_long$regex_end\|\|s) {
604	print OUTPUT $1;
605
606	my $locale_start_spaces = $2;
607	my $locale_end_spaces = $7;
608	my $short_spaces = $3;
609	my $short_string = $4;
610	my $long_spaces = $5;
611	my $long_string = $6;
612
613	# English first
614
615	print OUTPUT "$locale_start_spaces<locale name=\"C\">";
616	print OUTPUT "$short_spaces<short>$short_string</short>";
617	print OUTPUT "$long_spaces<long>$long_string</long>";
618	print OUTPUT "$locale_end_spaces</locale>";
619
620	$short_string =~ s/\s+/ /g;
621	$short_string =~ s/^ //;
622	$short_string =~ s/ $//;
623	$short_string = entity_decode($short_string);
624
625	$long_string =~ s/\s+/ /g;
626	$long_string =~ s/^ //;
627	$long_string =~ s/ $//;
628	$long_string = entity_decode($long_string);
629
630	for my $lang (sort keys %po_files_by_lang) {
631	my $short_translation = $translations{$lang, $short_string};
632	my $long_translation = $translations{$lang, $long_string};
633
634	next if (!$short_translation && !$long_translation);
635
636	print OUTPUT "\n$locale_start_spaces<locale name=\"$lang\">";
637
638	if ($short_translation)
639	{
640	$short_translation = entity_encode($short_translation);
641	print OUTPUT "$short_spaces<short>$short_translation</short>";
642	}
643
644	if ($long_translation)
645	{
646	$long_translation = entity_encode($long_translation);
647	print OUTPUT "$long_spaces<long>$long_translation</long>";
648	}
649
650	print OUTPUT "$locale_end_spaces</locale>";
651	}
652	}
653
654	print OUTPUT $source;
655
656	close OUTPUT;
657	}