#!/usr/bin/perl # latin_diacr patterns: # 'LATIN (SMALL|CAPITAL) (LETTER|LIGATURE) ((LONG|OPEN|DOTLESS) )?([^ ;]+)( DIGRAPH)?( WITH ([^ ;]+)( AND ([ ;]+))?)?' # 'LATIN $1 $2 $3$5$6 WITH $8' # 'LATIN (SMALL|CAPITAL) (LETTER|LIGATURE) ((LONG|OPEN|DOTLESS) )?([^ ;]+)( DIGRAPH)?( WITH [^ ;]+)?' # 'LATIN $1 $2 $3$5$6' # 'LATIN (SMALL|CAPITAL) (LETTER|LIGATURE) ((LONG|OPEN|DOTLESS) )?[^ ;]+( DIGRAPH)?' # 'LATIN $1 $2 $5$6' # 'LATIN (SMALL|CAPITAL) (LETTER|LIGATURE) [^ ;]+( DIGRAPH)?' # latin_diacr_HOPEFULL_PATTERN='LATIN' #($FULL_PATTERN, $REDUCTED_PATTERN, $TRANSLATION) = @ARGV; #printf (" => `%s'\n =>`%s'\n =>`%s'\n", $FULL_PATTERN, $REDUCTED_PATTERN, $TRANSLATION); # # Parse command-line: fill @PATTERNS and @TRANSLATION alternatively # # @PATTERNS=(); # @TRANSLATIONS=(); # $arg_parity = 0; # foreach $arg (@ARGV) { # if ($arg_parity == 1) { # push @TRANSLATIONS, $arg; # } else { # push @PATTERNS, $arg; # } # $arg_parity = not $arg_parity; # } $PATTERNS=$ARGV[0]; shift @ARGV; @TRANSLATIONS = @ARGV; use Data::Dumper; $Data::Dumper::Indent = 3; $Data::Dumper::Terse = 1; #printf ("\@PATTERNS = %s\@TRANSLATIONS = %s\n", Dumper(\@PATTERNS), Dumper(\@TRANSLATIONS)); printf ("\$PATTERNS = %s\@TRANSLATIONS = %s\n", Dumper($PATTERNS), Dumper(\@TRANSLATIONS)); exit 0; # # Parse each input line # UNICODE: while () { # ($code, $label, $categ, $comClass, # $bidiClass, $decompos, $decDigit, $digit, # $numeric, $mirror, $oldName, $comment, # $upper, $lower, $title) = split (/;/); ($code, $label, $categ, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef) = split (/;/); # skip control chars next UNICODE if (index ($categ, "C") == 0); for ($i = 0; $i < $#PATTERNS; $i++) { if ($label =~ m/^$PATTERNS[$i]$/) { printf ("# `%s'", $label); for ($j = $i; $j <= $#TRANSLATIONS; $j++) { $transl = $label; $transl =~ s/^$PATTERNS[$i]$/eval"\"$TRANSLATIONS[$j]\""/e; printf (" : `%s'", $transl); } print "\n"; next UNICODE; # don't try to match with a smaller pattern } else { printf ("# *SKIPPED* `%s'\n", $label); } } }