#!/bin/bash
# -*- sh -*-
#
# Reads on stdin a file formatted like the official UnicodeData list and
# writes on stdout a fallback table suitable for `consolechars -k'.
#
# Arguments:
#   $1  FULL_PATTERN      extended regex matching the names of the characters
#                         that need a fallback
#   $2  REDUCTED_PATTERN  extended regex matching the names of the characters
#                         to fall back to
#   $3  TRANSLATION       sed replacement text turning a name matched by
#                         FULL_PATTERN into the name of its fallback
#                         (may use \1, \2, ... for groups of FULL_PATTERN)
#
# Note: we assume every character with a diacritic has a matching
# character without one.

if (( $# != 3 )); then
  echo >&2 "Usage: $(basename -- "$0") FULL_PATTERN REDUCTED_PATTERN TRANSLATION"
  echo >&2 " < UnicodeData-2.txt >out.fallback"
  exit 1
fi

FULL_GREP_PATTERN=$1
REDUCTED_PATTERN=$2
TRANSLATION=$3

# sed is run with basic regexes while grep uses extended ones, so rewrite
# the ERE operators `?', `(', `)' and `|' into their BRE spelling.
# The pattern is quoted so that the shell neither globs nor word-splits it.
# NOTE(review): other ERE operators (`+', `{n,m}') are not converted.
FULL_SED_PATTERN=$(printf '%s\n' "${FULL_GREP_PATTERN}" |
  sed -e 's/?/\\{0,1\\}/g' -e 's/\([()|]\)/\\\1/g')

# Private, unpredictable temp file; removed on every exit path.
FILE=$(mktemp "${TMPDIR:-/tmp}/fallback.XXXXXX") || {
  echo >&2 "$(basename -- "$0"): cannot create temporary file"
  exit 1
}
trap 'rm -f -- "${FILE}"' EXIT

# Keep only the first two fields of the input (code point and name); the
# list is needed twice, hence the temp file.
cut -d';' -f1,2 >"${FILE}"

cat <<EOF
#
# Fallback file from chars matching:
# '${FULL_GREP_PATTERN}'
# to chars matching:
# '${REDUCTED_PATTERN}'
# using as a translation rule:
# '${TRANSLATION}'
#
# Auto-generated by $(basename -- "$0") from UnicodeData list
#
# ** DO NOT EDIT MANUALLY **
#
EOF

# select only targeted lines
grep -E -e ";${FULL_GREP_PATTERN}\$" <"${FILE}" |
  # filter out those we want to reduce to
  grep -E -v -e ";${REDUCTED_PATTERN}\$" |
  # translate into the wished reduction, keeping the original name as 3rd
  # field (\0 is the whole match, as understood by GNU sed)
  sed -e "s/;${FULL_SED_PATTERN}\$/;${TRANSLATION}\\0/" |
  # translate the reduced name into its code point, inserted as 2nd field
  sed -f <(
    # build a sed script with one rule per reduced character, which
    # prefixes its name with its code point
    grep -E -e ";${REDUCTED_PATTERN}\$" <"${FILE}" |
      awk -F';' '{print "s/;" $2 ";/;U+" $1 "\\0/"}'
  ) |
  # drop lines whose translation did not name a reduced character
  grep -F ';U+' |
  # format lines in fallback-file syntax
  awk -F';' '{print "# " $4 " : " $3 "\nU+" $1 " " $2}'