I18n of lttoolbox #176

Open · wants to merge 16 commits into main
4 changes: 2 additions & 2 deletions .github/workflows/build.yaml
@@ -17,7 +17,7 @@ jobs:
run: ./configure --disable-static --enable-python-bindings
- name: build
run: make -j4 V=1 VERBOSE=1
- name: tests
run: make test
- name: make install
run: sudo make install
- name: tests
run: make test
6 changes: 6 additions & 0 deletions .gitignore
@@ -1,6 +1,7 @@
*.pyc
**/*.deps/
/build/
/.vscode/
/.ccls-cache/
/*.pc
/aclocal.m4
@@ -79,13 +80,18 @@
/lttoolbox/lt-invert
/lttoolbox/lt-restrict
/lttoolbox/lt-apply-acx
/lttoolbox/formatmsg
/python/Makefile
/python/Makefile.in
/python/lttoolbox.i
/python/lttoolbox_wrap.cpp
/python/lttoolbox.py
/python/setup.py
/python/build*
/locales/Makefile
/locales/Makefile.in
*.res
*.dat
*.egg-info/
*.egg
**/.mypy_cache/
5 changes: 4 additions & 1 deletion Makefile.am
@@ -1,6 +1,6 @@
ACLOCAL_AMFLAGS=-I m4

SUBDIRS = $(PACKAGE_NAME)
SUBDIRS = $(PACKAGE_NAME) locales
DIST_SUBDIRS = $(PACKAGE_NAME)

if HAVE_PYTHON_BINDINGS
@@ -15,3 +15,6 @@ EXTRA_DIST=autogen.sh tests
# TODO: the below will use python3 if you run it on Arch Linux with no python2 installed
test: tests/run_tests.py
$(PYTHON) $<

export LOCALES_DIR=$(datadir)/$(PACKAGE_NAME)
export ALT_I18N_DATA=$(LOCALES_DIR)/lttoolbox.dat
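
These exports hand the installed bundle location down to the per-directory Makefiles; lttoolbox/Makefile.am below turns ALT_I18N_DATA into a compile-time string macro. A rough illustration of what that gives C++ code, assuming the default /usr/local prefix (the concrete path is not stated in the patch):

    // Illustration only: with prefix=/usr/local the exports above resolve to
    //   LOCALES_DIR   = /usr/local/share/lttoolbox
    //   ALT_I18N_DATA = /usr/local/share/lttoolbox/lttoolbox.dat
    // and the AM_CPPFLAGS define added below makes the latter available as a
    // string literal inside the binaries:
    #include <iostream>

    int main() {
    #ifdef ALT_I18N_DATA
      std::cout << "expecting ICU bundle at: " << ALT_I18N_DATA << '\n';
    #else
      std::cout << "ALT_I18N_DATA was not defined at compile time\n";
    #endif
    }
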
2 changes: 1 addition & 1 deletion configure.ac
@@ -105,4 +105,4 @@ then
PYTHON_INSTALL_PARAMS="--prefix=\$(prefix) --root=\$(DESTDIR)/"
fi

AC_OUTPUT([Makefile lttoolbox.pc lttoolbox/Makefile python/Makefile])
AC_OUTPUT([Makefile lttoolbox.pc lttoolbox/Makefile python/Makefile locales/Makefile])
7 changes: 7 additions & 0 deletions locales/Makefile.am
@@ -0,0 +1,7 @@
lttoolbox.dat: root.txt en.txt es.txt
genrb -d . root.txt en.txt es.txt
echo root.res en.res es.res > package_list.txt
pkgdata -p lttoolbox --mode archive -d . package_list.txt

localesdir = $(LOCALES_DIR)
dist_locales_DATA = lttoolbox.dat
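
The rule above uses ICU's resource tooling: genrb compiles each .txt source into a binary .res bundle, and pkgdata archives the listed bundles into a single lttoolbox.dat package that gets installed into $(LOCALES_DIR). A minimal sketch of reading one key back out of the installed archive with ICU4C; the install directory and locale are illustrative, and it assumes ICU can locate the package through its data directory:

    #include <unicode/putil.h>    // u_setDataDirectory
    #include <unicode/resbund.h>
    #include <unicode/unistr.h>
    #include <unicode/ustream.h>
    #include <iostream>

    int main() {
      UErrorCode status = U_ZERO_ERROR;
      // Illustrative install directory; the real one is $(LOCALES_DIR).
      u_setDataDirectory("/usr/local/share/lttoolbox");
      // Open the "lttoolbox" package (lttoolbox.dat) for the Spanish locale.
      icu::ResourceBundle bundle("lttoolbox", icu::Locale("es"), status);
      if (U_FAILURE(status)) return 1;
      icu::UnicodeString usage = bundle.getStringEx("usage", status);
      if (U_SUCCESS(status)) std::cout << usage << std::endl;  // "USO: " from es.txt
    }
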
2 changes: 2 additions & 0 deletions locales/en.txt
@@ -0,0 +1,2 @@
en{
}
31 changes: 31 additions & 0 deletions locales/es.txt
@@ -0,0 +1,31 @@
es{
lt_proc_desc{"procesar una corriente con un transductor de letras"}
analysis_desc{"análisis morfológico (comportamiento predeterminado)"}
bilingual_desc{"transferencia léxica"}
case_sensitive_desc{"utilizar el caso literal de los caracteres entrantes"}
debugged_gen_desc{"transformarse. generación con todas las cosas"}
decompose_nouns_desc{"Intenta descomponer palabras desconocidas."}
generation_desc{"generación morfológica"}
ignored_chars_desc{"especificar archivo con caracteres para ignorar"}
restore_chars_desc{"especificar archivo con caracteres para restauración diacrítica"}
tagged_gen_desc{"generación morfológica manteniendo formas léxicas"}
tagged_nm_gen_desc{"igual que -l pero sin marcas denominativas desconocidas"}
non_marked_gen_desc{"transformarse. generación sin marcas denominativas desconocidas"}
surf_bilingual_desc{"transferencia léxica con formas superficiales"}
post_generation_desc{"posgeneración"}
inter_generation_desc{"intergeneracional"}
sao_desc{"Procesamiento de entrada del sistema de anotaciones SAO"}
transliteration_desc{"aplicar diccionario de transliteración"}
version_desc{"versión"}
null_flush_desc{"salida de vaciado en el carácter nulo"}
dictionary_case_desc{"usar mayúsculas y minúsculas del diccionario en lugar de superficie"}
careful_case_desc{"usar mayúsculas y minúsculas del diccionario si están presentes; de lo contrario, superficie"}
no_default_ignore_desc{"omite la carga de los caracteres ignorados predeterminados"}
show_weights_desc{"Imprimir los pesos del análisis final (si corresponde)"}
analyses_desc{"No generar más de N análisis (si el transductor está ponderado, los N mejores análisis)"}
weight_classes_desc{"No generar más de N mejores clases de ponderación (donde los análisis con igual ponderación constituyen una clase)"}
compound_max_elements_desc{"Establecer elementos máximos compuestos"}
help_desc{"muestra esta ayuda"}
usage{"USO: "}
version{" versión "}
}
1 change: 1 addition & 0 deletions locales/package_list.txt
@@ -0,0 +1 @@
root.res en.res es.res
158 changes: 158 additions & 0 deletions locales/root.txt
@@ -0,0 +1,158 @@
root{
lt_proc_desc{"process a stream with a letter transducer"}
analysis_desc{"morphological analysis (default behavior)"}
bilingual_desc{"lexical transfer"}
case_sensitive_desc{"use the literal case of the incoming characters"}
debugged_gen_desc{"morph. generation with all the stuff"}
decompose_nouns_desc{"Try to decompound unknown words"}
generation_desc{"morphological generation"}
ignored_chars_desc{"specify file with characters to ignore"}
restore_chars_desc{"specify file with characters for diacritic restoration"}
tagged_gen_desc{"morphological generation keeping lexical forms"}
tagged_nm_gen_desc{"same as -l but without unknown word marks"}
non_marked_gen_desc{"morph. generation without unknown word marks"}
surf_bilingual_desc{"lexical transfer with surface forms"}
post_generation_desc{"post-generation"}
inter_generation_desc{"inter-generation"}
sao_desc{"SAO annotation system input processing"}
transliteration_desc{"apply transliteration dictionary"}
version_desc{"version"}
null_flush_desc{"flush output on the null character"}
dictionary_case_desc{"use dictionary case instead of surface"}
careful_case_desc{"use dictionary case if present, else surface"}
no_default_ignore_desc{"skips loading the default ignore characters"}
show_weights_desc{"Print final analysis weights (if any)"}
analyses_desc{"Output no more than N analyses (if the transducer is weighted, the N best analyses)"}
weight_classes_desc{"Output no more than N best weight classes (where analyses with equal weight constitute a class)"}
compound_max_elements_desc{"Set compound max elements"}
help_desc{"show this help"}
usage{"USAGE: "}
version{" version "}
modes{"Modes:"}
options{"Options:"}


lt_append_desc{"add sections to a compiled transducer"}
keep_desc{"in case of section name conflicts, keep the one from the first transducer"}
single_desc{"treat input transducers as one-sided"}

lt_apply_acx_desc{"apply an ACX file to a compiled transducer"}

lt_comp_desc{"build a letter transducer from a dictionary"}
debug_desc{"insert line numbers before each entry"}
keep_boundaries_desc{"keep morpheme boundaries"}
var_desc{"set language variant"}
alt_desc{"set alternative (monodix)"}
var_left_desc{"set left language variant (bidix)"}
var_right_desc{"set right language variant (bidix)"}
expect_hfst_desc{"expect HFST symbols"}
no_split_desc{"don't attempt to split into word and punctuation sections"}
jobs_desc{"use one cpu core per section when minimising, new section after 50k entries"}
verbose_desc{"compile verbosely"}

lt_compose_desc{"compose transducer1 with transducer2"}
inverted_desc{"run composition right-to-left on transducer1"}
anywhere_desc{"don't require anchored matches, let transducer2 optionally compose at any sub-path"}

lt_expand{"expand the contents of a dictionary file"}
lt_invert_desc{"reverse the direction of a compiled transducer"}

lt_paradigm_desc{"generate listings from a compiled transducer"}
analyser_desc{"FST is an analyser (tags on the right)"}
exclude_desc{"disregard paths containing TAG"}
sort_desc{"alphabetize the paths for each pattern"}

lt_print_desc{"dump a transducer to text in ATT format"}
alpha_desc{"print transducer alphabet"}
use_hfst_desc{"use HFST-compatible character escapes"}

lt_restrict_desc{"remove paths from a transducer"}
minimise_desc{"minimise transducers after deleting paths"}

lt_tmxcomp_desc{"build a letter transducer from a TMX translation memory"}
origin_code_desc{"the language code to be taken as lang1"}
meta_code_desc{"the language code to be taken as lang2"}
input_language{"input language"}
output_language{"output language"}

lt_tmxproc_desc{"process a stream with a letter transducer"}

lt_trim_desc{"trim a transducer to another transducer"}
match_section_desc{"A section with this name (id@type) will only be trimmed against a section with the same name. This argument may be used multiple times."}

ALT80000{"ERROR ALT80000: Invalid or no argument for {option}"}
ALT80010{"ERROR ALT80010: In {node_doc_url} on line {line_number}: Missing value attribute."}
ALT80020{"ERROR ALT80020: In {node_doc_url} on line {line_number}: Expected a single character in value attribute, but found {value_size}."}
ALT80030{"ERROR ALT80030: In {node_doc_url} on line {line_number}: Expected <{expected}> but found <{found}>."}
ALT80050{"ERROR ALT80050: Unable to access \"{file_name}\"."}
ALT80060{"ERROR ALT80060: Invalid format in file \"{file_name}\" on line {line_number}."}
ALT60070{"WARNING ALT60070: Multiple fsts in \"{file_name}\" will be disjuncted."}
ALT80080{"ERROR ALT80080: Transducer contains epsilon transition to a final state. Aborting."}
ALT80090{"ERROR ALT80090: Transducer contains initial epsilon loop. Aborting."}
ALT80100{"ERROR ALT80100: Cannot create empty buffer."}
ALT80110{"ERROR ALT80110: Parse error at the end of input."}
ALT80120{"ERROR ALT80120: Invalid dictionary (hint: the right side of an entry is empty)."}
ALT80121{"ERROR ALT80121: Invalid dictionary (hint: entry on the right beginning with whitespace)."}
ALT80122{"ERROR ALT80122: Invalid dictionary (hint: the left side of an entry is empty)."}
ALT80123{"ERROR ALT80123: Invalid dictionary (hint: entry on the left beginning with whitespace)."}
ALT80124{"ERROR ALT80124: Invalid dictionary (hint: entry beginning with whitespace)."}
ALT80140{"ERROR ALT80140: In file \"{file_name}\" on line {line_number}: Missing alphabet symbols."}
ALT60150{"WARNING ALT60150: Cannot insert <t/> from empty input. Ignoring. (You probably want to specify exact tags when deleting a word.)"}
ALT80160{"ERROR ALT80160: In file \"{file_name}\" on line {line_number}: Non-empty element \"<{name}>\" should be empty."}
ALT80170{"ERROR ALT80170: In file \"{file_name}\" on line {line_number}: Undefined symbol \"{symbol}\"."}
ALT80180{"ERROR ALT80180: In file \"{file_name}\" on line {line_number}: Invalid specification of element \"<{name}>\" in this context."}
ALT80190{"ERROR ALT80190: In file \"{file_name}\" on line {line_number}: Invalid construction."}
ALT80200{"ERROR ALT80200: In file \"{file_name}\" on line {line_number}: Expected \"<{slash_element}>\"."}
ALT60210{"WARNING ALT60210: In file \"{file_name}\" on line {line_number}: Entry begins with space."}
ALT80220{"ERROR ALT80220: In file \"{file_name}\" on line {line_number}: Paradigm refers to itself \"{paradigm_name}\"."}
ALT80230{"ERROR ALT80230: In file \"{file_name}\" on line {line_number}: Undefined paradigm \"{paradigm_name}\"."}
ALT80240{"ERROR ALT80240: In file \"{file_name}\" on line {line_number}: Invalid entry token."}
ALT80250{"ERROR ALT80250: In file \"{file_name}\" on line {line_number}: \"<{element_name}>\" element must specify non-void \"{attr_name}\" attribute."}
ALT80260{"ERROR ALT80260: In file \"{file_name}\" on line {line_number}: Parse error."}
ALT80270{"ERROR ALT80270: In file \"{file_name}\" on line {line_number}: Invalid inclusion of \"<{element_name}>\" into \"<{compiler_entry_element}>\"."}
ALT80280{"ERROR ALT80280: In file \"{file_name}\" on line {line_number}: Invalid node \"<{element_name}>\"."}
ALT80290{"ERROR ALT80290: I/O Error."}
ALT80300{"ERROR ALT80300: Out of range: {value}."}
ALT60320{"WARNING ALT60320: Matching case-sensitively since processor state size >= {max_case_insensitive_state_size}"}
ALT80330{"ERROR ALT80330: Unsupported transducer type for \"{transducer_first}\"."}
ALT60340{"WARNING ALT60340: CompoundAnalysis's MAX_COMBINATIONS exceeded for \"{input_word}\"\n"
" gave up at char {index} \"{char}\"."}
ALT60350{"WARNING ALT60350: Decomposition symbol {symbol} not found."}
ALT80360{"ERROR ALT80360: Unable to rewind file."}
ALT80370{"ERROR ALT80370: Unexpected trailing backslash."}
ALT60380{"WARNING ALT60380: section \"{section}\" appears in both transducers and will be overwritten!"}
ALT80390{"ERROR ALT80390: -l specified, but mode is lr."}
ALT80391{"ERROR ALT80391: -r specified, but mode is rl."}
ALT60410{"WARNING ALT60410: section {section_name} is empty! Skipping it..."}
ALT60420{"WARNING ALT60420: section {section_name} had no final state after composing! Skipping it..."}
ALT80430{"ERROR ALT80430: Composition gave empty transducer!"}
ALT60440{"WARNING ALT60440: unsupported locale, fallback to \"C\""}
ALT60450{"WARNING ALT60450: section {section_name} was not found in both transducers! Skipping if in just one..."}
ALT80460{"ERROR ALT80460: Trimming gave empty transducer!\n"
"Hint: There are no words in bilingual dictionary that match words in both monolingual dictionaries?"}
ALT80470{"ERROR ALT80470: Opening an unended sequence."}
ALT80471{"ERROR ALT80471: Ending an unopened sequence."}
ALT80490{"ERROR ALT80490: Using labels outside of a sequence."}
ALT80500{"ERROR ALT80500: Parsing regexp."}
ALT80510{"ERROR ALT80510: Unable to lowercase string \"{string}\".\n"
"Error code: {error_name}"}
ALT80511{"ERROR ALT80511: Unable to uppercase string \"{string}\".\n"
"Error code: {error_name}"}
ALT80512{"ERROR ALT80512: Unable to titlecase string \"{string}\".\n"
"Error code: {error_name}"}
ALT80513{"ERROR ALT80513: Caseless string comparison failed on \"{string_a}\" and \"{string_b}\".\n"
"Error code: {error_name}"}
ALT80550{"ERROR ALT80550: Trying to link nonexistent states ({source}, {target}, {tag})."}
ALT80560{"ERROR ALT80560: Empty set of final states."}
ALT80570{"ERROR ALT80570: Couldn't find {f_src}, {g_src} in state map."}
ALT80580{"ERROR ALT80580: Failed to read/write uint64_t."}
ALT80590{"ERROR ALT80590: Transducer has features that are unknown to this version of lttoolbox - upgrade!"}
ALT80600{"ERROR ALT80600: Unable to parse {type}."}
ALT80610{"ERROR ALT80610: Malformed input stream."}
ALT80620{"ERROR ALT80620: FST has features that are unknown to this version of lttoolbox - upgrade!"}
ALT80630{"ERROR ALT80630: Could not read {number} expected bytes from stream."}
ALT80640{"ERROR ALT80640: Can't deserialise {size} byte integer type: Can't deserialise size."}
ALT80650{"ERROR ALT80650: Can't deserialise {size} byte integer type: Can't deserialise byte."}
ALT80660{"ERROR ALT80660: Can't serialise const {size_a} byte integer type: Can't serialise size {size_b}."}
ALT80670{"ERROR ALT80670: Can't serialise const {size} byte integer type: Can't serialise byte {byte}."}
}
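
The ALT-prefixed keys above pair each diagnostic code with a message whose placeholders are named ({file_name}, {line_number}, and so on), which matches ICU MessageFormat's named-argument syntax. A short sketch of filling one of these patterns with ICU4C's MessageFormat; the key and argument names come from the bundle above, while the hard-coded pattern and sample values are only for illustration:

    #include <unicode/msgfmt.h>
    #include <unicode/unistr.h>
    #include <unicode/ustream.h>
    #include <iostream>

    int main() {
      UErrorCode status = U_ZERO_ERROR;
      // Pattern as stored under ALT80030 in root.txt above.
      icu::MessageFormat fmt(
        u"ERROR ALT80030: In {node_doc_url} on line {line_number}: "
        u"Expected <{expected}> but found <{found}>.", status);

      icu::UnicodeString names[] = {"node_doc_url", "line_number", "expected", "found"};
      icu::Formattable values[] = {"dict.xml", (int32_t)42, "char", "echar"};

      icu::UnicodeString out;
      fmt.format(names, values, 4, out, status);
      if (U_SUCCESS(status)) std::cout << out << std::endl;
    }
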
6 changes: 4 additions & 2 deletions lttoolbox/Makefile.am
@@ -4,7 +4,7 @@ h_sources = acx.h alphabet.h att_compiler.h buffer.h cli.h compiler.h compressio
match_exe.h match_node.h match_state.h my_stdio.h node.h \
pattern_list.h regexp_compiler.h serialiser.h sorted_vector.h state.h string_utils.h symbol_iter.h \
transducer.h trans_exe.h xml_parse_util.h xml_walk_util.h exception.h tmx_compiler.h \
ustring.h sorted_vector.hpp
ustring.h sorted_vector.hpp i18n.h
cc_sources = acx.cc alphabet.cc att_compiler.cc cli.cc compiler.cc compression.cc entry_token.cc \
expander.cc file_utils.cc fst_processor.cc input_file.cc lt_locale.cc match_exe.cc \
match_node.cc match_state.cc node.cc pattern_list.cc \
@@ -14,7 +14,7 @@ cc_sources = acx.cc alphabet.cc att_compiler.cc cli.cc compiler.cc compression.c
library_includedir = $(includedir)/$(PACKAGE_NAME)
library_include_HEADERS = $(h_sources)

bin_PROGRAMS = lt-comp lt-proc lt-expand lt-paradigm lt-tmxcomp lt-tmxproc lt-print lt-trim lt-compose lt-append lsx-comp lt-invert lt-restrict lt-apply-acx
bin_PROGRAMS = lt-comp lt-proc lt-expand lt-paradigm lt-tmxcomp lt-tmxproc lt-print lt-trim lt-compose lt-append lsx-comp lt-invert lt-restrict lt-apply-acx formatmsg
instdir = lttoolbox

lib_LTLIBRARIES= liblttoolbox.la
@@ -29,6 +29,7 @@ lttoolboxlib = $(prefix)/lib
lttoolbox_DATA = dix.dtd dix.rng dix.rnc acx.rng xsd/dix.xsd xsd/acx.xsd

LDADD = liblttoolbox.la $(PTHREAD_LIBS)
AM_CPPFLAGS = -DALT_I18N_DATA='"$(ALT_I18N_DATA)"'
AM_LDFLAGS = -llttoolbox $(LIBXML_LIBS) $(ICU_LIBS)

lt_append_SOURCES = lt_append.cc
@@ -45,6 +46,7 @@ lsx_comp_SOURCES = lt_comp.cc
lt_invert_SOURCES = lt_invert.cc
lt_restrict_SOURCES = lt_restrict.cc
lt_apply_acx_SOURCES = lt_apply_acx.cc
formatmsg_SOURCES = formatmsg.cc

#lt-validate-dictionary: Makefile.am validate-header.sh
# @echo "Creating lt-validate-dictionary script"
19 changes: 13 additions & 6 deletions lttoolbox/acx.cc
@@ -16,40 +16,47 @@
*/
#include <lttoolbox/acx.h>
#include <lttoolbox/xml_walk_util.h>
#include <iostream>
#include <unicode/ustream.h>
#include <lttoolbox/i18n.h>

const xmlChar* CHAR_NODE = (const xmlChar*)"char";
const xmlChar* EQUIV_NODE = (const xmlChar*)"equiv-char";
const char* VALUE_ATTR = "value";

int32_t get_val(xmlNode* node)
{
I18n i18n {ALT_I18N_DATA, "lttoolbox"};
UString s = getattr(node, VALUE_ATTR);
if (s.empty()) {
error_and_die(node, "Missing value attribute.");
i18n.error("ALT80010", {"node_doc_url", "line_number"},
{(char*)node->doc->URL, node->line}, true);
}
std::vector<int32_t> v;
ustring_to_vec32(s, v);
if (v.size() > 1) {
error_and_die(node, "Expected a single character in value attribute, but found %d.", v.size());
i18n.error("ALT80020", {"node_doc_url", "line_number", "value_size"},
{(char*)node->doc->URL, node->line, std::to_string(v.size()).c_str()}, true);
}
return v[0];
}

std::map<int32_t, sorted_vector<int32_t>> readACX(const char* file)
{
I18n i18n {ALT_I18N_DATA, "lttoolbox"};
std::map<int32_t, sorted_vector<int32_t>> acx;
xmlNode* top_node = load_xml(file);
for (auto char_node : children(top_node)) {
if (!xmlStrEqual(char_node->name, CHAR_NODE)) {
error_and_die(char_node, "Expected <char> but found <%s>.",
(const char*)char_node->name);
i18n.error("ALT80030", {"node_doc_url", "line_number", "expected", "found"},
{(char*)char_node->doc->URL, char_node->line, "char", (const char*)char_node->name}, true);
}
int32_t key = get_val(char_node);
sorted_vector<int32_t> vec;
for (auto equiv_node : children(char_node)) {
if (!xmlStrEqual(equiv_node->name, EQUIV_NODE)) {
error_and_die(char_node, "Expected <equiv-char> but found <%s>.",
(const char*)equiv_node->name);
i18n.error("ALT80030", {"node_doc_url", "line_number", "expected", "found"},
{(char*)char_node->doc->URL, char_node->line, "equiv-char", (const char*)equiv_node->name}, true);
}
vec.insert(get_val(equiv_node));
}
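
Taken together, the new call sites sketch the surface of the I18n helper this PR introduces: it is constructed from the bundle path and a package name, and error() takes a resource key, a list of argument names, a matching list of values, and a flag saying whether to abort. The header lttoolbox/i18n.h itself is not shown in this excerpt; the following is only a rough reconstruction of what such a wrapper might look like, inferred from those call sites:

    // Hypothetical reconstruction of an I18n wrapper matching the call sites
    // above; the PR's actual lttoolbox/i18n.h may differ.
    #include <unicode/resbund.h>
    #include <unicode/msgfmt.h>
    #include <unicode/unistr.h>
    #include <unicode/ustream.h>
    #include <cstdlib>
    #include <iostream>
    #include <vector>

    class I18n {
    public:
      // `package` is unused in this sketch; real code might combine it with
      // the data path when locating the archive (an assumption).
      I18n(const char* data_path, const char* /*package*/)
        : bundle_(data_path, icu::Locale::getDefault(), status_) {}

      // Look up `key`, fill its {named} placeholders with `values`,
      // print the result to stderr and optionally terminate.
      void error(const char* key,
                 std::vector<icu::UnicodeString> names,
                 std::vector<icu::Formattable> values,
                 bool fatal) {
        UErrorCode status = U_ZERO_ERROR;
        icu::UnicodeString pattern = bundle_.getStringEx(key, status);
        icu::UnicodeString out = pattern;  // if formatting fails, print the raw pattern
        if (U_SUCCESS(status)) {
          icu::MessageFormat fmt(pattern, status);
          icu::UnicodeString formatted;
          fmt.format(names.data(), values.data(),
                     static_cast<int32_t>(names.size()), formatted, status);
          if (U_SUCCESS(status)) out = formatted;
        }
        std::cerr << out << std::endl;
        if (fatal) std::exit(EXIT_FAILURE);
      }

    private:
      UErrorCode status_ = U_ZERO_ERROR;
      icu::ResourceBundle bundle_;
    };
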