Skip to content

Commit

Permalink
Merge pull request #66 from hmlendea/name-normalisation
Browse files (browse the repository at this point in the history)
Improved name normalisations
  • Loading branch information
hmlendea authored May 31, 2023
2 parents a6fea97 + 35b290f commit 8d89416
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 25 deletions.
1 change: 1 addition & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
{
"workbench.colorCustomizations": {
"commandCenter.border": "#15202b99",
"sash.hoverBorder": "#63c45d",
"statusBar.background": "#46ae40",
"statusBar.foreground": "#15202b",
Expand Down
26 changes: 15 additions & 11 deletions MoreCulturalNamesBuilder.UnitTests/Service/NameNormaliserTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,16 @@ public void SetUp()
this.nameNormaliser = new NameNormaliser();
}

// Crusader Kings 3
[Test]
[TestCase("Â-ngì-pî-sṳ̂ sân", "Â-ngì-pî-sû sân")]
[TestCase("Ab‌khajiyā", "Abkhajiyā")]
[TestCase("Aǧīm", "Ajīm")]
[TestCase("Aǧīm", "Ağīm")]
[TestCase("Aḫmīm", "Akhmīm")]
[TestCase("Ais‍lyāṇḍ", "Aislyāņd")]
[TestCase("Aǩsubaj", "Aksubaj")]
[TestCase("al-Basīṭ", "al-Basīț")]
[TestCase("al-Ǧubayl", "al-Jubayl")]
[TestCase("al-Ǧazīraḧ al-Ḫaḍrāʼ", "al-Ğazīrah al-Khadrā´")]
[TestCase("al-Ǧubayl", "al-Ğubayl")]
[TestCase("al-Hāmā al-Arāġūn", "al-Hāmā al-Arāġūn")]
[TestCase("al-H̱ānīẗ", "al-Khānīah")]
[TestCase("āl-Zāwyẗ", "āl-Zāwyah")]
Expand All @@ -43,6 +44,7 @@ public void SetUp()
[TestCase("Anwākšūṭ", "Anwākšūț")]
[TestCase("Aṗsny", "Apsny")]
[TestCase("Åsele", "Åsele")]
[TestCase("Â-ngì-pî-sṳ̂ sân", "Â-ngì-pî-sû sân")]
[TestCase("Bāḇel", "Bābel")]
[TestCase("Basileia Rhṓmaiṓn", "Basileia Rhōmaiōn")]
[TestCase("Bạt Đế Mỗ", "Bat Đê Mô")]
Expand All @@ -58,9 +60,9 @@ public void SetUp()
[TestCase("Chęciny", "Chęciny")]
[TestCase("Cửu Trại Câu", "Ců'u Trai Câu")]
[TestCase("Đakovo", "Đakovo")]
[TestCase("Đặng Khẩu", "Đăng Khâu")]
[TestCase("Danmǫrk", "Danmörk")]
[TestCase("Dasavleti Virǯinia", "Dasavleti Viržinia")]
[TestCase("Đặng Khẩu", "Đăng Khâu")]
[TestCase("Đế quốc Nga", "Đê quôc Nga")]
[TestCase("Dobřany", "Dobřany")]
[TestCase("Dᶻidᶻəlal̓ič", "Dzidzalalič")]
Expand All @@ -74,7 +76,7 @@ public void SetUp()
[TestCase("Góðviðra", "Góðviðra")]
[TestCase("Grɨnlɛɛn", "Grinleen")]
[TestCase("G‍roseṭō", "Grosețō")]
[TestCase("Ǧuzur al-Īǧẗ", "Juzur al-Ījah")]
[TestCase("Ǧuzur al-Īǧẗ", "Ğuzur al-Īğah")]
[TestCase("Ḥadīṯẗ", "Hadīthah")]
[TestCase("Ȟaȟáwakpa", "Ĥaĥáwakpa")]
[TestCase("H̱rūnīnġn", "Khrūnīnġn")]
Expand Down Expand Up @@ -107,7 +109,7 @@ public void SetUp()
[TestCase("Miniṡoṡeiyoḣdoke Otoƞwe", "Minisoseiyohdoke Otonwe")]
[TestCase("Miniᐋpulis", "Miniâpulis")]
[TestCase("Moscoƿ", "Moscouu")]
[TestCase("Mūrīṭanīẗ al-Ṭinǧīẗ", "Mūrīțanīah al-Ținjīah")]
[TestCase("Mūrīṭanīẗ al-Ṭinǧīẗ", "Mūrīțanīah al-Ținğīah")]
[TestCase("Nam̐si", "Namsi")]
[TestCase("Nazareḟŭ", "Nazarefŭ")]
[TestCase("Ngò-lò-sṳ̂", "Ngò-lò-sû")]
Expand Down Expand Up @@ -144,7 +146,7 @@ public void SetUp()
[TestCase("Tibískon", "Tibískon")]
[TestCase("Tłnáʔəč", "Tłná´ač")]
[TestCase("Ṭ‍renṭō", "Țrențō")]
[TestCase("Truǧālẗ", "Trujālah")]
[TestCase("Truǧālẗ", "Truğālah")]
[TestCase("Užhorod", "Užhorod")]
[TestCase("Vialikaja Poĺšča", "Vialikaja Poĺšča")]
[TestCase("Vюrцby’rg", "Viurcby’rg")]
Expand All @@ -162,15 +164,16 @@ public void WhenNormalisingForCK3_ReturnsTheExpectedNormalisedName(
Assert.AreEqual(expectedResult, actualResult);
}

// Hearts of Iron 4 Cities
[Test]
[TestCase("Â-ngì-pî-sṳ̂ sân", "Â-ngì-pî-sû sân")]
[TestCase("Ab‌khajiyā", "Abkhajiyā")]
[TestCase("Aǧīm", "Ajīm")]
[TestCase("Aǧīm", "Ağīm")]
[TestCase("Aḫmīm", "Akhmīm")]
[TestCase("Ais‍lyāṇḍ", "Aislyāņd")]
[TestCase("Aǩsubaj", "Aќsubaj")]
[TestCase("al-Basīṭ", "al-Basīţ")]
[TestCase("al-Ǧubayl", "al-Jubayl")]
[TestCase("al-Ǧubayl", "al-Ğubayl")]
[TestCase("al-Hāmā al-Arāġūn", "al-Hāmā al-Arāġūn")]
[TestCase("al-H̱ānīẗ", "al-Khānīah")]
[TestCase("āl-Zāwyẗ", "āl-Zāwyah")]
Expand Down Expand Up @@ -245,7 +248,7 @@ public void WhenNormalisingForCK3_ReturnsTheExpectedNormalisedName(
[TestCase("Miniṡoṡeiyoḣdoke Otoƞwe", "Minisoseiyohdoke Otoŋwe")]
[TestCase("Miniᐋpulis", "Miniâpulis")]
[TestCase("Moscoƿ", "Moscouu")]
[TestCase("Mūrīṭanīẗ al-Ṭinǧīẗ", "Mūrīţanīah al-Ţinjīah")]
[TestCase("Mūrīṭanīẗ al-Ṭinǧīẗ", "Mūrīţanīah al-Ţinğīah")]
[TestCase("Nam̐si", "Namsi")]
[TestCase("Nazareḟŭ", "Nazarefŭ")]
[TestCase("Ngò-lò-sṳ̂", "Ngò-lò-sû")]
Expand Down Expand Up @@ -282,7 +285,7 @@ public void WhenNormalisingForCK3_ReturnsTheExpectedNormalisedName(
[TestCase("Tibískon", "Tibískon")]
[TestCase("Tłnáʔəč", "Tłná´ač")]
[TestCase("Ṭ‍renṭō", "Ţrenţō")]
[TestCase("Truǧālẗ", "Trujālah")]
[TestCase("Truǧālẗ", "Truğālah")]
[TestCase("Užhorod", "Užhorod")]
[TestCase("Vialikaja Poĺšča", "Vialikaja Poĺšča")]
[TestCase("Vюrцby’rg", "Viurcby´rg")]
Expand Down Expand Up @@ -436,6 +439,7 @@ public void WhenNormalisingForWindow1252_ReturnsTheExpectedNormalisedName(
Assert.AreEqual(expectedResult, actualResult);
}

// Imperator Rome
[Test]
[TestCase("Â-ngì-pî-sṳ̂ sân", "Â-ngì-pî-sû sân")]
[TestCase("Ab‌khajiyā", "Abkhajiyā")]
Expand Down
36 changes: 22 additions & 14 deletions MoreCulturalNamesBuilder/Service/NameNormaliser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -50,15 +50,16 @@ public string ToCK3Charset(string name)
processedName = Regex.Replace(processedName, "[ḦḨ]", "H");
processedName = Regex.Replace(processedName, "[Ȟ]", "Ĥ");
processedName = Regex.Replace(processedName, "[Ị]", "Į");
processedName = Regex.Replace(processedName, "[Ǧ]", "J");
processedName = Regex.Replace(processedName, "[Ǧ]", "Ğ"); // J
processedName = Regex.Replace(processedName, "J̌", "Ĵ");
processedName = Regex.Replace(processedName, "[Ḫ]", "Kh");
processedName = Regex.Replace(processedName, "[ḰǨ]", "K");
processedName = Regex.Replace(processedName, "[ḲḴ]", "Ķ");
processedName = Regex.Replace(processedName, "[Ḷ]", "Ļ");
processedName = Regex.Replace(processedName, "[ḾṂ]", "M");
processedName = Regex.Replace(processedName, "[ƝŊ]", "N");
processedName = Regex.Replace(processedName, "[ǸṄ]", "Ń");
processedName = Regex.Replace(processedName, "[Ǹ]", "En");
processedName = Regex.Replace(processedName, "[Ṅ]", "Ń");
processedName = Regex.Replace(processedName, "[Ṇ]", "Ņ");
processedName = Regex.Replace(processedName, "[ỌƠ]", "O");
processedName = Regex.Replace(processedName, "[Ȯ]", "Ó");
Expand Down Expand Up @@ -97,15 +98,16 @@ public string ToCK3Charset(string name)
processedName = Regex.Replace(processedName, "[ḧḩ]", "h");
processedName = Regex.Replace(processedName, "[ȟ]", "ĥ");
processedName = Regex.Replace(processedName, "[ị]", "į");
processedName = Regex.Replace(processedName, "[ǧ]", "j");
processedName = Regex.Replace(processedName, "[ǧ]", "ğ"); // j
processedName = Regex.Replace(processedName, "[ǰ]", "ĵ");
processedName = Regex.Replace(processedName, "[ḫ]", "kh");
processedName = Regex.Replace(processedName, "[ḱǩ]", "k");
processedName = Regex.Replace(processedName, "[ḳḵ]", "ķ");
processedName = Regex.Replace(processedName, "[ḷ]", "ļ");
processedName = Regex.Replace(processedName, "[ḿṃ]", "m");
processedName = Regex.Replace(processedName, "[ɲŋ]", "n");
processedName = Regex.Replace(processedName, "[ǹṅ]", "ń");
processedName = Regex.Replace(processedName, "[ǹ]", "en");
processedName = Regex.Replace(processedName, "[ṅ]", "ń");
processedName = Regex.Replace(processedName, "[ṇ]", "ņ");
processedName = Regex.Replace(processedName, "[ọơ]", "o");
processedName = Regex.Replace(processedName, "[ȯ]", "ó");
Expand Down Expand Up @@ -163,14 +165,15 @@ public string ToHOI4CityCharset(string name)
processedName = Regex.Replace(processedName, "[ḦḨ]", "H");
processedName = Regex.Replace(processedName, "[Ȟ]", "Ĥ");
processedName = Regex.Replace(processedName, "[Ị]", "Į");
processedName = Regex.Replace(processedName, "[Ǧ]", "J");
processedName = Regex.Replace(processedName, "[Ǧ]", "Ğ"); // J
processedName = Regex.Replace(processedName, "[Ḫ]", "Kh");
processedName = Regex.Replace(processedName, "[ḰǨ]", "Ќ");
processedName = Regex.Replace(processedName, "[ḴḲ]", "Ķ");
processedName = Regex.Replace(processedName, "[Ḷ]", "Ļ");
processedName = Regex.Replace(processedName, "[ḾṂ]", "M");
processedName = Regex.Replace(processedName, "[Ɲ]", "N");
processedName = Regex.Replace(processedName, "[ǸṄ]", "Ń");
processedName = Regex.Replace(processedName, "[Ǹ]", "En");
processedName = Regex.Replace(processedName, "[Ṅ]", "Ń");
processedName = Regex.Replace(processedName, "[Ṇ]", "Ņ");
processedName = Regex.Replace(processedName, "[ỌƠ]", "O");
processedName = Regex.Replace(processedName, "[Ȯ]", "Ó");
Expand Down Expand Up @@ -213,15 +216,16 @@ public string ToHOI4CityCharset(string name)
processedName = Regex.Replace(processedName, "[ȟ]", "ĥ");
processedName = Regex.Replace(processedName, "[ĩ]", "ï");
processedName = Regex.Replace(processedName, "[ị]", "į");
processedName = Regex.Replace(processedName, "[ǧ]", "j");
processedName = Regex.Replace(processedName, "[ǧ]", "ğ"); // j
processedName = Regex.Replace(processedName, "[ǰ]", "ĵ");
processedName = Regex.Replace(processedName, "[ḫ]", "kh");
processedName = Regex.Replace(processedName, "[ḱǩ]", "ќ");
processedName = Regex.Replace(processedName, "[ḵḳ]", "ķ");
processedName = Regex.Replace(processedName, "[ḷ]", "ļ");
processedName = Regex.Replace(processedName, "[ḿṃ]", "m");
processedName = Regex.Replace(processedName, "[ɲ]", "n");
processedName = Regex.Replace(processedName, "[ǹṅ]", "ń");
processedName = Regex.Replace(processedName, "[ǹ]", "en");
processedName = Regex.Replace(processedName, "[ṅ]", "ń");
processedName = Regex.Replace(processedName, "[ṇ]", "ņ");
processedName = Regex.Replace(processedName, "[ọơ]", "o");
processedName = Regex.Replace(processedName, "[ȯ]", "ó");
Expand Down Expand Up @@ -272,8 +276,6 @@ public string ToHOI4StateCharset(string name)
processedName = Regex.Replace(processedName, "iīẗ", "iyyah");
processedName = Regex.Replace(processedName, "īẗ", "iyah");

processedName = ToHOI4CityCharset(processedName);

// Hearts of Iron IV
processedName = Regex.Replace(processedName, "[ĂĀ]", "Ã");
processedName = Regex.Replace(processedName, "[Č]", "Ch");
Expand Down Expand Up @@ -336,6 +338,8 @@ public string ToHOI4StateCharset(string name)
processedName = Regex.Replace(processedName, "[ź]", "z");
processedName = Regex.Replace(processedName, "[ż]", "ž");

processedName = ToHOI4CityCharset(processedName);

hoi4statesCache.TryAdd(name, processedName);

return processedName;
Expand Down Expand Up @@ -385,7 +389,8 @@ public string ToImperatorRomeCharset(string name)
processedName = Regex.Replace(processedName, "[ḰḲĶḴǨЌ]", "K");
processedName = Regex.Replace(processedName, "[ĹŁĽḶĻ]", "L");
processedName = Regex.Replace(processedName, "[ṂḾ]", "M");
processedName = Regex.Replace(processedName, "[ǸŅŊƝ]", "N");
processedName = Regex.Replace(processedName, "[Ǹ]", "En");
processedName = Regex.Replace(processedName, "[ŅŊƝ]", "N");
processedName = Regex.Replace(processedName, "[Ơ]", "O");
processedName = Regex.Replace(processedName, "[Ȯ]", "Ó");
processedName = Regex.Replace(processedName, "[Ờ]", "Ò");
Expand Down Expand Up @@ -434,7 +439,8 @@ public string ToImperatorRomeCharset(string name)
processedName = Regex.Replace(processedName, "[ḱḳķḵǩќ]", "k");
processedName = Regex.Replace(processedName, "[ĺłľḷļ]", "l");
processedName = Regex.Replace(processedName, "[ṃḿ]", "m");
processedName = Regex.Replace(processedName, "[ǹņŋɲ]", "n");
processedName = Regex.Replace(processedName, "[ǹ]", "en");
processedName = Regex.Replace(processedName, "[ņŋɲ]", "n");
processedName = Regex.Replace(processedName, "[ơ]", "o");
processedName = Regex.Replace(processedName, "[ờ]", "ò");
processedName = Regex.Replace(processedName, "[ȯ]", "ó");
Expand Down Expand Up @@ -516,7 +522,8 @@ public string ToWindows1252(string name)
processedName = Regex.Replace(processedName, "[ĹŁĽḶĻ]", "L");
processedName = Regex.Replace(processedName, "[ṂḾ]", "M");
processedName = Regex.Replace(processedName, "[Ň]", "Ñ");
processedName = Regex.Replace(processedName, "[ŃǸŅṄṆŊƝ]", "N");
processedName = Regex.Replace(processedName, "[Ǹ]", "En");
processedName = Regex.Replace(processedName, "[ŃŅṄṆŊƝ]", "N");
processedName = Regex.Replace(processedName, "[ƠỌ]", "O");
processedName = Regex.Replace(processedName, "[Ȯ]", "Ó");
processedName = Regex.Replace(processedName, "[Ờ]", "Ò");
Expand Down Expand Up @@ -572,7 +579,8 @@ public string ToWindows1252(string name)
processedName = Regex.Replace(processedName, "[ĺłľḷļ]", "l");
processedName = Regex.Replace(processedName, "[ṃḿ]", "m");
processedName = Regex.Replace(processedName, "[ň]", "ñ");
processedName = Regex.Replace(processedName, "[ńǹņṅṇŋɲ]", "n");
processedName = Regex.Replace(processedName, "[ǹ]", "en");
processedName = Regex.Replace(processedName, "[ńņṅṇŋɲ]", "n");
processedName = Regex.Replace(processedName, "[ơọ]", "o");
processedName = Regex.Replace(processedName, "[ȯ]", "ó");
processedName = Regex.Replace(processedName, "[ờ]", "ò");
Expand Down

0 comments on commit 8d89416

Please sign in to comment.