From ea2ce3b644bc39c3ab1ff12ff799c013c0f44a1f Mon Sep 17 00:00:00 2001 From: Yuuki Toriyama Date: Sat, 16 Nov 2024 17:28:15 +0900 Subject: [PATCH 1/7] =?UTF-8?q?update:=20#455:=20=E3=80=8C=E7=A5=9E?= =?UTF-8?q?=E4=BB=A3=E5=9B=BD=E8=A1=99=E3=80=8D=E3=81=A8=E3=80=8C=E7=A5=9E?= =?UTF-8?q?=E4=BB=A3=E5=9C=8B=E8=A1=99=E3=80=8D=E3=81=AE=E8=A1=A8=E8=A8=98?= =?UTF-8?q?=E3=82=86=E3=82=8C=E3=81=AB=E5=AF=BE=E5=BF=9C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- core/src/adapter/orthographical_variant_adapter.rs | 2 ++ core/src/tokenizer/read_town.rs | 1 + ...75\223\343\201\270\343\201\256\345\257\276\345\277\234.csv" | 3 +++ 3 files changed, 6 insertions(+) diff --git a/core/src/adapter/orthographical_variant_adapter.rs b/core/src/adapter/orthographical_variant_adapter.rs index 7fba398a..56f38227 100644 --- a/core/src/adapter/orthographical_variant_adapter.rs +++ b/core/src/adapter/orthographical_variant_adapter.rs @@ -28,6 +28,7 @@ pub enum OrthographicalVariant { 濱, 祗, 曾, + 國, } impl OrthographicalVariant { @@ -61,6 +62,7 @@ impl OrthographicalVariant { OrthographicalVariant::濱 => &['濱', '浜'], OrthographicalVariant::祗 => &['祗', '祇'], OrthographicalVariant::曾 => &['曾', '曽'], + OrthographicalVariant::國 => &['國', '国'], } } diff --git a/core/src/tokenizer/read_town.rs b/core/src/tokenizer/read_town.rs index 7474f747..0f41f9f0 100644 --- a/core/src/tokenizer/read_town.rs +++ b/core/src/tokenizer/read_town.rs @@ -90,6 +90,7 @@ fn find_town(input: &str, candidates: &Vec) -> Option<(String, String)> OrthographicalVariant::濱, OrthographicalVariant::祗, OrthographicalVariant::曾, + OrthographicalVariant::國, ], }; if let Some(result) = adapter.apply(input, candidate) { diff --git "a/tests/test_data/\347\225\260\345\255\227\344\275\223\346\227\247\345\255\227\344\275\223\343\201\270\343\201\256\345\257\276\345\277\234.csv" "b/tests/test_data/\347\225\260\345\255\227\344\275\223\346\227\247\345\255\227\344\275\223\343\201\270\343\201\256\345\257\276\345\277\234.csv" index 8afeb572..de962e26 100644 --- "a/tests/test_data/\347\225\260\345\255\227\344\275\223\346\227\247\345\255\227\344\275\223\343\201\270\343\201\256\345\257\276\345\277\234.csv" +++ "b/tests/test_data/\347\225\260\345\255\227\344\275\223\346\227\247\345\255\227\344\275\223\343\201\270\343\201\256\345\257\276\345\277\234.csv" @@ -84,3 +84,6 @@ address,prefecture,city,town,rest # 「小曾根」と「小曽根」の表記ゆれへの対応 埼玉県熊谷市小曽根1220,埼玉県,熊谷市,小曽根,1220 埼玉県熊谷市小曾根1220,埼玉県,熊谷市,小曽根,1220 +# 「神代國衙」と「神代国衙」の表記揺れへの対応 +兵庫県南あわじ市神代國衙1680,兵庫県,南あわじ市,神代國衙,1680 +兵庫県南あわじ市神代国衙1680,兵庫県,南あわじ市,神代國衙,1680 From 21d91cce8798cb77854897699cb8b051310cfee9 Mon Sep 17 00:00:00 2001 From: Yuuki Toriyama Date: Sat, 16 Nov 2024 17:45:52 +0900 Subject: [PATCH 2/7] =?UTF-8?q?update:=20#458:=20=E3=80=8C=E4=B8=8A?= =?UTF-8?q?=E6=B0=B7=E9=89=8B=E3=80=8D=E3=81=A8=E3=80=8C=E4=B8=8A=E6=B0=B7?= =?UTF-8?q?=E9=A3=BD=E3=80=8D=E3=81=AE=E8=A1=A8=E8=A8=98=E3=82=86=E3=82=8C?= =?UTF-8?q?=E3=81=AB=E5=AF=BE=E5=BF=9C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- core/src/adapter/orthographical_variant_adapter.rs | 2 ++ core/src/tokenizer/read_town.rs | 1 + ...02\214\343\201\270\343\201\256\345\257\276\345\277\234.csv" | 3 +++ 3 files changed, 6 insertions(+) diff --git a/core/src/adapter/orthographical_variant_adapter.rs b/core/src/adapter/orthographical_variant_adapter.rs index 56f38227..2dbef93b 100644 --- a/core/src/adapter/orthographical_variant_adapter.rs +++ b/core/src/adapter/orthographical_variant_adapter.rs @@ -29,6 +29,7 @@ pub enum OrthographicalVariant { 祗, 曾, 國, + 鉋, } impl OrthographicalVariant { @@ -63,6 +64,7 @@ impl OrthographicalVariant { OrthographicalVariant::祗 => &['祗', '祇'], OrthographicalVariant::曾 => &['曾', '曽'], OrthographicalVariant::國 => &['國', '国'], + OrthographicalVariant::鉋 => &['鉋', '飽'], } } diff --git a/core/src/tokenizer/read_town.rs b/core/src/tokenizer/read_town.rs index 0f41f9f0..564e273e 100644 --- a/core/src/tokenizer/read_town.rs +++ b/core/src/tokenizer/read_town.rs @@ -91,6 +91,7 @@ fn find_town(input: &str, candidates: &Vec) -> Option<(String, String)> OrthographicalVariant::祗, OrthographicalVariant::曾, OrthographicalVariant::國, + OrthographicalVariant::鉋, ], }; if let Some(result) = adapter.apply(input, candidate) { diff --git "a/tests/test_data/\347\225\260\345\255\227\344\275\223\343\201\247\343\201\257\343\201\252\343\201\204\350\241\250\350\250\230\343\202\206\343\202\214\343\201\270\343\201\256\345\257\276\345\277\234.csv" "b/tests/test_data/\347\225\260\345\255\227\344\275\223\343\201\247\343\201\257\343\201\252\343\201\204\350\241\250\350\250\230\343\202\206\343\202\214\343\201\270\343\201\256\345\257\276\345\277\234.csv" index 4c1b0914..f19ab02e 100644 --- "a/tests/test_data/\347\225\260\345\255\227\344\275\223\343\201\247\343\201\257\343\201\252\343\201\204\350\241\250\350\250\230\343\202\206\343\202\214\343\201\270\343\201\256\345\257\276\345\277\234.csv" +++ "b/tests/test_data/\347\225\260\345\255\227\344\275\223\343\201\247\343\201\257\343\201\252\343\201\204\350\241\250\350\250\230\343\202\206\343\202\214\343\201\270\343\201\256\345\257\276\345\277\234.csv" @@ -3,3 +3,6 @@ address,prefecture,city,town,rest 神奈川県鎌倉市山ノ内189,神奈川県,鎌倉市,山ノ内,189 神奈川県鎌倉市山の内189,神奈川県,鎌倉市,山ノ内,189 神奈川県鎌倉市山之内189,神奈川県,鎌倉市,山ノ内,189 +# 「上氷鉋」と「上氷飽」の表記揺れへの対応 +長野県長野市川中島町上氷鉋1368,長野県,長野市,川中島町上氷鉋,1368 +長野県長野市川中島町上氷飽1368,長野県,長野市,川中島町上氷鉋,1368 From 493fe895a505f827671d105ebb574e8a6348bcfa Mon Sep 17 00:00:00 2001 From: Yuuki Toriyama Date: Sat, 16 Nov 2024 17:56:57 +0900 Subject: [PATCH 3/7] =?UTF-8?q?update:=20#451:=20=E3=80=8C=E9=B7=8F?= =?UTF-8?q?=E5=92=8C=E3=80=8D=E3=81=A8=E3=80=8C=E9=B7=86=E5=92=8C=E3=80=8D?= =?UTF-8?q?=E3=81=AE=E8=A1=A8=E8=A8=98=E3=82=86=E3=82=8C=E3=81=AB=E5=AF=BE?= =?UTF-8?q?=E5=BF=9C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- core/src/adapter/orthographical_variant_adapter.rs | 2 ++ core/src/tokenizer/read_town.rs | 1 + ...75\223\343\201\270\343\201\256\345\257\276\345\277\234.csv" | 3 +++ 3 files changed, 6 insertions(+) diff --git a/core/src/adapter/orthographical_variant_adapter.rs b/core/src/adapter/orthographical_variant_adapter.rs index 2dbef93b..1e51c4dd 100644 --- a/core/src/adapter/orthographical_variant_adapter.rs +++ b/core/src/adapter/orthographical_variant_adapter.rs @@ -30,6 +30,7 @@ pub enum OrthographicalVariant { 曾, 國, 鉋, + 鷆, } impl OrthographicalVariant { @@ -65,6 +66,7 @@ impl OrthographicalVariant { OrthographicalVariant::曾 => &['曾', '曽'], OrthographicalVariant::國 => &['國', '国'], OrthographicalVariant::鉋 => &['鉋', '飽'], + OrthographicalVariant::鷆 => &['鷆', '鷏'], } } diff --git a/core/src/tokenizer/read_town.rs b/core/src/tokenizer/read_town.rs index 564e273e..a10d4c7b 100644 --- a/core/src/tokenizer/read_town.rs +++ b/core/src/tokenizer/read_town.rs @@ -92,6 +92,7 @@ fn find_town(input: &str, candidates: &Vec) -> Option<(String, String)> OrthographicalVariant::曾, OrthographicalVariant::國, OrthographicalVariant::鉋, + OrthographicalVariant::鷆, ], }; if let Some(result) = adapter.apply(input, candidate) { diff --git "a/tests/test_data/\347\225\260\345\255\227\344\275\223\346\227\247\345\255\227\344\275\223\343\201\270\343\201\256\345\257\276\345\277\234.csv" "b/tests/test_data/\347\225\260\345\255\227\344\275\223\346\227\247\345\255\227\344\275\223\343\201\270\343\201\256\345\257\276\345\277\234.csv" index de962e26..e4d86a79 100644 --- "a/tests/test_data/\347\225\260\345\255\227\344\275\223\346\227\247\345\255\227\344\275\223\343\201\270\343\201\256\345\257\276\345\277\234.csv" +++ "b/tests/test_data/\347\225\260\345\255\227\344\275\223\346\227\247\345\255\227\344\275\223\343\201\270\343\201\256\345\257\276\345\277\234.csv" @@ -87,3 +87,6 @@ address,prefecture,city,town,rest # 「神代國衙」と「神代国衙」の表記揺れへの対応 兵庫県南あわじ市神代國衙1680,兵庫県,南あわじ市,神代國衙,1680 兵庫県南あわじ市神代国衙1680,兵庫県,南あわじ市,神代國衙,1680 +# 「鷏和」と「鷆和」の表記揺れへの対応 +兵庫県赤穂市鷏和422,兵庫県,赤穂市,鷏和,422 +兵庫県赤穂市鷆和422,兵庫県,赤穂市,鷏和,422 From 5b1fafb21922c61a2de172c472a321cc14f7dc32 Mon Sep 17 00:00:00 2001 From: Yuuki Toriyama Date: Sat, 16 Nov 2024 18:08:45 +0900 Subject: [PATCH 4/7] =?UTF-8?q?update:=20#408:=20=E3=80=8C=E7=8F=AD?= =?UTF-8?q?=E7=9B=AE=E3=80=8D=E3=81=A8=E3=80=8C=E6=96=91=E7=9B=AE=E3=80=8D?= =?UTF-8?q?=E3=81=AE=E8=A1=A8=E8=A8=98=E3=82=86=E3=82=8C=E3=81=AB=E5=AF=BE?= =?UTF-8?q?=E5=BF=9C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- core/src/adapter/orthographical_variant_adapter.rs | 2 ++ core/src/tokenizer/read_town.rs | 1 + ...02\214\343\201\270\343\201\256\345\257\276\345\277\234.csv" | 3 +++ 3 files changed, 6 insertions(+) diff --git a/core/src/adapter/orthographical_variant_adapter.rs b/core/src/adapter/orthographical_variant_adapter.rs index 1e51c4dd..da053bcc 100644 --- a/core/src/adapter/orthographical_variant_adapter.rs +++ b/core/src/adapter/orthographical_variant_adapter.rs @@ -31,6 +31,7 @@ pub enum OrthographicalVariant { 國, 鉋, 鷆, + 斑, } impl OrthographicalVariant { @@ -67,6 +68,7 @@ impl OrthographicalVariant { OrthographicalVariant::國 => &['國', '国'], OrthographicalVariant::鉋 => &['鉋', '飽'], OrthographicalVariant::鷆 => &['鷆', '鷏'], + OrthographicalVariant::斑 => &['斑', '班'], } } diff --git a/core/src/tokenizer/read_town.rs b/core/src/tokenizer/read_town.rs index a10d4c7b..defaffcf 100644 --- a/core/src/tokenizer/read_town.rs +++ b/core/src/tokenizer/read_town.rs @@ -93,6 +93,7 @@ fn find_town(input: &str, candidates: &Vec) -> Option<(String, String)> OrthographicalVariant::國, OrthographicalVariant::鉋, OrthographicalVariant::鷆, + OrthographicalVariant::斑, ], }; if let Some(result) = adapter.apply(input, candidate) { diff --git "a/tests/test_data/\347\225\260\345\255\227\344\275\223\343\201\247\343\201\257\343\201\252\343\201\204\350\241\250\350\250\230\343\202\206\343\202\214\343\201\270\343\201\256\345\257\276\345\277\234.csv" "b/tests/test_data/\347\225\260\345\255\227\344\275\223\343\201\247\343\201\257\343\201\252\343\201\204\350\241\250\350\250\230\343\202\206\343\202\214\343\201\270\343\201\256\345\257\276\345\277\234.csv" index f19ab02e..ba065e89 100644 --- "a/tests/test_data/\347\225\260\345\255\227\344\275\223\343\201\247\343\201\257\343\201\252\343\201\204\350\241\250\350\250\230\343\202\206\343\202\214\343\201\270\343\201\256\345\257\276\345\277\234.csv" +++ "b/tests/test_data/\347\225\260\345\255\227\344\275\223\343\201\247\343\201\257\343\201\252\343\201\204\350\241\250\350\250\230\343\202\206\343\202\214\343\201\270\343\201\256\345\257\276\345\277\234.csv" @@ -6,3 +6,6 @@ address,prefecture,city,town,rest # 「上氷鉋」と「上氷飽」の表記揺れへの対応 長野県長野市川中島町上氷鉋1368,長野県,長野市,川中島町上氷鉋,1368 長野県長野市川中島町上氷飽1368,長野県,長野市,川中島町上氷鉋,1368 +# 「斑目」と「班目」の表記揺れへの対応 +神奈川県南足柄市班目639,神奈川県,南足柄市,班目,639 +神奈川県南足柄市斑目639,神奈川県,南足柄市,班目,639 From d47240eba85563e6be8c87f98a16c965784e4aee Mon Sep 17 00:00:00 2001 From: Yuuki Toriyama Date: Sat, 16 Nov 2024 18:22:36 +0900 Subject: [PATCH 5/7] =?UTF-8?q?update:=20#456:=20=E3=80=8C=E5=8D=97?= =?UTF-8?q?=E6=AB=BB=E3=80=8D=E3=81=A8=E3=80=8C=E5=8D=97=E6=A1=9C=E3=80=8D?= =?UTF-8?q?=E3=81=AE=E8=A1=A8=E8=A8=98=E3=82=86=E3=82=8C=E3=81=AB=E5=AF=BE?= =?UTF-8?q?=E5=BF=9C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- core/src/adapter/orthographical_variant_adapter.rs | 2 ++ core/src/tokenizer/read_town.rs | 1 + ...75\223\343\201\270\343\201\256\345\257\276\345\277\234.csv" | 3 +++ 3 files changed, 6 insertions(+) diff --git a/core/src/adapter/orthographical_variant_adapter.rs b/core/src/adapter/orthographical_variant_adapter.rs index da053bcc..069d5904 100644 --- a/core/src/adapter/orthographical_variant_adapter.rs +++ b/core/src/adapter/orthographical_variant_adapter.rs @@ -32,6 +32,7 @@ pub enum OrthographicalVariant { 鉋, 鷆, 斑, + 櫻, } impl OrthographicalVariant { @@ -69,6 +70,7 @@ impl OrthographicalVariant { OrthographicalVariant::鉋 => &['鉋', '飽'], OrthographicalVariant::鷆 => &['鷆', '鷏'], OrthographicalVariant::斑 => &['斑', '班'], + OrthographicalVariant::櫻 => &['櫻', '桜'], } } diff --git a/core/src/tokenizer/read_town.rs b/core/src/tokenizer/read_town.rs index defaffcf..dfe36715 100644 --- a/core/src/tokenizer/read_town.rs +++ b/core/src/tokenizer/read_town.rs @@ -94,6 +94,7 @@ fn find_town(input: &str, candidates: &Vec) -> Option<(String, String)> OrthographicalVariant::鉋, OrthographicalVariant::鷆, OrthographicalVariant::斑, + OrthographicalVariant::櫻, ], }; if let Some(result) = adapter.apply(input, candidate) { diff --git "a/tests/test_data/\347\225\260\345\255\227\344\275\223\346\227\247\345\255\227\344\275\223\343\201\270\343\201\256\345\257\276\345\277\234.csv" "b/tests/test_data/\347\225\260\345\255\227\344\275\223\346\227\247\345\255\227\344\275\223\343\201\270\343\201\256\345\257\276\345\277\234.csv" index e4d86a79..11f9e596 100644 --- "a/tests/test_data/\347\225\260\345\255\227\344\275\223\346\227\247\345\255\227\344\275\223\343\201\270\343\201\256\345\257\276\345\277\234.csv" +++ "b/tests/test_data/\347\225\260\345\255\227\344\275\223\346\227\247\345\255\227\344\275\223\343\201\270\343\201\256\345\257\276\345\277\234.csv" @@ -90,3 +90,6 @@ address,prefecture,city,town,rest # 「鷏和」と「鷆和」の表記揺れへの対応 兵庫県赤穂市鷏和422,兵庫県,赤穂市,鷏和,422 兵庫県赤穂市鷆和422,兵庫県,赤穂市,鷏和,422 +# 「南桜」と「南櫻」の表記揺れへの対応 +滋賀県野洲市南桜1792,滋賀県,野洲市,南櫻,1792 +滋賀県野洲市南櫻1792,滋賀県,野洲市,南櫻,1792 From c32547c2b0bf7aa2148fe85e47f253bbccc278e1 Mon Sep 17 00:00:00 2001 From: Yuuki Toriyama Date: Sat, 16 Nov 2024 18:33:32 +0900 Subject: [PATCH 6/7] =?UTF-8?q?update:=20#454:=20=E3=80=8C=E6=9D=BE?= =?UTF-8?q?=E5=B8=86=E6=93=BD=E7=94=B0=E3=80=8D=E3=81=A8=E3=80=8C=E6=9D=BE?= =?UTF-8?q?=E5=B8=86=E6=AB=9F=E7=94=B0=E3=80=8D=E3=81=AE=E8=A1=A8=E8=A8=98?= =?UTF-8?q?=E3=82=86=E3=82=8C=E3=81=AB=E5=AF=BE=E5=BF=9C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- core/src/adapter/orthographical_variant_adapter.rs | 2 ++ core/src/tokenizer/read_town.rs | 1 + ...02\214\343\201\270\343\201\256\345\257\276\345\277\234.csv" | 3 +++ 3 files changed, 6 insertions(+) diff --git a/core/src/adapter/orthographical_variant_adapter.rs b/core/src/adapter/orthographical_variant_adapter.rs index 069d5904..cc73589b 100644 --- a/core/src/adapter/orthographical_variant_adapter.rs +++ b/core/src/adapter/orthographical_variant_adapter.rs @@ -33,6 +33,7 @@ pub enum OrthographicalVariant { 鷆, 斑, 櫻, + 櫟, } impl OrthographicalVariant { @@ -71,6 +72,7 @@ impl OrthographicalVariant { OrthographicalVariant::鷆 => &['鷆', '鷏'], OrthographicalVariant::斑 => &['斑', '班'], OrthographicalVariant::櫻 => &['櫻', '桜'], + OrthographicalVariant::櫟 => &['櫟', '擽'], } } diff --git a/core/src/tokenizer/read_town.rs b/core/src/tokenizer/read_town.rs index dfe36715..4789b608 100644 --- a/core/src/tokenizer/read_town.rs +++ b/core/src/tokenizer/read_town.rs @@ -95,6 +95,7 @@ fn find_town(input: &str, candidates: &Vec) -> Option<(String, String)> OrthographicalVariant::鷆, OrthographicalVariant::斑, OrthographicalVariant::櫻, + OrthographicalVariant::櫟, ], }; if let Some(result) = adapter.apply(input, candidate) { diff --git "a/tests/test_data/\347\225\260\345\255\227\344\275\223\343\201\247\343\201\257\343\201\252\343\201\204\350\241\250\350\250\230\343\202\206\343\202\214\343\201\270\343\201\256\345\257\276\345\277\234.csv" "b/tests/test_data/\347\225\260\345\255\227\344\275\223\343\201\247\343\201\257\343\201\252\343\201\204\350\241\250\350\250\230\343\202\206\343\202\214\343\201\270\343\201\256\345\257\276\345\277\234.csv" index ba065e89..f76a6bd8 100644 --- "a/tests/test_data/\347\225\260\345\255\227\344\275\223\343\201\247\343\201\257\343\201\252\343\201\204\350\241\250\350\250\230\343\202\206\343\202\214\343\201\270\343\201\256\345\257\276\345\277\234.csv" +++ "b/tests/test_data/\347\225\260\345\255\227\344\275\223\343\201\247\343\201\257\343\201\252\343\201\204\350\241\250\350\250\230\343\202\206\343\202\214\343\201\270\343\201\256\345\257\276\345\277\234.csv" @@ -9,3 +9,6 @@ address,prefecture,city,town,rest # 「斑目」と「班目」の表記揺れへの対応 神奈川県南足柄市班目639,神奈川県,南足柄市,班目,639 神奈川県南足柄市斑目639,神奈川県,南足柄市,班目,639 +# 「櫟」と「擽」の表記ゆれへの対応 +兵庫県南あわじ市松帆櫟田196,兵庫県,南あわじ市,松帆櫟田,196 +兵庫県南あわじ市松帆擽田196,兵庫県,南あわじ市,松帆櫟田,196 From 09bc73c2fc04d155b0028dbe50ab97127183960f Mon Sep 17 00:00:00 2001 From: Yuuki Toriyama Date: Sat, 16 Nov 2024 18:45:38 +0900 Subject: [PATCH 7/7] =?UTF-8?q?update:=20#452:=20=E3=80=8C=E5=90=89?= =?UTF-8?q?=E5=86=A8=E3=80=8D=E3=81=A8=E3=80=8C=E5=90=89=E5=AF=8C=E3=80=8D?= =?UTF-8?q?=E3=81=AE=E8=A1=A8=E8=A8=98=E3=82=86=E3=82=8C=E3=81=AB=E5=AF=BE?= =?UTF-8?q?=E5=BF=9C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- core/src/adapter/orthographical_variant_adapter.rs | 2 ++ core/src/tokenizer/read_town.rs | 1 + ...75\223\343\201\270\343\201\256\345\257\276\345\277\234.csv" | 3 +++ 3 files changed, 6 insertions(+) diff --git a/core/src/adapter/orthographical_variant_adapter.rs b/core/src/adapter/orthographical_variant_adapter.rs index cc73589b..05aaf730 100644 --- a/core/src/adapter/orthographical_variant_adapter.rs +++ b/core/src/adapter/orthographical_variant_adapter.rs @@ -34,6 +34,7 @@ pub enum OrthographicalVariant { 斑, 櫻, 櫟, + 冨, } impl OrthographicalVariant { @@ -73,6 +74,7 @@ impl OrthographicalVariant { OrthographicalVariant::斑 => &['斑', '班'], OrthographicalVariant::櫻 => &['櫻', '桜'], OrthographicalVariant::櫟 => &['櫟', '擽'], + OrthographicalVariant::冨 => &['冨', '富'], } } diff --git a/core/src/tokenizer/read_town.rs b/core/src/tokenizer/read_town.rs index 4789b608..517bb363 100644 --- a/core/src/tokenizer/read_town.rs +++ b/core/src/tokenizer/read_town.rs @@ -96,6 +96,7 @@ fn find_town(input: &str, candidates: &Vec) -> Option<(String, String)> OrthographicalVariant::斑, OrthographicalVariant::櫻, OrthographicalVariant::櫟, + OrthographicalVariant::冨, ], }; if let Some(result) = adapter.apply(input, candidate) { diff --git "a/tests/test_data/\347\225\260\345\255\227\344\275\223\346\227\247\345\255\227\344\275\223\343\201\270\343\201\256\345\257\276\345\277\234.csv" "b/tests/test_data/\347\225\260\345\255\227\344\275\223\346\227\247\345\255\227\344\275\223\343\201\270\343\201\256\345\257\276\345\277\234.csv" index 11f9e596..459577f9 100644 --- "a/tests/test_data/\347\225\260\345\255\227\344\275\223\346\227\247\345\255\227\344\275\223\343\201\270\343\201\256\345\257\276\345\277\234.csv" +++ "b/tests/test_data/\347\225\260\345\255\227\344\275\223\346\227\247\345\255\227\344\275\223\343\201\270\343\201\256\345\257\276\345\277\234.csv" @@ -93,3 +93,6 @@ address,prefecture,city,town,rest # 「南桜」と「南櫻」の表記揺れへの対応 滋賀県野洲市南桜1792,滋賀県,野洲市,南櫻,1792 滋賀県野洲市南櫻1792,滋賀県,野洲市,南櫻,1792 +# 「富」と「冨」の表記ゆれへの対応 +兵庫県神崎郡神河町吉冨88番地10号,兵庫県,神崎郡神河町,吉冨,88番地10号 +兵庫県神崎郡神河町吉富88番地10号,兵庫県,神崎郡神河町,吉冨,88番地10号