From d1435e975f92fd9542429b9d2339c03bfe49574a Mon Sep 17 00:00:00 2001 From: Yuuki Toriyama Date: Mon, 16 Sep 2024 12:22:50 +0900 Subject: [PATCH 1/8] =?UTF-8?q?update:=20#418:=20=E7=B5=90=E5=90=88?= =?UTF-8?q?=E3=83=86=E3=82=B9=E3=83=88`=E7=95=B0=E5=AD=97=E4=BD=93?= =?UTF-8?q?=E6=97=A7=E5=AD=97=E4=BD=93=E3=81=B8=E3=81=AE=E5=AF=BE=E5=BF=9C?= =?UTF-8?q?`=E3=81=AB=E3=80=8C=E7=AB=83=E3=80=8D=E3=81=A8=E3=80=8C?= =?UTF-8?q?=E7=AB=88=E3=80=8D=E3=81=AE=E8=A1=A8=E8=A8=98=E3=82=86=E3=82=8C?= =?UTF-8?q?=E3=81=AE=E3=82=B1=E3=83=BC=E3=82=B9=E3=82=92=E8=BF=BD=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ...75\223\343\201\270\343\201\256\345\257\276\345\277\234.csv" | 3 +++ 1 file changed, 3 insertions(+) diff --git "a/tests/test_data/\347\225\260\345\255\227\344\275\223\346\227\247\345\255\227\344\275\223\343\201\270\343\201\256\345\257\276\345\277\234.csv" "b/tests/test_data/\347\225\260\345\255\227\344\275\223\346\227\247\345\255\227\344\275\223\343\201\270\343\201\256\345\257\276\345\277\234.csv" index 74ea78a9..f6769e70 100644 --- "a/tests/test_data/\347\225\260\345\255\227\344\275\223\346\227\247\345\255\227\344\275\223\343\201\270\343\201\256\345\257\276\345\277\234.csv" +++ "b/tests/test_data/\347\225\260\345\255\227\344\275\223\346\227\247\345\255\227\344\275\223\343\201\270\343\201\256\345\257\276\345\277\234.csv" @@ -72,3 +72,6 @@ address,prefecture,city,town,rest # 「濱ノ瀬」と「浜ノ瀬」の表記ゆれへの対応 和歌山県日高郡美浜町大字濱ノ瀬356番3,和歌山県,日高郡美浜町,大字濱ノ瀬,356番3 和歌山県日高郡美浜町大字浜ノ瀬356番3,和歌山県,日高郡美浜町,大字濱ノ瀬,356番3 +# 「竃」と「竈」の表記ゆれへの対応 +静岡県御殿場市竈1032,静岡県,御殿場市,竈,1032 +静岡県御殿場市竃1032,静岡県,御殿場市,竈,1032 From 458f1513cdd28f4b246bec4b2a4d313fa2189154 Mon Sep 17 00:00:00 2001 From: Yuuki Toriyama Date: Mon, 16 Sep 2024 12:24:16 +0900 Subject: [PATCH 2/8] =?UTF-8?q?update:=20#418:=20`Variant::=E7=AB=88`?= =?UTF-8?q?=E3=81=AB=E3=80=8C=E7=AB=83=E3=80=8D=E5=AD=97=E3=82=92=E8=BF=BD?= =?UTF-8?q?=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- core/src/parser/adapter/orthographical_variant_adapter.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/parser/adapter/orthographical_variant_adapter.rs b/core/src/parser/adapter/orthographical_variant_adapter.rs index 5fbb7276..5ade9c97 100644 --- a/core/src/parser/adapter/orthographical_variant_adapter.rs +++ b/core/src/parser/adapter/orthographical_variant_adapter.rs @@ -38,7 +38,7 @@ impl OrthographicalVariants for Variant { const 崎: Variant = &["崎", "﨑"]; const 檜: Variant = &["桧", "檜"]; const 龍: Variant = &["龍", "竜"]; - const 竈: Variant = &["竈", "釜"]; + const 竈: Variant = &["竈", "竃", "釜"]; const 嶋: Variant = &["嶋", "島"]; const 舘: Variant = &["舘", "館"]; const 脊: Variant = &["脊", "背"]; From 3e0260eb6590c035a7746d1476de343b9ee233db Mon Sep 17 00:00:00 2001 From: Yuuki Toriyama Date: Mon, 16 Sep 2024 12:25:04 +0900 Subject: [PATCH 3/8] =?UTF-8?q?update:=20#418:=20=E7=94=BA=E5=90=8D?= =?UTF-8?q?=E3=81=AE=E7=89=B9=E5=AE=9A=E6=99=82=E3=81=AB=E3=80=8C=E7=AB=88?= =?UTF-8?q?=E3=80=8D=E5=AD=97=E3=81=AE=E8=A1=A8=E8=A8=98=E3=82=86=E3=82=8C?= =?UTF-8?q?=E3=82=92=E8=80=83=E6=85=AE=E3=81=99=E3=82=8B=E3=82=88=E3=81=86?= =?UTF-8?q?=E3=81=AB=E4=BF=AE=E6=AD=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- core/src/tokenizer/read_town.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/core/src/tokenizer/read_town.rs b/core/src/tokenizer/read_town.rs index d0ce00e2..b7d3d102 100644 --- a/core/src/tokenizer/read_town.rs +++ b/core/src/tokenizer/read_town.rs @@ -95,6 +95,7 @@ fn find_town(input: &str, candidates: &Vec) -> Option<(String, String)> Variant::薮, Variant::崎, Variant::檜, + Variant::竈, Variant::舘, Variant::脊, Variant::渕, From b751130521cd8d5351d694a506e1806333a302d2 Mon Sep 17 00:00:00 2001 From: Yuuki Toriyama Date: Mon, 16 Sep 2024 12:27:39 +0900 Subject: [PATCH 4/8] update-version: 0.1.14 -> 0.1.15 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 709fb842..4d2767f5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,7 +8,7 @@ members = [ resolver = "2" [workspace.package] -version = "0.1.14" +version = "0.1.15" edition = "2021" description = "A Rust Library to parse japanese addresses." repository = "https://github.com/YuukiToriyama/japanese-address-parser" From 35a628fc8deb466071a94eb011ba83074891f15b Mon Sep 17 00:00:00 2001 From: Yuuki Toriyama Date: Mon, 16 Sep 2024 12:49:34 +0900 Subject: [PATCH 5/8] =?UTF-8?q?add:=20#409:=20=E7=B5=90=E5=90=88=E3=83=86?= =?UTF-8?q?=E3=82=B9=E3=83=88`=E7=95=B0=E5=AD=97=E4=BD=93=E6=97=A7?= =?UTF-8?q?=E5=AD=97=E4=BD=93=E3=81=B8=E3=81=AE=E5=AF=BE=E5=BF=9C`?= =?UTF-8?q?=E3=81=AB=E3=80=8C=E6=9C=A8=E6=9C=88=E7=A5=97=E5=9C=92=E7=94=BA?= =?UTF-8?q?=E3=80=8D=E3=81=A8=E3=80=8C=E6=9C=A8=E6=9C=88=E7=A5=87=E5=9C=92?= =?UTF-8?q?=E7=94=BA=E3=80=8D=E3=81=AE=E8=A1=A8=E8=A8=98=E3=82=86=E3=82=8C?= =?UTF-8?q?=E3=81=AE=E3=82=B1=E3=83=BC=E3=82=B9=E3=82=92=E8=BF=BD=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ...75\223\343\201\270\343\201\256\345\257\276\345\277\234.csv" | 3 +++ 1 file changed, 3 insertions(+) diff --git "a/tests/test_data/\347\225\260\345\255\227\344\275\223\346\227\247\345\255\227\344\275\223\343\201\270\343\201\256\345\257\276\345\277\234.csv" "b/tests/test_data/\347\225\260\345\255\227\344\275\223\346\227\247\345\255\227\344\275\223\343\201\270\343\201\256\345\257\276\345\277\234.csv" index 74ea78a9..004fc555 100644 --- "a/tests/test_data/\347\225\260\345\255\227\344\275\223\346\227\247\345\255\227\344\275\223\343\201\270\343\201\256\345\257\276\345\277\234.csv" +++ "b/tests/test_data/\347\225\260\345\255\227\344\275\223\346\227\247\345\255\227\344\275\223\343\201\270\343\201\256\345\257\276\345\277\234.csv" @@ -72,3 +72,6 @@ address,prefecture,city,town,rest # 「濱ノ瀬」と「浜ノ瀬」の表記ゆれへの対応 和歌山県日高郡美浜町大字濱ノ瀬356番3,和歌山県,日高郡美浜町,大字濱ノ瀬,356番3 和歌山県日高郡美浜町大字浜ノ瀬356番3,和歌山県,日高郡美浜町,大字濱ノ瀬,356番3 +# 「木月祗園町」と「木月祇園町」の表記ゆれへの対応 +神奈川県川崎市中原区木月祗園町17-1,神奈川県,川崎市中原区,木月祗園町,17-1 +神奈川県川崎市中原区木月祇園町17-1,神奈川県,川崎市中原区,木月祗園町,17-1 From d1191043392bf4ca4e99a67cba95c4f3ccb9f826 Mon Sep 17 00:00:00 2001 From: Yuuki Toriyama Date: Mon, 16 Sep 2024 12:51:19 +0900 Subject: [PATCH 6/8] =?UTF-8?q?update:=20#409:=20=E7=94=BA=E5=90=8D?= =?UTF-8?q?=E3=81=AE=E7=89=B9=E5=AE=9A=E6=99=82=E3=81=AB=E3=80=8C=E7=A5=97?= =?UTF-8?q?=E3=80=8D=E3=81=A8=E3=80=8C=E7=A5=87=E3=80=8D=E3=81=AE=E8=A1=A8?= =?UTF-8?q?=E8=A8=98=E3=82=86=E3=82=8C=E3=82=92=E8=80=83=E6=85=AE=E3=81=99?= =?UTF-8?q?=E3=82=8B=E3=82=88=E3=81=86=E3=81=AB=E3=81=97=E3=81=9F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- core/src/parser/adapter/orthographical_variant_adapter.rs | 2 ++ core/src/tokenizer/read_town.rs | 1 + 2 files changed, 3 insertions(+) diff --git a/core/src/parser/adapter/orthographical_variant_adapter.rs b/core/src/parser/adapter/orthographical_variant_adapter.rs index 5fbb7276..b6612447 100644 --- a/core/src/parser/adapter/orthographical_variant_adapter.rs +++ b/core/src/parser/adapter/orthographical_variant_adapter.rs @@ -28,6 +28,7 @@ pub trait OrthographicalVariants { const 與: Variant; const 瀧: Variant; const 濱: Variant; + const 祗: Variant; } impl OrthographicalVariants for Variant { @@ -56,6 +57,7 @@ impl OrthographicalVariants for Variant { const 與: Variant = &["與", "与"]; const 瀧: Variant = &["瀧", "滝"]; const 濱: Variant = &["濱", "浜"]; + const 祗: Variant = &["祗", "祇"]; } pub struct OrthographicalVariantAdapter { diff --git a/core/src/tokenizer/read_town.rs b/core/src/tokenizer/read_town.rs index d0ce00e2..ff73f827 100644 --- a/core/src/tokenizer/read_town.rs +++ b/core/src/tokenizer/read_town.rs @@ -110,6 +110,7 @@ fn find_town(input: &str, candidates: &Vec) -> Option<(String, String)> Variant::與, Variant::瀧, Variant::濱, + Variant::祗, ], }; if let Some(result) = adapter.apply(input, candidate) { From 8acacc7024e30b1258a277b148bdde1edba1b329 Mon Sep 17 00:00:00 2001 From: Yuuki Toriyama Date: Mon, 16 Sep 2024 14:20:05 +0900 Subject: [PATCH 7/8] =?UTF-8?q?add:=20#419:=20=E7=B5=90=E5=90=88=E3=83=86?= =?UTF-8?q?=E3=82=B9=E3=83=88`=E7=95=B0=E5=AD=97=E4=BD=93=E6=97=A7?= =?UTF-8?q?=E5=AD=97=E4=BD=93=E3=81=B8=E3=81=AE=E5=AF=BE=E5=BF=9C`?= =?UTF-8?q?=E3=81=AB=E3=80=8C=E7=B1=A0=E4=B8=8A=E3=80=8D=E3=81=A8=E3=80=8C?= =?UTF-8?q?=E7=AF=AD=E4=B8=8A=E3=80=8D=E3=81=AE=E8=A1=A8=E8=A8=98=E3=82=86?= =?UTF-8?q?=E3=82=8C=E3=81=AE=E3=82=B1=E3=83=BC=E3=82=B9=E3=82=92=E8=BF=BD?= =?UTF-8?q?=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ...75\223\343\201\270\343\201\256\345\257\276\345\277\234.csv" | 3 +++ 1 file changed, 3 insertions(+) diff --git "a/tests/test_data/\347\225\260\345\255\227\344\275\223\346\227\247\345\255\227\344\275\223\343\201\270\343\201\256\345\257\276\345\277\234.csv" "b/tests/test_data/\347\225\260\345\255\227\344\275\223\346\227\247\345\255\227\344\275\223\343\201\270\343\201\256\345\257\276\345\277\234.csv" index 74ea78a9..da8fc125 100644 --- "a/tests/test_data/\347\225\260\345\255\227\344\275\223\346\227\247\345\255\227\344\275\223\343\201\270\343\201\256\345\257\276\345\277\234.csv" +++ "b/tests/test_data/\347\225\260\345\255\227\344\275\223\346\227\247\345\255\227\344\275\223\343\201\270\343\201\256\345\257\276\345\277\234.csv" @@ -69,6 +69,9 @@ address,prefecture,city,town,rest # 「瀧本」と「滝本」の表記ゆれへの対応 和歌山県新宮市熊野川町滝本417-1,和歌山県,新宮市,熊野川町瀧本,417-1 和歌山県新宮市熊野川町瀧本417-1,和歌山県,新宮市,熊野川町瀧本,417-1 +# 「籠上」と「篭上」の表記ゆれへの対応 +静岡県静岡市葵区籠上13-54,静岡県,静岡市葵区,籠上,13-54 +静岡県静岡市葵区篭上13-54,静岡県,静岡市葵区,籠上,13-54 # 「濱ノ瀬」と「浜ノ瀬」の表記ゆれへの対応 和歌山県日高郡美浜町大字濱ノ瀬356番3,和歌山県,日高郡美浜町,大字濱ノ瀬,356番3 和歌山県日高郡美浜町大字浜ノ瀬356番3,和歌山県,日高郡美浜町,大字濱ノ瀬,356番3 From edaae31b990d110f2335407259c5bbce766d617f Mon Sep 17 00:00:00 2001 From: Yuuki Toriyama Date: Mon, 16 Sep 2024 14:23:04 +0900 Subject: [PATCH 8/8] =?UTF-8?q?update:=20#419:=20=E7=94=BA=E5=90=8D?= =?UTF-8?q?=E3=81=AE=E7=89=B9=E5=AE=9A=E6=99=82=E3=81=AB=E3=80=8C=E7=B1=A0?= =?UTF-8?q?=E3=80=8D=E3=81=A8=E3=80=8C=E7=AF=AD=E3=80=8D=E3=81=AE=E8=A1=A8?= =?UTF-8?q?=E8=A8=98=E3=82=86=E3=82=8C=E3=82=92=E8=80=83=E6=85=AE=E3=81=99?= =?UTF-8?q?=E3=82=8B=E3=82=88=E3=81=86=E3=81=AB=E3=81=97=E3=81=BE=E3=81=97?= =?UTF-8?q?=E3=81=9F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- core/src/parser/adapter/orthographical_variant_adapter.rs | 2 ++ core/src/tokenizer/read_town.rs | 1 + 2 files changed, 3 insertions(+) diff --git a/core/src/parser/adapter/orthographical_variant_adapter.rs b/core/src/parser/adapter/orthographical_variant_adapter.rs index 5fbb7276..99333066 100644 --- a/core/src/parser/adapter/orthographical_variant_adapter.rs +++ b/core/src/parser/adapter/orthographical_variant_adapter.rs @@ -27,6 +27,7 @@ pub trait OrthographicalVariants { const 蛍: Variant; const 與: Variant; const 瀧: Variant; + const 籠: Variant; const 濱: Variant; } @@ -55,6 +56,7 @@ impl OrthographicalVariants for Variant { const 蛍: Variant = &["蛍", "螢"]; const 與: Variant = &["與", "与"]; const 瀧: Variant = &["瀧", "滝"]; + const 籠: Variant = &["籠", "篭"]; const 濱: Variant = &["濱", "浜"]; } diff --git a/core/src/tokenizer/read_town.rs b/core/src/tokenizer/read_town.rs index d0ce00e2..4e8bb7df 100644 --- a/core/src/tokenizer/read_town.rs +++ b/core/src/tokenizer/read_town.rs @@ -109,6 +109,7 @@ fn find_town(input: &str, candidates: &Vec) -> Option<(String, String)> Variant::蛍, Variant::與, Variant::瀧, + Variant::籠, Variant::濱, ], };