Skip to content

Commit

Permalink
Merge pull request #367 from YuukiToriyama/release/v0.1.8
Browse files (browse the repository at this point in the history)
release/v0.1.8をmainブランチにマージ
  • Loading branch information
YuukiToriyama authored Aug 12, 2024
2 parents 47b0c38 + 330fa36 commit 0cffa39
Show file tree
Hide file tree
Showing 7 changed files with 41 additions and 44 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ members = [
resolver = "2"

[workspace.package]
version = "0.1.7"
version = "0.1.8"
edition = "2021"
description = "A Rust Library to parse japanese addresses."
repository = "https://github.com/YuukiToriyama/japanese-address-parser"
Expand Down
1 change: 0 additions & 1 deletion core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ blocking = ["reqwest/blocking"]
[dependencies]
itertools = "0.13.0"
js-sys = "0.3.67"
nom = "7.1.3"
rapidfuzz = "0.5.0"
regex = "1.10.2"
serde.workspace = true
Expand Down
17 changes: 10 additions & 7 deletions core/src/parser/adapter/orthographical_variant_adapter.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,4 @@
use itertools::Itertools;
use nom::bytes::complete::tag;
use nom::error::VerboseError;
use nom::Parser;

pub type Variant = &'static [&'static str];

Expand Down Expand Up @@ -29,6 +26,7 @@ pub trait OrthographicalVariants {
const: Variant;
const: Variant;
const: Variant;
const: Variant;
}

impl OrthographicalVariants for Variant {
Expand All @@ -55,6 +53,7 @@ impl OrthographicalVariants for Variant {
const: Variant = &["梼", "檮"];
const: Variant = &["葛󠄀", "葛"];
const: Variant = &["蛍", "螢"];
const: Variant = &["瀧", "滝"];
}

pub struct OrthographicalVariantAdapter {
Expand Down Expand Up @@ -86,11 +85,15 @@ impl OrthographicalVariantAdapter {
// マッチ候補の中でパターンに引っかかるものがあれば文字を置き換えてマッチを試す
if candidate.contains(permutation[0]) {
let edited_region_name = candidate.replace(permutation[0], permutation[1]);
if let Ok((rest, _)) =
tag::<&str, &str, VerboseError<&str>>(&edited_region_name).parse(input)
{
if input.starts_with(&edited_region_name) {
// マッチすれば早期リターン
return Some((rest.to_string(), region_name.to_string()));
return Some((
region_name.to_string(),
input
.chars()
.skip(edited_region_name.chars().count())
.collect(),
));
} else {
// マッチしなければsemi_candidatesに置き換え後の文字列をpush
semi_candidates.push(edited_region_name.clone());
Expand Down
33 changes: 12 additions & 21 deletions core/src/parser/adapter/vague_expression_adapter.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,4 @@
use crate::util::sequence_matcher::SequenceMatcher;
use nom::bytes::complete::{is_a, is_not};
use nom::combinator::rest;
use nom::error::Error;
use nom::sequence::tuple;

pub struct VagueExpressionAdapter;

Expand All @@ -11,13 +7,8 @@ impl VagueExpressionAdapter {
if let Ok(highest_match) =
SequenceMatcher::get_most_similar_match(input, region_name_list, None)
{
let mut parser = tuple((
is_not::<&str, &str, Error<&str>>("町村"),
is_a::<&str, &str, Error<&str>>("町村"),
rest,
));
if let Ok((_, (_, _, rest))) = parser(input) {
return Some((rest.to_string(), highest_match));
if let Some(position) = input.chars().position(|c| c == '町' || c == '村') {
return Some((highest_match, input.chars().skip(position + 1).collect()));
}
}
None
Expand All @@ -32,50 +23,50 @@ mod tests {
#[test]
fn 郡名が省略されている場合_吉田郡永平寺町() {
let fukui = Prefecture::fukui();
let (rest, city_name) = VagueExpressionAdapter {}
let (city_name, rest) = VagueExpressionAdapter {}
.apply("永平寺町志比5-5", &fukui.cities)
.unwrap();
assert_eq!(rest, "志比5-5");
assert_eq!(city_name, "吉田郡永平寺町");
assert_eq!(rest, "志比5-5");
}

#[test]
fn 郡名が省略されている場合_今立郡池田町() {
let fukui = Prefecture::fukui();
let (rest, city_name) = VagueExpressionAdapter {}
let (city_name, rest) = VagueExpressionAdapter {}
.apply("池田町稲荷28-7", &fukui.cities)
.unwrap();
assert_eq!(rest, "稲荷28-7");
assert_eq!(city_name, "今立郡池田町");
assert_eq!(rest, "稲荷28-7");
}

#[test]
fn 郡名が省略されている場合_南条郡南越前町() {
let fukui = Prefecture::fukui();
let (rest, city_name) = VagueExpressionAdapter {}
let (city_name, rest) = VagueExpressionAdapter {}
.apply("南越前町今庄74-7-1", &fukui.cities)
.unwrap();
assert_eq!(rest, "今庄74-7-1");
assert_eq!(city_name, "南条郡南越前町");
assert_eq!(rest, "今庄74-7-1");
}

#[test]
fn 郡名が省略されている場合_西村山郡河北町() {
let yamagata = Prefecture::yamagata();
let (rest, city_name) = VagueExpressionAdapter {}
let (city_name, rest) = VagueExpressionAdapter {}
.apply("河北町大字吉田字馬場261", &yamagata.cities)
.unwrap();
assert_eq!(rest, "大字吉田字馬場261");
assert_eq!(city_name, "西村山郡河北町");
assert_eq!(rest, "大字吉田字馬場261");
}

#[test]
fn 郡名と町名が一致している場合_最上郡最上町() {
let yamagata = Prefecture::yamagata();
let (rest, city_name) = VagueExpressionAdapter {}
let (city_name, rest) = VagueExpressionAdapter {}
.apply("最上町法田2672-2", &yamagata.cities)
.unwrap();
assert_eq!(rest, "法田2672-2");
assert_eq!(city_name, "最上郡最上町");
assert_eq!(rest, "法田2672-2");
}
}
8 changes: 4 additions & 4 deletions core/src/tokenizer/read_city.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,9 @@ impl Tokenizer<PrefectureNameFound> {
return Ok(Tokenizer {
input: self.input.clone(),
prefecture_name: self.prefecture_name.clone(),
city_name: Some(result.1),
city_name: Some(result.0),
town_name: None,
rest: result.0,
rest: result.1,
_state: PhantomData::<CityNameFound>,
});
}
Expand All @@ -75,9 +75,9 @@ impl Tokenizer<PrefectureNameFound> {
return Ok(Tokenizer {
input: self.input.clone(),
prefecture_name: self.prefecture_name.clone(),
city_name: Some(result.1),
city_name: Some(result.0),
town_name: None,
rest: result.0,
rest: result.1,
_state: PhantomData::<CityNameFound>,
});
}
Expand Down
21 changes: 11 additions & 10 deletions core/src/tokenizer/read_town.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,36 +18,36 @@ impl Tokenizer<CityNameFound> {
if rest.contains("丁目") {
rest = NonKanjiBlockNumberFilter {}.apply(rest);
}
if let Some(result) = find_town(&rest, &candidates) {
if let Some((town_name, rest)) = find_town(&rest, &candidates) {
return Ok(Tokenizer {
input: self.input.clone(),
prefecture_name: self.prefecture_name.clone(),
city_name: self.city_name.clone(),
town_name: Some(result.1),
rest: result.0,
town_name: Some(town_name),
rest,
_state: PhantomData::<TownNameFound>,
});
}
// 「〇〇町L丁目M番N」ではなく「〇〇町L-M-N」と表記されているような場合
rest = InvalidTownNameFormatFilter {}.apply(rest);
if let Some(result) = find_town(&rest, &candidates) {
if let Some((town_name, rest)) = find_town(&rest, &candidates) {
return Ok(Tokenizer {
input: self.input.clone(),
prefecture_name: self.prefecture_name.clone(),
city_name: self.city_name.clone(),
town_name: Some(result.1),
rest: result.0,
town_name: Some(town_name),
rest,
_state: PhantomData::<TownNameFound>,
});
}
// ここまでで町名の検出に成功しない場合は、「大字」の省略の可能性を検討する
if let Some(result) = find_town(&format!("大字{}", rest), &candidates) {
if let Some((town_name, rest)) = find_town(&format!("大字{}", rest), &candidates) {
return Ok(Tokenizer {
input: self.input.clone(),
prefecture_name: self.prefecture_name.clone(),
city_name: self.city_name.clone(),
town_name: Some(result.1),
rest: result.0,
town_name: Some(town_name),
rest,
_state: PhantomData::<TownNameFound>,
});
}
Expand All @@ -66,11 +66,11 @@ fn find_town(input: &str, candidates: &Vec<String>) -> Option<(String, String)>
for candidate in candidates {
if input.starts_with(candidate) {
return Some((
candidate.to_string(),
input
.chars()
.skip(candidate.chars().count())
.collect::<String>(),
candidate.to_string(),
));
}
let adapter = OrthographicalVariantAdapter {
Expand All @@ -93,6 +93,7 @@ fn find_town(input: &str, candidates: &Vec<String>) -> Option<(String, String)>
Variant::,
Variant::,
Variant::,
Variant::,
],
};
if let Some(result) = adapter.apply(input, candidate) {
Expand Down
3 changes: 3 additions & 0 deletions tests/test_data/異字体旧字体への対応.csv
Original file line number Diff line number Diff line change
Expand Up @@ -63,3 +63,6 @@ address,prefecture,city,town,rest
# 「螢池」と「蛍池」の表記ゆれへの対応
大阪府豊中市螢池東町一丁目5番1号,大阪府,豊中市,螢池東町一丁目,5番1号
大阪府豊中市蛍池東町一丁目5番1号,大阪府,豊中市,螢池東町一丁目,5番1号
# 「瀧本」と「滝本」の表記ゆれへの対応
和歌山県新宮市熊野川町滝本417-1,和歌山県,新宮市,熊野川町瀧本,417-1
和歌山県新宮市熊野川町瀧本417-1,和歌山県,新宮市,熊野川町瀧本,417-1

0 comments on commit 0cffa39

Please sign in to comment.