Skip to content

Commit

Permalink
Merge pull request #394 from YuukiToriyama/release/v0.1.12
Browse files Browse the repository at this point in the history
release/v0.1.12をmainブランチにマージ
  • Loading branch information
YuukiToriyama authored Aug 30, 2024
2 parents 1ec4eef + 2542f57 commit 4c7ae80
Show file tree
Hide file tree
Showing 14 changed files with 275 additions and 331 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ members = [
resolver = "2"

[workspace.package]
version = "0.1.11"
version = "0.1.12"
edition = "2021"
description = "A Rust Library to parse japanese addresses."
repository = "https://github.com/YuukiToriyama/japanese-address-parser"
Expand Down
2 changes: 0 additions & 2 deletions core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,6 @@ rapidfuzz = "0.5.0"
regex = "1.10.2"
serde.workspace = true
reqwest = { version = "0.12.5", default-features = false, features = ["json", "rustls-tls"] }

[target.'cfg(target_arch = "wasm32")'.dependencies]
js-sys = "0.3.67"

[dev-dependencies]
Expand Down
3 changes: 3 additions & 0 deletions core/src/formatter.rs
Original file line number Diff line number Diff line change
@@ -1 +1,4 @@
pub(crate) mod chome_with_arabic_numerals;
pub(crate) mod fullwidth_character;
pub(crate) mod house_number;
pub(crate) mod informal_town_name_notation;
76 changes: 76 additions & 0 deletions core/src/formatter/chome_with_arabic_numerals.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
use crate::util::converter::JapaneseNumber;

/// Rewrites the Arabic-numeral chome of `target` into kanji numerals.
///
/// The chome is the digit run captured by `\D+(\d+)丁目`, i.e. digits that follow
/// at least one non-digit character and are immediately followed by `丁目`
/// (for example the `1` in `銀座1丁目`).
///
/// Returns `None` when the pattern does not match, when the digits cannot be
/// parsed as an `i8` (so `128` and above are rejected), or when
/// `to_japanese_form` returns `None`.
pub(crate) fn format_chome_with_arabic_numerals(target: &str) -> Option<String> {
    // Collect both the whole matched text and the captured chome digits.
    let (matched, chome) = if cfg!(target_arch = "wasm32") {
        let groups = js_sys::RegExp::new(r"\D+(\d+)丁目", "").exec(target)?;
        (groups.get(0).as_string()?, groups.get(1).as_string()?)
    } else {
        let captures = regex::Regex::new(r"\D+(?<chome>\d+)丁目")
            .unwrap()
            .captures(target)?;
        (
            captures.get(0)?.as_str().to_string(),
            captures.name("chome")?.as_str().to_string(),
        )
    };
    let chome_int = chome.parse::<i8>().ok()?;
    let kanji = chome_int.to_japanese_form()?;
    // Replacing the digits directly in `target` would hit the first occurrence of
    // that digit string anywhere in the input (e.g. the `2` of `2条` in
    // `南2条西2丁目`). The matched text starts with non-digits, so the first
    // occurrence of the digits inside it is always the captured chome; and because
    // the regex match is the leftmost one, the first occurrence of the matched
    // text in `target` is the match itself.
    let rewritten = matched.replacen(&chome, kanji.as_str(), 1);
    Some(target.replacen(&matched, &rewritten, 1))
}

#[cfg(all(test, not(target_arch = "wasm32")))]
mod tests {
    use super::format_chome_with_arabic_numerals;

    /// No digits precede `丁目`, so nothing can be rewritten.
    #[test]
    fn 丁目を検出できない場合() {
        let formatted = format_chome_with_arabic_numerals("a丁目");
        assert_eq!(formatted, None);
    }

    /// 127 is the largest value that fits in an `i8`; 128 must be rejected.
    #[test]
    fn 丁目をi8に変換できない場合() {
        let within_range = format_chome_with_arabic_numerals("銀座127丁目");
        assert_eq!(within_range.as_deref(), Some("銀座百二十七丁目"));

        let out_of_range = format_chome_with_arabic_numerals("銀座128丁目");
        assert_eq!(out_of_range, None);
    }

    /// The ordinary case: a single-digit chome is rewritten in kanji.
    #[test]
    fn 成功() {
        let formatted = format_chome_with_arabic_numerals("銀座1丁目");
        assert_eq!(formatted.as_deref(), Some("銀座一丁目"));
    }
}

#[cfg(all(test, target_arch = "wasm32"))]
mod wasm_tests {
    use super::format_chome_with_arabic_numerals;
    use wasm_bindgen_test::{wasm_bindgen_test, wasm_bindgen_test_configure};

    wasm_bindgen_test_configure!(run_in_browser);

    /// No digits precede `丁目`, so nothing can be rewritten.
    #[wasm_bindgen_test]
    fn chome_not_detected() {
        let formatted = format_chome_with_arabic_numerals("a丁目");
        assert_eq!(formatted, None);
    }

    /// 127 is the largest value that fits in an `i8`; 128 must be rejected.
    #[wasm_bindgen_test]
    fn failed_to_convert_chome_into_i8() {
        let within_range = format_chome_with_arabic_numerals("銀座127丁目");
        assert_eq!(within_range.as_deref(), Some("銀座百二十七丁目"));

        let out_of_range = format_chome_with_arabic_numerals("銀座128丁目");
        assert_eq!(out_of_range, None);
    }

    /// The ordinary case: a single-digit chome is rewritten in kanji.
    #[wasm_bindgen_test]
    fn success() {
        let formatted = format_chome_with_arabic_numerals("銀座1丁目");
        assert_eq!(formatted.as_deref(), Some("銀座一丁目"));
    }
}
30 changes: 30 additions & 0 deletions core/src/formatter/fullwidth_character.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
/// Converts every full-width digit (U+FF10..=U+FF19) in `target` to the
/// corresponding ASCII digit (`0`..=`9`).
///
/// All other characters, including ASCII digits, are copied through unchanged.
pub(crate) fn format_fullwidth_number(target: &str) -> String {
    target
        .chars()
        .map(|c| match c {
            // The range is spelled with escapes so that the full-width code points
            // cannot be silently folded to ASCII by an editor or normalizer, which
            // would turn this function into an identity mapping.
            '\u{FF10}'..='\u{FF19}' => char::from_digit(u32::from(c) - 0xFF10, 10).unwrap_or(c),
            _ => c,
        })
        .collect()
}

#[cfg(test)]
mod tests {
    use super::format_fullwidth_number;

    /// Inputs contain real full-width digits (written as escapes); the output
    /// must contain ASCII digits only.
    #[test]
    fn 全角文字を含む() {
        assert_eq!(format_fullwidth_number("京橋\u{FF11}丁目"), "京橋1丁目");
        assert_eq!(
            format_fullwidth_number("京橋\u{FF13}丁目\u{FF11}の\u{FF11}"),
            "京橋3丁目1の1"
        );
    }
}
146 changes: 146 additions & 0 deletions core/src/formatter/informal_town_name_notation.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
use crate::util::converter::JapaneseNumber;

/// Expands an informal `<town name><digits>[-…]` notation into `<town name><kanji>丁目…`.
///
/// The input is split into a leading run of non-digits (the town name), the
/// digit run that follows it (the chome), any number of hyphen-like separators,
/// and the remaining text. The separators are dropped and the result is
/// rebuilt as `{town name}{chome in kanji}丁目{rest}`.
///
/// Returns `None` when the input does not have that shape, when the chome does
/// not parse as an `i8`, when it is larger than 42, or when `to_japanese_form`
/// returns `None`.
pub(crate) fn format_informal_town_name_notation(target: &str) -> Option<String> {
    // Both branches yield the same three pieces as owned strings; the chome is
    // parsed afterwards in one place.
    let (town_name, digits, rest) = if cfg!(target_arch = "wasm32") {
        let pattern = js_sys::RegExp::new(
            r"^(\D+)(\d+)[\u002D\u2010\u2011\u2012\u2013\u2014\u2015\u2212\u30FC\uFF0D\uFF70]*(.*)$",
            "",
        );
        let groups = pattern.exec(target)?;
        (
            groups.get(1).as_string()?,
            groups.get(2).as_string()?,
            groups.get(3).as_string()?,
        )
    } else {
        let pattern = regex::Regex::new(
            r"^(?<town_name>\D+)(?<chome>\d+)[\u002D\u2010\u2011\u2012\u2013\u2014\u2015\u2212\u30FC\uFF0D\uFF70]*(?<rest>.*)$",
        )
        .unwrap();
        let groups = pattern.captures(target)?;
        (
            groups.name("town_name")?.as_str().to_string(),
            groups.name("chome")?.as_str().to_string(),
            groups.name("rest")?.as_str().to_string(),
        )
    };
    // The largest chome is the 42 of Obihiro-shi Nishi 19-jo 42-chome
    // (帯広市西十九条四十二丁目), so anything from 43 upwards is rejected.
    let chome = digits.parse::<i8>().ok().filter(|n| *n <= 42)?;
    let kanji = chome.to_japanese_form()?;
    Some(format!("{}{}丁目{}", town_name, kanji, rest))
}

#[cfg(all(test, not(target_arch = "wasm32")))]
mod tests {
    use crate::formatter::informal_town_name_notation::format_informal_town_name_notation;

    /// Input starts with a digit, so there is no town name to capture.
    #[test]
    fn 町名が検出できない場合() {
        assert_eq!(format_informal_town_name_notation("1-1-1"), None);
    }

    /// Input has a town name but no digits after it.
    #[test]
    fn 丁目が検出できない場合() {
        assert_eq!(format_informal_town_name_notation("銀座"), None);
    }

    /// 42 is the largest accepted chome; 43 must be rejected.
    #[test]
    fn 丁目が大きすぎる場合() {
        assert_eq!(
            format_informal_town_name_notation("西十九条42"),
            Some("西十九条四十二丁目".to_string())
        );
        assert_eq!(format_informal_town_name_notation("西十九条43"), None);
    }

    /// Nothing follows the chome digits.
    #[test]
    fn 町名以降がない場合() {
        assert_eq!(
            format_informal_town_name_notation("銀座1"),
            Some("銀座一丁目".to_string())
        );
    }

    /// Every hyphen-like separator accepted by the pattern is consumed after the chome
    /// and preserved verbatim in the remainder.
    #[test]
    fn ハイフン以外の文字種が使われている場合() {
        // Written as escapes: several of these glyphs are visually identical, and
        // literal characters are easily folded into U+002D / U+30FC by editors or
        // Unicode normalization, which would leave some code points untested.
        let hyphen_like = [
            '\u{002D}', '\u{2010}', '\u{2011}', '\u{2012}', '\u{2013}', '\u{2014}', '\u{2015}',
            '\u{2212}', '\u{30FC}', '\u{FF0D}', '\u{FF70}',
        ];
        for hyphen in hyphen_like {
            let input = format!("有楽町1{hyphen}1{hyphen}1");
            let expected = format!("有楽町一丁目1{hyphen}1");
            assert_eq!(
                format_informal_town_name_notation(&input),
                Some(expected),
                "separator U+{:04X}",
                u32::from(hyphen)
            );
        }
    }
}

#[cfg(all(test, target_arch = "wasm32"))]
mod wasm_tests {
    use crate::formatter::informal_town_name_notation::format_informal_town_name_notation;
    use wasm_bindgen_test::{wasm_bindgen_test, wasm_bindgen_test_configure};

    wasm_bindgen_test_configure!(run_in_browser);

    /// Input starts with a digit, so there is no town name to capture.
    #[wasm_bindgen_test]
    fn town_name_not_detected() {
        assert_eq!(format_informal_town_name_notation("1-1-1"), None);
    }

    /// Input has a town name but no digits after it.
    #[wasm_bindgen_test]
    fn chome_not_detected() {
        assert_eq!(format_informal_town_name_notation("銀座"), None);
    }

    /// 42 is the largest accepted chome; 43 must be rejected.
    #[wasm_bindgen_test]
    fn chome_is_too_large_number() {
        assert_eq!(
            format_informal_town_name_notation("西十九条42"),
            Some("西十九条四十二丁目".to_string())
        );
        assert_eq!(format_informal_town_name_notation("西十九条43"), None);
    }

    /// Nothing follows the chome digits.
    #[wasm_bindgen_test]
    fn rest_is_empty() {
        assert_eq!(
            format_informal_town_name_notation("銀座1"),
            Some("銀座一丁目".to_string())
        );
    }

    /// Every hyphen-like separator accepted by the pattern is consumed after the chome
    /// and preserved verbatim in the remainder.
    #[wasm_bindgen_test]
    fn hyphen_like_characters_are_used() {
        // Written as escapes: several of these glyphs are visually identical, and
        // literal characters are easily folded into U+002D / U+30FC by editors or
        // Unicode normalization, which would leave some code points untested.
        let hyphen_like = [
            '\u{002D}', '\u{2010}', '\u{2011}', '\u{2012}', '\u{2013}', '\u{2014}', '\u{2015}',
            '\u{2212}', '\u{30FC}', '\u{FF0D}', '\u{FF70}',
        ];
        for hyphen in hyphen_like {
            let input = format!("有楽町1{hyphen}1{hyphen}1");
            let expected = format!("有楽町一丁目1{hyphen}1");
            assert_eq!(
                format_informal_town_name_notation(&input),
                Some(expected),
                "separator U+{:04X}",
                u32::from(hyphen)
            );
        }
    }
}
1 change: 0 additions & 1 deletion core/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ use crate::tokenizer::Tokenizer;
use serde::Serialize;

pub(crate) mod adapter;
pub(crate) mod filter;

impl<T> From<Tokenizer<T>> for Address {
fn from(value: Tokenizer<T>) -> Self {
Expand Down
4 changes: 4 additions & 0 deletions core/src/parser/adapter/orthographical_variant_adapter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,9 @@ pub trait OrthographicalVariants {
const: Variant;
const: Variant;
const: Variant;
const: Variant;
const: Variant;
const: Variant;
}

impl OrthographicalVariants for Variant {
Expand All @@ -53,7 +55,9 @@ impl OrthographicalVariants for Variant {
const: Variant = &["梼", "檮"];
const: Variant = &["葛󠄀", "葛"];
const: Variant = &["蛍", "螢"];
const: Variant = &["與", "与"];
const: Variant = &["瀧", "滝"];
const: Variant = &["濱", "浜"];
}

pub struct OrthographicalVariantAdapter {
Expand Down
7 changes: 0 additions & 7 deletions core/src/parser/filter.rs

This file was deleted.

26 changes: 0 additions & 26 deletions core/src/parser/filter/fullwidth_character.rs

This file was deleted.

Loading

0 comments on commit 4c7ae80

Please sign in to comment.