Skip to content

Commit

Permalink
Merge pull request #527 from YuukiToriyama/release/v0.1.24
Browse files Browse the repository at this point in the history
release/v0.1.24をmainブランチにマージ
  • Loading branch information
YuukiToriyama authored Nov 10, 2024
2 parents a4cf136 + c9dc0ea commit 47377a2
Show file tree
Hide file tree
Showing 16 changed files with 314 additions and 127 deletions.
7 changes: 0 additions & 7 deletions .github/workflows/code-quality-check.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,3 @@ jobs:
reporter: 'github-pr-review'
filter_mode: 'nofilter'
github_token: ${{ secrets.GITHUB_TOKEN }}
- name: Run benchmark
uses: boa-dev/criterion-compare-action@v3
with:
token: ${{ secrets.GITHUB_TOKEN }}
branchName: ${{ github.base_ref }}
cwd: 'core'
benchName: 'core_benchmark'
13 changes: 13 additions & 0 deletions .github/workflows/run-test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,16 @@ jobs:
- name: Build check for wasm crate
working-directory: wasm
run: wasm-pack build --target web --scope toriyama

msrv:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Install minimum supported version
uses: dtolnay/rust-toolchain@master
with:
toolchain: 1.75.0
- name: Basic build
run: cargo build --verbose
- name: Build docs
run: cargo doc --verbose
4 changes: 3 additions & 1 deletion .github/workflows/upload-npmjs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ jobs:
publish:
runs-on: ubuntu-latest
environment: npmjs
permissions:
id-token: write
defaults:
run:
working-directory: wasm
Expand Down Expand Up @@ -48,6 +50,6 @@ jobs:
- name: Upload wasm to npmjs.com
run: |
cd pkg
npm publish --access public
npm publish --provenance --access public
env:
NODE_AUTH_TOKEN: ${{ secrets.NPMJS_REGISTRY_TOKEN }}
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ members = [
resolver = "2"

[workspace.package]
version = "0.1.23"
version = "0.1.24"
edition = "2021"
description = "A Rust Library to parse japanese addresses."
repository = "https://github.com/YuukiToriyama/japanese-address-parser"
Expand Down
17 changes: 2 additions & 15 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

[![Docs](https://docs.rs/japanese-address-parser/badge.svg)](https://docs.rs/japanese-address-parser)
[![Crates.io (latest)](https://img.shields.io/crates/v/japanese-address-parser)](https://crates.io/crates/japanese-address-parser)
![Rust Version](https://img.shields.io/badge/rust%20version-%3E%3D1.73.0-orange)
![Rust Version](https://img.shields.io/badge/rust%20version-%3E%3D1.75.0-orange)
[![Unit test & Integration test](https://github.com/YuukiToriyama/japanese-address-parser/actions/workflows/run-test.yaml/badge.svg?branch=main)](https://github.com/YuukiToriyama/japanese-address-parser/actions/workflows/run-test.yaml)

A Rust library for parsing Japanese addresses.
Expand Down Expand Up @@ -48,20 +48,7 @@ fn main() {
This crate is designed to be buildable for `wasm32-unknown-unknown` with `wasm-pack`.
A pre-compiled wasm module is available on npmjs.com.

```bash
npm install @toriyama/japanese-address-parser
```

```javascript
import init, {Parser} from "@toriyama/japanese-address-parser"

init().then(() => {
const parser = new Parser()
parser.parse("東京都千代田区丸の内1-1-1").then(parseResult => {
console.log(JSON.stringify(parseResult, null, "\t"))
})
})
```
You can run this crate in your browser. For more details, see the [wasm module's README](wasm/README.md).

## Python support(experimental)

Expand Down
9 changes: 2 additions & 7 deletions core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,10 @@ license.workspace = true
readme = "../README.md"
keywords.workspace = true
categories.workspace = true
rust-version = "1.73.0"
rust-version = "1.75.0"

[lib]
crate-type = ["rlib", "cdylib"]
bench = false

[features]
default = ["city-name-correction"]
Expand All @@ -23,12 +22,8 @@ format-house-number = []
eliminate-whitespaces = []
experimental = []

[[bench]]
name = "core_benchmark"
harness = false

[dependencies]
itertools = "0.13.0"
itertools = "0.13.0" # 互換性のために残してあるが、`core::parser::adapter`を削除する際に忘れずに削除する
log.workspace = true
rapidfuzz = "0.5.0"
regex = { version = "1.10.6", default-features = false, features = ["std", "unicode-perl"] }
Expand Down
7 changes: 0 additions & 7 deletions core/benches/core_benchmark.rs

This file was deleted.

48 changes: 0 additions & 48 deletions core/benches/orthographical_variant_adapter.rs

This file was deleted.

1 change: 1 addition & 0 deletions core/src/adapter.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
pub mod orthographical_variant_adapter;
160 changes: 160 additions & 0 deletions core/src/adapter/orthographical_variant_adapter.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
/// A group of characters that are treated as interchangeable spellings of one
/// another when matching region names.
///
/// Each variant is named after one character of its group; the complete group
/// for a variant is listed in the `match` inside `OrthographicalVariant::value`.
#[derive(Clone)]
pub enum OrthographicalVariant {
    の,
    ツ,
    ケ,
    薮,
    崎,
    檜,
    龍,
    竈,
    嶋,
    舘,
    脊,
    渕,
    己,
    槇,
    治,
    佛,
    澤,
    塚,
    恵,
    穂,
    梼,
    蛍,
    與,
    瀧,
    籠,
    濱,
    祗,
    曾,
}

impl OrthographicalVariant {
    /// Returns every character that belongs to this variant's group.
    fn value(&self) -> &[char] {
        match self {
            OrthographicalVariant::の => &['の', 'ノ', '之'],
            OrthographicalVariant::ツ => &['ツ', 'ッ'],
            OrthographicalVariant::ケ => &['ケ', 'ヶ', 'が', 'ガ'],
            OrthographicalVariant::薮 => &['薮', '藪', '籔'],
            OrthographicalVariant::崎 => &['崎', '﨑'],
            OrthographicalVariant::檜 => &['桧', '檜'],
            OrthographicalVariant::龍 => &['龍', '竜'],
            OrthographicalVariant::竈 => &['竈', '竃', '釜'],
            OrthographicalVariant::嶋 => &['嶋', '島'],
            OrthographicalVariant::舘 => &['舘', '館'],
            OrthographicalVariant::脊 => &['脊', '背'],
            OrthographicalVariant::渕 => &['渕', '淵'],
            OrthographicalVariant::己 => &['己', '巳'],
            OrthographicalVariant::槇 => &['槇', '槙'],
            OrthographicalVariant::治 => &['治', '冶'],
            OrthographicalVariant::佛 => &['佛', '仏'],
            OrthographicalVariant::澤 => &['澤', '沢'],
            // The second member is the CJK compatibility ideograph U+FA10. It is
            // written as an escape because Unicode normalization (NFC) folds it
            // into U+585A, which would leave the group with two identical
            // characters and make every substitution a no-op.
            OrthographicalVariant::塚 => &['塚', '\u{FA10}'],
            OrthographicalVariant::恵 => &['恵', '惠'],
            OrthographicalVariant::穂 => &['穂', '穗'],
            OrthographicalVariant::梼 => &['梼', '檮'],
            OrthographicalVariant::蛍 => &['蛍', '螢'],
            OrthographicalVariant::與 => &['與', '与'],
            OrthographicalVariant::瀧 => &['瀧', '滝'],
            OrthographicalVariant::籠 => &['籠', '篭'],
            OrthographicalVariant::濱 => &['濱', '浜'],
            OrthographicalVariant::祗 => &['祗', '祇'],
            OrthographicalVariant::曾 => &['曾', '曽'],
        }
    }

    /// Returns every ordered pair `(from, to)` of two different positions in
    /// this variant's group.
    ///
    /// Pairs are ordered by the position of `from` in the group, then by the
    /// position of `to`. Positions are compared, not characters, so a group of
    /// `n` characters always yields `n * (n - 1)` pairs.
    fn permutations(&self) -> Vec<(char, char)> {
        let characters = self.value();
        let pair_count = characters.len() * characters.len().saturating_sub(1);
        let mut pairs: Vec<(char, char)> = Vec::with_capacity(pair_count);
        for (from_index, &from) in characters.iter().enumerate() {
            for (to_index, &to) in characters.iter().enumerate() {
                if from_index != to_index {
                    pairs.push((from, to));
                }
            }
        }
        pairs
    }
}

/// Matches a region name against input text while tolerating the
/// orthographical variants listed in `variant_list`.
pub struct OrthographicalVariantAdapter {
/// Variant groups whose characters `apply` may substitute for one another.
pub variant_list: Vec<OrthographicalVariant>,
}

impl OrthographicalVariantAdapter {
    /// Tries to find an alternative spelling of `region_name` at the start of
    /// `input`, using the variant groups in `variant_list`.
    ///
    /// Alternative spellings are built by replacing one character of a group
    /// with another character of the same group; substitutions from different
    /// groups are combined.
    ///
    /// Returns `Some((region_name, rest))`, where `rest` is what follows the
    /// matched spelling in `input`, or `None` when no alternative spelling is a
    /// prefix of `input`. The unmodified `region_name` itself is never tested.
    pub fn apply(self, input: &str, region_name: &str) -> Option<(String, String)> {
        // Keep only the groups that have at least one character in `input`.
        let variant_list: Vec<&OrthographicalVariant> = self
            .variant_list
            .iter()
            .filter(|v| v.value().iter().any(|&c| input.contains(c)))
            .collect();
        if variant_list.is_empty() {
            return None;
        }

        // Every spelling produced so far, starting with the canonical one.
        let mut candidates: Vec<String> = vec![region_name.to_string()];
        // Work through the groups one at a time.
        for variant in variant_list {
            let mut new_candidates: Vec<String> = vec![];
            // Build the ordered pairs for this group, e.g.
            // ['ケ', 'ヶ', 'が'] -> (ケ, ヶ), (ケ, が), (ヶ, ケ), (ヶ, が), (が, ケ), (が, ヶ)
            for (a, b) in variant.permutations() {
                for candidate in candidates.iter().filter(|x| x.contains(a)) {
                    let modified_candidate = modify_specific_character(candidate, a, b);
                    if let Some(rest) = input.strip_prefix(modified_candidate.as_str()) {
                        // Return as soon as one spelling matches.
                        return Some((region_name.to_string(), rest.to_string()));
                    }
                    new_candidates.push(modified_candidate);
                }
            }
            // Accumulate rather than replace: a spelling produced by an earlier
            // group must stay available to later groups even when the current
            // group did not rewrite it, otherwise some combinations are never tried.
            candidates.extend(new_candidates);
        }
        None
    }
}

/// Returns a copy of `text` in which every occurrence of `from` is replaced
/// by `to`; all other characters are kept as they are.
fn modify_specific_character(text: &str, from: char, to: char) -> String {
    let mut replaced = String::with_capacity(text.len());
    for ch in text.chars() {
        replaced.push(if ch == from { to } else { ch });
    }
    replaced
}

#[cfg(test)]
mod tests {
    use crate::adapter::orthographical_variant_adapter::OrthographicalVariant;

    /// The ケ group has four characters, so it must yield all twelve ordered
    /// pairs, grouped by the first character in group order.
    #[test]
    fn permutations() {
        let variant = OrthographicalVariant::ケ;
        assert_eq!(
            variant.permutations(),
            vec![
                ('ケ', 'ヶ'),
                ('ケ', 'が'),
                ('ケ', 'ガ'),
                ('ヶ', 'ケ'),
                ('ヶ', 'が'),
                ('ヶ', 'ガ'),
                ('が', 'ケ'),
                ('が', 'ヶ'),
                ('が', 'ガ'),
                ('ガ', 'ケ'),
                ('ガ', 'ヶ'),
                ('ガ', 'が'),
            ]
        );
    }
}
1 change: 1 addition & 0 deletions core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ compile_error! {
"The `blocking` feature is not supported with wasm target."
}

mod adapter;
#[deprecated(since = "0.1.23", note = "This module will be deleted in v0.2")]
pub mod api;
pub(crate) mod domain;
Expand Down
1 change: 1 addition & 0 deletions core/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ use crate::interactor::geolonia::{GeoloniaInteractor, GeoloniaInteractorImpl};
use crate::tokenizer::{End, Tokenizer};
use serde::Serialize;

#[deprecated(since = "0.1.24", note = "This module will be deleted in v0.2")]
pub mod adapter;

impl From<Tokenizer<End>> for Address {
Expand Down
24 changes: 12 additions & 12 deletions core/src/tokenizer/read_city.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use crate::domain::common::token::{append_token, Token};
use crate::parser::adapter::orthographical_variant_adapter::{
OrthographicalVariantAdapter, OrthographicalVariants, Variant,
use crate::adapter::orthographical_variant_adapter::{
OrthographicalVariant, OrthographicalVariantAdapter,
};
use crate::domain::common::token::{append_token, Token};
use crate::tokenizer::{CityNameFound, CityNameNotFound, PrefectureNameFound, Tokenizer};
use std::marker::PhantomData;

Expand Down Expand Up @@ -29,29 +29,29 @@ impl Tokenizer<PrefectureNameFound> {
}

// ここまでで市区町村名が読み取れない場合は、表記ゆれを含む可能性を検討する
let mut variant_list = vec![Variant::];
let mut variant_list = vec![OrthographicalVariant::];
match self.get_prefecture_name() {
Some("青森県") => {
variant_list.push(Variant::);
variant_list.push(OrthographicalVariant::);
}
Some("宮城県") => {
variant_list.push(Variant::);
variant_list.push(OrthographicalVariant::);
}
Some("茨城県") => {
variant_list.push(Variant::);
variant_list.push(Variant::);
variant_list.push(OrthographicalVariant::);
variant_list.push(OrthographicalVariant::);
}
Some("東京都") => {
variant_list.push(Variant::);
variant_list.push(OrthographicalVariant::);
}
Some("兵庫県") => {
variant_list.push(Variant::);
variant_list.push(OrthographicalVariant::);
}
Some("高知県") => {
variant_list.push(Variant::);
variant_list.push(OrthographicalVariant::);
}
Some("福岡県") => {
variant_list.push(Variant::);
variant_list.push(OrthographicalVariant::);
}
_ => {}
}
Expand Down
Loading

0 comments on commit 47377a2

Please sign in to comment.