Skip to content

Commit

Permalink
1.1.0:兼容模式·测试优化·功能分离
Browse files Browse the repository at this point in the history
✅解析器对象「获取目标类型」函数支持
✅解析器自动识别「词项/语句」(从数据到目标的「目标类型」参数不再必选)
✅分离XML、JSON、序列化三种解析器,使包依赖轻量化
✅优化测试集:重用代码&宏展示优化
  • Loading branch information
ARCJ137442 committed Aug 8, 2023
1 parent cd74fd8 commit a6628f5
Show file tree
Hide file tree
Showing 15 changed files with 242 additions and 1,095 deletions.
71 changes: 1 addition & 70 deletions Manifest.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,78 +2,9 @@

julia_version = "1.9.1"
manifest_format = "2.0"
project_hash = "b618f0bb9b0aef939358c434d47108d24dffce9d"

[[deps.Dates]]
deps = ["Printf"]
uuid = "ade2ca70-3891-5945-98fb-dc099432e06a"

[[deps.JSON]]
deps = ["Dates", "Mmap", "Parsers", "Unicode"]
git-tree-sha1 = "31e996f0a15c7b280ba9f76636b3ff9e2ae58c9a"
uuid = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
version = "0.21.4"

[[deps.Mmap]]
uuid = "a63ad114-7e13-5084-954f-fe012c677804"

[[deps.OrderedCollections]]
git-tree-sha1 = "2e73fe17cac3c62ad1aebe70d44c963c3cfdc3e3"
uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
version = "1.6.2"

[[deps.Parsers]]
deps = ["Dates", "PrecompileTools", "UUIDs"]
git-tree-sha1 = "4b2e829ee66d4218e0cef22c0a64ee37cf258c29"
uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0"
version = "2.7.1"

[[deps.PrecompileTools]]
deps = ["Preferences"]
git-tree-sha1 = "9673d39decc5feece56ef3940e5dafba15ba0f81"
uuid = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
version = "1.1.2"

[[deps.Preferences]]
deps = ["TOML"]
git-tree-sha1 = "7eb1686b4f04b82f96ed7a4ea5890a4f0c7a09f1"
uuid = "21216c6a-2e73-6563-6e65-726566657250"
version = "1.4.0"

[[deps.Printf]]
deps = ["Unicode"]
uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7"

[[deps.Random]]
deps = ["SHA", "Serialization"]
uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
project_hash = "9e8639383807ffd6257aa516ae29f54e67bf56f3"

[[deps.Reexport]]
git-tree-sha1 = "45e428421666073eab6f2da5c9d310d99bb12f9b"
uuid = "189a3867-3050-52da-a836-e630ba90ab69"
version = "1.2.2"

[[deps.SHA]]
uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce"
version = "0.7.0"

[[deps.Serialization]]
uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b"

[[deps.TOML]]
deps = ["Dates"]
uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76"
version = "1.0.3"

[[deps.UUIDs]]
deps = ["Random", "SHA"]
uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"

[[deps.Unicode]]
uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"

[[deps.XML]]
deps = ["Mmap", "OrderedCollections"]
git-tree-sha1 = "cbf82009944525df5b6407bff97ba554b85f20fe"
uuid = "72c71f33-b9b6-44de-8c94-c961784809e2"
version = "0.3.0"
5 changes: 1 addition & 4 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,10 +1,7 @@
name = "JuNarsese"
uuid = "11330a76-bea1-45e0-8f80-7114e2f607b1"
authors = ["ARCJ137442 <[email protected]>"]
version = "1.0.0"
version = "1.1.0"

[deps]
JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
XML = "72c71f33-b9b6-44de-8c94-c961784809e2"
5 changes: 0 additions & 5 deletions src/Conversion.jl
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,4 @@ include("Conversion/core/ast.jl")
# 附加
include("Conversion/extra/string_shortcut.jl") # 使用eval的字符串

# 外部文件格式 # TODO: 整体完成后分离独立成包,以便让整体支持轻量化
include("Conversion/extra/serialization.jl") # 序列化支持
include("Conversion/extra/json.jl")
include("Conversion/extra/xml.jl")

end
15 changes: 9 additions & 6 deletions src/Conversion/core/ast.jl
Original file line number Diff line number Diff line change
Expand Up @@ -154,9 +154,6 @@ abstract type ASTParser <: AbstractParser end
"Short alias for the type of any `ASTParser` subtype (used to dispatch on parser types)."
const TAParser::Type = Type{P} where {P <: ASTParser}

"Julia的Expr对象"
Base.eltype(::TAParser) = Expr

"""
声明「原生类型」
- 解析器直接返回自身
Expand All @@ -177,7 +174,7 @@ const AST_PRESERVED_TYPES::Type = Union{
}

"""
声明「结构类型
声明「目标类型
- 能被解析器支持解析
"""
const AST_PARSE_TARGETS::Type = DEFAULT_PARSE_TARGETS
Expand All @@ -189,6 +186,12 @@ const AST_PARSE_TARGETS::Type = DEFAULT_PARSE_TARGETS
"""
const AST_PRESERVED_HEAD::Symbol = Symbol(":preserved:")

"""
    parse_target_types(::TAParser)

Return the target types of the AST parser: terms / sentences.

Uses this file's own `AST_PARSE_TARGETS` alias instead of the string parser's
`STRING_PARSE_TARGETS`, so the AST parser does not rely on a constant that
belongs to a different parser.
"""
parse_target_types(::TAParser) = AST_PARSE_TARGETS

"Data type handled by the AST parser: Julia `Expr` objects."
function Base.eltype(::TAParser)::Type
    return Expr
end

# 【特殊链接】词项↔字符串 #

"重载Expr的构造方法"
Expand Down Expand Up @@ -450,9 +453,9 @@ begin "解析器入口"
- 封装性:只能调用它解析Narsese词项/语句
"""
function data2narsese(
parser::TAParser, ::Type{T},
parser::TAParser, ::Type, # 【20230808 10:33:39】因「兼容模式」不限制此处Type
ex::Expr
)::T where {T <: AST_PARSE_TARGETS}
)::AST_PARSE_TARGETS
return ast_parse(
parser, ex,
Narsese.eval, # 使用Narsese模块作解析の上下文
Expand Down
57 changes: 42 additions & 15 deletions src/Conversion/core/string.jl
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ end
(默认)实例化,并作为一个「转换器」导出
- 来源:文档 `NARS ASCII Input.pdf`
"""
StringParser_ascii::StringParser = StringParser(
const StringParser_ascii::StringParser = StringParser(
Dict( # 原子前缀
Word => "", # 置空
IVar => "\$",
Expand Down Expand Up @@ -242,7 +242,7 @@ StringParser_ascii::StringParser = StringParser(
(LaTeX扩展)实例化,并作为一个「转换器」导出
- 来源:文档 `NARS ASCII Input.pdf`
"""
StringParser_latex::StringParser = StringParser(
const StringParser_latex::StringParser = StringParser(
Dict( # 原子前缀
Word => "", # 置空
IVar => "\$",
Expand Down Expand Up @@ -315,8 +315,17 @@ StringParser_latex::StringParser = StringParser(



"普通字符串"
Base.eltype(::StringParser) = String
"""
Defines the "target types" of string conversion.
- Converts between `String` and terms / sentences
"""
const STRING_PARSE_TARGETS::Type = DEFAULT_PARSE_TARGETS

"Target types of the string parser: terms / sentences."
function parse_target_types(::StringParser)
    return STRING_PARSE_TARGETS
end

"Data type handled by the string parser: plain `String`."
function Base.eltype(::StringParser)::Type
    return String
end

## 已在template.jl导入
# using ..Util
Expand Down Expand Up @@ -471,7 +480,7 @@ end
- `[A,B]`/`{A,B}`: 词项集
- `?A`: 原子词项
"""
function data2narsese(parser::StringParser, ::Type{Term}, s::String)
function data2narsese(parser::StringParser, ::TYPE_TERMS, s::String)
# 预处理覆盖局部变量
s::String = parser.preprocess(s)

Expand Down Expand Up @@ -898,8 +907,8 @@ begin "语句相关"
function data2narsese(
parser::StringParser, ::Type{Punctuation},
s::String,
default = Judgement,
)
default::Type = Judgement, # 📌【20230808 9:46:21】此处不能用Type{P}限制,会导致类型变量连锁,类型转换失败
)::Type{ <: Union{Punctuation, Nothing}}
get(parser.punctuation2type, s, default)
end

Expand Down Expand Up @@ -965,6 +974,9 @@ begin "语句相关"

"""
总解析方法 : 词项+标点+时态+真值
- 【20230808 9:34:22】兼容模式:词项语句均可
- 在「目标类型」中统一使用Any以避免「各自目标类型不同」的歧义
- 「真值」「时态」「标点」俱无⇒转换为词项
默认真值 default_truth
- 核心功能:在没有真值时,自动创建真值
Expand All @@ -976,16 +988,17 @@ begin "语句相关"
- (预处理去空格后)`<A-->B>.:|:%1.00;0.90%`
"""
function data2narsese(
parser::StringParser, ::Type{Sentence},
parser::StringParser,
::Type{Any}, # 兼容模式
s::String,
F::Type=Float16, C::Type=Float16;
default_truth::Truth = Truth16(1.0, 0.5), # 动态创建
default_punctuation::Type = Judgement
)
default_punctuation::Type = Judgement # 默认类型
)::STRING_PARSE_TARGETS
# 预处理覆盖局部变量
str::String = parser.preprocess(s)
# 从尾部到头部,逐一解析「真值→时态→标点→词项」
index::Integer = lastindex(str)
index_start::Integer = lastindex(str)

truth::Truth, index = _match_truth(parser, str, F, C; default_truth)
str = str[begin:index] # 反复剪裁
Expand All @@ -997,17 +1010,31 @@ begin "语句相关"
str = str[begin:index] # 反复剪裁

term::Term = data2narsese(parser, Term, str) # 剩下就是词项

# 「真值」「时态」「标点」俱无⇒转换为词项
index == index_start && return term

# 构造
return Sentence{punctuation}(term, truth, tense)
end

"""
重定向默认值处理: AbstractSentence => Sentence
兼容化后的「语句转换方法」:兼容+类型断言
"""
function data2narsese(
parser::StringParser, ::Type{AbstractSentence},
args...; kwargs...)
data2narsese(parser, Sentence, args...; kwargs...)
parser::StringParser, ::TYPE_SENTENCES,
s::String,
F::Type=Float16, C::Type=Float16;
default_truth::Truth = Truth16(1.0, 0.5), # 动态创建
default_punctuation::Type = Nothing # 默认类型
)::AbstractSentence # 使用类型断言限制
data2narsese(
parser, Any, # Any对接兼容模式
s,
F, C;
default_truth,
default_punctuation,
)
end

"""
Expand Down
115 changes: 0 additions & 115 deletions src/Conversion/extra/json.jl

This file was deleted.

Loading

0 comments on commit a6628f5

Please sign in to comment.