-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathLibGrass.h
88 lines (64 loc) · 4.26 KB
/
LibGrass.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
#pragma once
// Platform configuration.
//
// LIBGRASS_API marks the symbols that make up the library's public interface:
//   * Windows: dllexport while building the library (LIBGRASS_EXPORTS defined),
//     dllimport for consumers.
//   * Elsewhere: default symbol visibility while building the library, and
//     nothing for consumers.
//
// Every helper macro below is wrapped in #ifndef so that including this header
// after the same macro has already been defined (on the compiler command line
// or by another header) does not trigger a macro redefinition.
#ifdef _WIN32
#ifdef LIBGRASS_EXPORTS
#define LIBGRASS_API __declspec(dllexport)
#else
#define LIBGRASS_API __declspec(dllimport)
#endif
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN // Exclude rarely-used stuff from Windows headers
#endif
// Windows Header Files:
#include <windows.h>
#else
#ifdef LIBGRASS_EXPORTS
#define LIBGRASS_API __attribute__((visibility("default")))
#else
#define LIBGRASS_API
#endif
// Stand-ins for the names <windows.h> would otherwise provide, so the
// `encoding = CP_UTF8` default arguments in this header compile on every
// platform.
#ifndef CP_UTF8
#define CP_UTF8 65001
#endif
#ifndef CP_ACP
#define CP_ACP 0
#endif
typedef wchar_t WCHAR;
#endif
#include <string>
#include <vector>
#include <sstream>
#include <common/parser/implementations/POSTagging/postag_run.h>
// ---------------------------------------------------------------------------
// Segmentation API
//
// NOTE(review): only declarations are visible in this header. Parameter names
// are the sole hint at semantics (e.g. `times` is presumably the number of
// training iterations, `idx` presumably identifies a segmentor context) --
// confirm against the implementation.
// ---------------------------------------------------------------------------

/// Train a segmentor from `train_file`; `encoding` defaults to CP_UTF8.
LIBGRASS_API void train_segmentor(
    const std::string& train_file,
    const std::string& feature_file,
    const std::string& dict_file,
    int times,
    int encoding = CP_UTF8);

/// Same parameter list as train_segmentor(), but `encoding` has no default.
LIBGRASS_API void train_segmentor_ctx(
    const std::string& train_file,
    const std::string& feature_file,
    const std::string& dict_file,
    int times,
    int encoding);

/// Create a segmentor; file names default to "segfeat_PKU" and "dic_PKU".
LIBGRASS_API void create_segmentor(
    const std::string& feature_file = "segfeat_PKU",
    const std::string& dict_file = "dic_PKU");

/// Context flavour of create_segmentor(); returns an int
/// (presumably the `idx` accepted by the *_ctx functions -- TODO confirm).
LIBGRASS_API int create_segmentor_ctx(
    const std::string& feature_file,
    const std::string& dict_file);

/// Counterpart of create_segmentor().
LIBGRASS_API void delete_segmentor();

/// Counterpart of create_segmentor_ctx(); declared with C linkage.
extern "C" LIBGRASS_API void delete_segmentor_ctx(int idx);

/// Segment `input_file` into `output_file`; `encoding` defaults to CP_UTF8.
LIBGRASS_API void seg_file(
    const std::string& input_file,
    const std::string& output_file,
    int encoding = CP_UTF8);

/// seg_file() with an explicit context `idx`; `encoding` defaults to CP_UTF8.
LIBGRASS_API void seg_file_with_ctx(
    int idx,
    const std::string& input_file,
    const std::string& output_file,
    int encoding = CP_UTF8);

/// Segment an in-memory string and return the pieces as a vector of strings.
LIBGRASS_API std::vector<std::string> seg_string(
    const std::string& input,
    int encoding = CP_UTF8);

/// seg_string() with an explicit context `idx`; `encoding` has no default.
LIBGRASS_API std::vector<std::string> seg_string_with_ctx(
    int idx,
    const std::string& input,
    int encoding);
// ---------------------------------------------------------------------------
// POS tagging API
//
// Several functions come in two flavours that share one name: a C++ overload
// taking std::string / std::vector, and an `extern "C"` overload taking raw
// char pointers. Every declaration carries LIBGRASS_API so that the C-linkage
// entry points are exported from the shared library just like
// delete_postagger_ctx() and delete_segmentor_ctx() are; without it they are
// not exported on Windows or when the library is built with hidden default
// visibility.
//
// NOTE(review): only declarations are visible in this header. `times` is
// presumably the number of training iterations and `idx` presumably
// identifies a tagger context -- confirm against the implementation.
// ---------------------------------------------------------------------------

/// Train a POS tagger from `train_file`; `encoding` defaults to CP_UTF8.
LIBGRASS_API void train_postagger(const std::string & train_file, const std::string & feature_file, int times, int encoding = CP_UTF8);

/// Context flavour of train_postagger(); returns an int, `encoding` has no default.
LIBGRASS_API int train_postagger_ctx(const std::string & train_file, const std::string & feature_file, int times, int encoding);
/// C-linkage overload of train_postagger_ctx() taking char pointers.
extern "C" LIBGRASS_API int train_postagger_ctx(char* train_file, char* feature_file, int times, int encoding);

/// Create a POS tagger; `feature_file` defaults to "posfeat_PKU".
LIBGRASS_API void create_postagger(const std::string & feature_file = "posfeat_PKU");

/// Context flavour of create_postagger(); returns an int
/// (presumably the `idx` accepted by the *_ctx functions -- TODO confirm).
LIBGRASS_API int create_postagger_ctx(const std::string & feature_file);
/// C-linkage overload of create_postagger_ctx() taking a char pointer.
extern "C" LIBGRASS_API int create_postagger_ctx(char* feature_file);

/// Counterpart of create_postagger().
LIBGRASS_API void delete_postagger();
/// Counterpart of create_postagger_ctx(); declared with C linkage.
extern "C" LIBGRASS_API void delete_postagger_ctx(int idx);

/// Tag `input_file` into `output_file`; `encoding` defaults to CP_UTF8.
LIBGRASS_API void tag_file(const std::string & input_file, const std::string & output_file, int encoding = CP_UTF8);

/// tag_file() with an explicit context `idx`; `encoding` has no default.
LIBGRASS_API void tag_file_with_ctx(int idx, const std::string & input_file, const std::string & output_file, int encoding);
/// C-linkage overload of tag_file_with_ctx() taking char pointers.
extern "C" LIBGRASS_API void tag_file_with_ctx(int idx, char* input_file, char* output_file, int encoding);

/// Tag a sentence given as a vector of strings; returns a vector of string pairs.
LIBGRASS_API std::vector<std::pair<std::string, std::string>> tag_sentence(const std::vector<std::string> & input, int encoding = CP_UTF8);

/// C-linkage tagging entry point: `input` is passed together with `length`
/// (presumably an array of `length` C strings -- TODO confirm).
/// Returns POSTagging::TaggingResult, declared in postag_run.h.
extern "C" LIBGRASS_API POSTagging::TaggingResult tag_sentence_with_ctx(int idx, char** input, int length, int encoding);
// ---------------------------------------------------------------------------
// Syntax tree API
//
// NOTE(review): only declarations are visible in this header; `round` is
// presumably the number of training rounds -- confirm against the
// implementation.
// ---------------------------------------------------------------------------

/// Train a syntax parser from `input_file`.
LIBGRASS_API void train_syntax_parser(
    const std::string& input_file,
    const std::string& feature_file,
    int round);

/// Create a syntax parser from `feature_file` (no default file name).
LIBGRASS_API void create_syntax_parser(
    const std::string& feature_file);

/// Counterpart of create_syntax_parser().
LIBGRASS_API void delete_syntax_parser();

/// Parse `input_file` into `output_file`; `encoding` defaults to CP_UTF8.
LIBGRASS_API void syntax_parse_file(
    const std::string& input_file,
    const std::string& output_file,
    int encoding = CP_UTF8);

/// Parse an in-memory string and return the result as a string.
LIBGRASS_API std::string syntax_parse_string(
    const std::string& input,
    int encoding = CP_UTF8);
// ---------------------------------------------------------------------------
// Semantic graph API
//
// NOTE(review): only declarations are visible in this header; `round` is
// presumably the number of training rounds -- confirm against the
// implementation.
// ---------------------------------------------------------------------------

/// Train a semantic parser from `input_file`.
LIBGRASS_API void train_semantic_parser(
    const std::string& input_file,
    const std::string& feature_file,
    int round);

/// Create a semantic parser; takes a semantic feature file and a tree feature file.
LIBGRASS_API void create_semantic_parser(
    const std::string& semantic_feature_file,
    const std::string& tree_feature_file);

/// Counterpart of create_semantic_parser().
LIBGRASS_API void delete_semantic_parser();

/// Parse `input_file` into `output_file`; `encoding` defaults to CP_UTF8.
LIBGRASS_API void semantic_parse_file(
    const std::string& input_file,
    const std::string& output_file,
    int encoding = CP_UTF8);

/// Parse an in-memory string and return the result as a string.
LIBGRASS_API std::string semantic_parse_string(
    const std::string& input,
    int encoding = CP_UTF8);
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------

/// Read `input_file` and write `output_file`; `encoding` defaults to CP_UTF8.
/// NOTE(review): the name suggests the output holds one sentence per line --
/// confirm against the implementation.
LIBGRASS_API void sentence_per_line(
    const std::string& input_file,
    const std::string& output_file,
    int encoding = CP_UTF8);