forked from mtlynch/crfpp
-
Notifications
You must be signed in to change notification settings - Fork 4
/
crfpp.h
379 lines (299 loc) · 13.2 KB
/
crfpp.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
/*
CRF++ -- Yet Another CRF toolkit
$Id: crfpp.h 1592 2007-02-12 09:40:53Z taku $;
Copyright(C) 2005-2007 Taku Kudo <[email protected]>
*/
#ifndef CRFPP_CRFPP_H_
#define CRFPP_CRFPP_H_
/* C interface */
#ifdef __cplusplus
#include <cstdio>
#else
#include <stdio.h>
#endif
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Linkage decoration for the public API.
 *
 *   CRFPP_DLL_EXTERN        prefixes every function declaration below.
 *   CRFPP_DLL_CLASS_EXTERN  sits between `class` and the class name.
 *
 * On Windows (_WIN32) <windows.h> is pulled in and the function macro becomes
 * __declspec(dllexport) when DLL_EXPORT is defined and __declspec(dllimport)
 * otherwise; the class macro only receives a value (dllexport) in the
 * DLL_EXPORT case.  Whatever is still undefined afterwards gets the portable
 * fallback: plain `extern` for functions, nothing at all for classes.
 */
#if defined(_WIN32)
#  include <windows.h>
#  if defined(DLL_EXPORT)
#    define CRFPP_DLL_EXTERN       __declspec(dllexport)
#    define CRFPP_DLL_CLASS_EXTERN __declspec(dllexport)
#  else
#    define CRFPP_DLL_EXTERN       __declspec(dllimport)
#  endif
#endif

#if !defined(CRFPP_DLL_EXTERN)
#  define CRFPP_DLL_EXTERN extern
#endif

#if !defined(CRFPP_DLL_CLASS_EXTERN)
#  define CRFPP_DLL_CLASS_EXTERN
#endif
#ifndef SWIG
/*
 * Opaque handle types.  Only the struct tags are declared in this header,
 * so C callers pass these around by pointer and never see the layout.
 */
typedef struct crfpp_t crfpp_t;             /* tagger handle */
typedef struct crfpp_model_t crfpp_model_t; /* model handle  */

/*
 * C interface: model handles.
 *
 * NOTE(review): the definitions are not part of this header.  The parameter
 * names are borrowed from the CRFPP::createModel / createModelFromArray and
 * CRFPP::Model declarations with the same argument shapes further down in
 * this file; confirm the correspondence against the implementation.
 */

/* Four entry points returning a crfpp_model_t*: argv-style or single-string
 * arguments, each optionally paired with a (model_buf, model_size) buffer. */
CRFPP_DLL_EXTERN crfpp_model_t *crfpp_model_new(int argc, char **argv);
CRFPP_DLL_EXTERN crfpp_model_t *crfpp_model_new2(const char *arg);
CRFPP_DLL_EXTERN crfpp_model_t *crfpp_model_from_array_new(int argc,
                                                           char **argv,
                                                           const char *model_buf,
                                                           size_t model_size);
CRFPP_DLL_EXTERN crfpp_model_t *crfpp_model_from_array_new2(const char *arg,
                                                            const char *model_buf,
                                                            size_t model_size);

/* Entry points that take an existing model handle. */
CRFPP_DLL_EXTERN const char *crfpp_model_get_template(crfpp_model_t *model);
CRFPP_DLL_EXTERN void crfpp_model_destroy(crfpp_model_t *model);
CRFPP_DLL_EXTERN const char *crfpp_model_strerror(crfpp_model_t *model);

/* Returns a tagger handle (crfpp_t*) obtained from a model handle. */
CRFPP_DLL_EXTERN crfpp_t *crfpp_model_new_tagger(crfpp_model_t *model);
/*
 * C interface: tagger handles.  Every function that works on an existing
 * tagger takes the crfpp_t handle as its first argument.
 *
 * NOTE(review): the definitions live outside this header.  Parameter names
 * follow the CRFPP::Tagger methods with matching signatures declared later
 * in this file (there, i is a token index and j / k are tag ids); confirm
 * the mapping against the implementation before relying on it.
 */

/* handle lifetime and model binding */
CRFPP_DLL_EXTERN crfpp_t *crfpp_new(int argc, char **argv);
CRFPP_DLL_EXTERN crfpp_t *crfpp_new2(const char *arg);
CRFPP_DLL_EXTERN void crfpp_destroy(crfpp_t *tagger);
CRFPP_DLL_EXTERN int crfpp_set_model(crfpp_t *tagger, crfpp_model_t *model);

/* feeding input */
CRFPP_DLL_EXTERN int crfpp_add2(crfpp_t *tagger, size_t size, const char **str);
CRFPP_DLL_EXTERN int crfpp_add(crfpp_t *tagger, const char *str);

/* sizes and weights */
CRFPP_DLL_EXTERN size_t crfpp_size(crfpp_t *tagger);
CRFPP_DLL_EXTERN size_t crfpp_xsize(crfpp_t *tagger);
CRFPP_DLL_EXTERN size_t crfpp_dsize(crfpp_t *tagger);
CRFPP_DLL_EXTERN const float *crfpp_weight_vector(crfpp_t *tagger);

/* per-token tag ids */
CRFPP_DLL_EXTERN size_t crfpp_result(crfpp_t *tagger, size_t i);
CRFPP_DLL_EXTERN size_t crfpp_answer(crfpp_t *tagger, size_t i);
CRFPP_DLL_EXTERN size_t crfpp_y(crfpp_t *tagger, size_t i);
CRFPP_DLL_EXTERN size_t crfpp_ysize(crfpp_t *tagger);

/* probabilities: (i, j), (i) and whole-output variants */
CRFPP_DLL_EXTERN double crfpp_prob(crfpp_t *tagger, size_t i, size_t j);
CRFPP_DLL_EXTERN double crfpp_prob2(crfpp_t *tagger, size_t i);
CRFPP_DLL_EXTERN double crfpp_prob3(crfpp_t *tagger);

/* penalties */
CRFPP_DLL_EXTERN void crfpp_set_penalty(crfpp_t *tagger, size_t i, size_t j,
                                        double penalty);
CRFPP_DLL_EXTERN double crfpp_penalty(crfpp_t *tagger, size_t i, size_t j);

/*
 * lattice values
 *
 * crfpp_emisstion_cost is spelled differently from the C++
 * Tagger::emission_cost(); the name is kept exactly as declared because
 * renaming it would change the exported symbol.
 */
CRFPP_DLL_EXTERN double crfpp_alpha(crfpp_t *tagger, size_t i, size_t j);
CRFPP_DLL_EXTERN double crfpp_beta(crfpp_t *tagger, size_t i, size_t j);
CRFPP_DLL_EXTERN double crfpp_emisstion_cost(crfpp_t *tagger, size_t i,
                                             size_t j);
CRFPP_DLL_EXTERN double crfpp_next_transition_cost(crfpp_t *tagger, size_t i,
                                                   size_t j, size_t k);
CRFPP_DLL_EXTERN double crfpp_prev_transition_cost(crfpp_t *tagger, size_t i,
                                                   size_t j, size_t k);
CRFPP_DLL_EXTERN double crfpp_best_cost(crfpp_t *tagger, size_t i, size_t j);

/* feature vectors (crfpp_emittion_vector: spelling kept, see above) */
CRFPP_DLL_EXTERN const int *crfpp_emittion_vector(crfpp_t *tagger, size_t i,
                                                  size_t j);
CRFPP_DLL_EXTERN const int *crfpp_next_transition_vector(crfpp_t *tagger,
                                                         size_t i, size_t j,
                                                         size_t k);
CRFPP_DLL_EXTERN const int *crfpp_prev_transition_vector(crfpp_t *tagger,
                                                         size_t i, size_t j,
                                                         size_t k);
CRFPP_DLL_EXTERN double crfpp_Z(crfpp_t *tagger);

/* running the tagger and managing its context */
CRFPP_DLL_EXTERN int crfpp_parse(crfpp_t *tagger);
CRFPP_DLL_EXTERN int crfpp_empty(crfpp_t *tagger);
CRFPP_DLL_EXTERN int crfpp_clear(crfpp_t *tagger);
CRFPP_DLL_EXTERN int crfpp_next(crfpp_t *tagger);

/* entry points that take arguments only, no handle */
CRFPP_DLL_EXTERN int crfpp_test(int argc, char **argv);
CRFPP_DLL_EXTERN int crfpp_test2(const char *arg);
CRFPP_DLL_EXTERN int crfpp_learn(int argc, char **argv);
CRFPP_DLL_EXTERN int crfpp_learn2(const char *arg);

/* string accessors */
CRFPP_DLL_EXTERN const char *crfpp_strerror(crfpp_t *tagger);
CRFPP_DLL_EXTERN const char *crfpp_yname(crfpp_t *tagger, size_t i);
CRFPP_DLL_EXTERN const char *crfpp_y2(crfpp_t *tagger, size_t i);
CRFPP_DLL_EXTERN const char *crfpp_x(crfpp_t *tagger, size_t i, size_t j);
CRFPP_DLL_EXTERN const char **crfpp_x2(crfpp_t *tagger, size_t i);

/* string-in / string-out variants */
CRFPP_DLL_EXTERN const char *crfpp_parse_tostr(crfpp_t *tagger,
                                               const char *str);
CRFPP_DLL_EXTERN const char *crfpp_parse_tostr2(crfpp_t *tagger,
                                                const char *str, size_t size);
CRFPP_DLL_EXTERN const char *crfpp_parse_tostr3(crfpp_t *tagger,
                                                const char *str, size_t size1,
                                                char *result, size_t size2);
CRFPP_DLL_EXTERN const char *crfpp_tostr(crfpp_t *tagger);
CRFPP_DLL_EXTERN const char *crfpp_tostr2(crfpp_t *tagger, char *result,
                                          size_t size);

/* settings */
CRFPP_DLL_EXTERN void crfpp_set_vlevel(crfpp_t *tagger, unsigned int vlevel);
CRFPP_DLL_EXTERN unsigned int crfpp_vlevel(crfpp_t *tagger);
CRFPP_DLL_EXTERN void crfpp_set_cost_factor(crfpp_t *tagger,
                                            float cost_factor);
CRFPP_DLL_EXTERN float crfpp_cost_factor(crfpp_t *tagger);
CRFPP_DLL_EXTERN void crfpp_set_nbest(crfpp_t *tagger, size_t nbest);
#endif
#ifdef __cplusplus
}
#endif
/* C++ interface */
#ifdef __cplusplus
namespace CRFPP {
class Tagger;
// Abstract interface to a CRF++ model.  Every member is pure virtual except
// the empty inline destructor, so this header only fixes the interface.
// The createModel() / createModelFromArray() factories declared later in
// this namespace return pointers to this type.
// NOTE(maintainers): the declaration order of the virtual members is part of
// the binary interface; append new members rather than reordering.
class CRFPP_DLL_CLASS_EXTERN Model {
public:
#ifndef SWIG
// The four open*() overloads below are compiled out when SWIG is defined.
//
// open model with parameters in argv[]
// e.g. argv[] = {"CRF++", "-m", "model", "-v3"};
virtual bool open(int argc, char** argv) = 0;
// open model with parameter arg, e.g. arg = "-m model -v3";
virtual bool open(const char* arg) = 0;
// open model with parameters in argv[],
// e.g. argv[] = {"CRF++", "-v3"};
// NOTE(review): presumably the model itself is read from the model_size
// chars starting at model_buf -- confirm against the implementation.
virtual bool openFromArray(int argc, char** argv,
const char *model_buf,
size_t model_size) = 0;
// same as the overload above, with the parameters given as one string,
// e.g. arg = "-m model -v3";
virtual bool openFromArray(const char* arg,
const char *model_buf,
size_t model_size) = 0;
#endif
// return the template string embedded in this model file.
virtual const char *getTemplate() const = 0;
// create a Tagger object. The returned object shares this
// model object.
virtual Tagger *createTagger() const = 0;
// NOTE(review): undocumented in the original header; presumably returns the
// last error as a string, like Tagger::what() -- confirm.
virtual const char* what() = 0;
virtual ~Model() {}
};
// Abstract tagger interface.  Every member is pure virtual except the empty
// inline destructor.  createTagger() in this namespace and
// Model::createTagger() return pointers to this type.
// Members wrapped in "#ifndef SWIG" are compiled out when SWIG is defined.
// In the comments below, i is a token (line) index and j / k are tag ids.
// NOTE(maintainers): the declaration order of the virtual members is part of
// the binary interface; append new members rather than reordering.
class CRFPP_DLL_CLASS_EXTERN Tagger {
public:
#ifndef SWIG
// open model with parameters in argv[]
// e.g. argv[] = {"CRF++", "-m", "model", "-v3"};
virtual bool open(int argc, char** argv) = 0;
// open model with parameter arg, e.g. arg = "-m model -v3";
virtual bool open(const char* arg) = 0;
// add str[0..size) as the tokens of one entry to the current context
virtual bool add(size_t size, const char **str) = 0;
// close the current model
virtual void close() = 0;
// return the parameter vector; its size should be dsize()
virtual const float *weight_vector() const = 0;
#endif
// set the Model used by this tagger
virtual bool set_model(const Model &model) = 0;
// set vlevel
// NOTE(review): presumably the level given by the -vN option shown in the
// open() examples -- confirm.
virtual void set_vlevel(unsigned int vlevel) = 0;
// get vlevel
virtual unsigned int vlevel() const = 0;
// set cost factor
virtual void set_cost_factor(float cost_factor) = 0;
// get cost factor
virtual float cost_factor() const = 0;
// set nbest (see next() for how n-best results are retrieved)
virtual void set_nbest(size_t nbest) = 0;
// get nbest
virtual size_t nbest() const = 0;
// add one line to the current context
virtual bool add(const char* str) = 0;
// return the number of tokens (lines)
virtual size_t size() const = 0;
// return the number of columns
virtual size_t xsize() const = 0;
// return the number of features
virtual size_t dsize() const = 0;
// return the output tag id of the i-th token
virtual size_t result(size_t i) const = 0;
// return the answer tag id of the i-th token, if it is available
virtual size_t answer(size_t i) const = 0;
// alias of result(i)
virtual size_t y(size_t i) const = 0;
// return the output tag of the i-th token as a string
virtual const char* y2(size_t i) const = 0;
// return the i-th tag id as a string
virtual const char* yname(size_t i) const = 0;
// return the token at [i,j] as a string (i: token, j: column)
virtual const char* x(size_t i, size_t j) const = 0;
#ifndef SWIG
// return the array of strings of the i-th token
virtual const char** x(size_t) const = 0;
#endif
// return the number of output tags
virtual size_t ysize() const = 0;
// return the marginal probability of the j-th tag id at the i-th token
virtual double prob(size_t i, size_t j) const = 0;
// return the marginal probability of the output tag at the i-th token;
// same as prob(i, tagger->y(i));
virtual double prob(size_t i) const = 0;
// return the conditional probability of the entire output
virtual double prob() const = 0;
// set token-level penalty. It would be useful for implementing
// dual decomposition decoding.
// e.g.
// "Dual Decomposition for Parsing with Non-Projective Head Automata"
// Terry Koo, Alexander M. Rush, Michael Collins, Tommi Jaakkola, David Sontag
virtual void set_penalty(size_t i, size_t j, double penalty) = 0;
// get the penalty for (i, j); counterpart of set_penalty()
virtual double penalty(size_t i, size_t j) const = 0;
// return the forward log-prob of the j-th tag at the i-th token
virtual double alpha(size_t i, size_t j) const = 0;
// return the backward log-prob of the j-th tag at the i-th token
virtual double beta(size_t i, size_t j) const = 0;
// return the emission cost of the j-th tag at the i-th token
virtual double emission_cost(size_t i, size_t j) const = 0;
// return the transition cost of [j-th tag at i-th token] to
// [k-th tag at (i+1)-th token]
virtual double next_transition_cost(size_t i,
size_t j, size_t k) const = 0;
// return the transition cost of [j-th tag at i-th token] to
// [k-th tag at (i-1)-th token]
virtual double prev_transition_cost(size_t i,
size_t j, size_t k) const = 0;
// return the best accumulated cost to the j-th tag at the i-th token;
// used in the Viterbi search
virtual double best_cost(size_t i, size_t j) const = 0;
#ifndef SWIG
// return the emission feature vector of the j-th tag at the i-th token
virtual const int* emission_vector(size_t i, size_t j) const = 0;
// return the transition feature vector of [j-th tag at i-th token] to
// [k-th tag at (i+1)-th token]
virtual const int* next_transition_vector(size_t i,
size_t j, size_t k) const = 0;
// return the transition feature vector of [j-th tag at i-th token] to
// [k-th tag at (i-1)-th token]
virtual const int* prev_transition_vector(size_t i,
size_t j, size_t k) const = 0;
#endif
// normalizing factor (log-prob)
virtual double Z() const = 0;
// parse and change the internal state; returns false on failure
virtual bool parse() = 0;
// return true if the context is empty
virtual bool empty() const = 0;
// clear all context
virtual bool clear() = 0;
// change the internal state to the next-optimal output.
// Calling it n times yields the n-best results.
// Need to specify the -nN option to use this function, where
// N>=2
virtual bool next() = 0;
// parse 'str' and return the result as a string;
// 'str' must be written in CRF++'s input format
virtual const char* parse(const char* str) = 0;
#ifndef SWIG
// return the parsed result as a string
virtual const char* toString() = 0;
// return the parsed result as a string.
// The result is saved in the buffer 'result'; 'size' is the
// size of that buffer. Returns NULL on failure.
virtual const char* toString(char* result , size_t size) = 0;
// parse 'str' and return the parsed result.
// You don't need to delete the return value, but the buffer
// is rewritten whenever you call a parse method.
// Returns NULL on failure.
virtual const char* parse(const char *str, size_t size) = 0;
// parse 'str' and return the parsed result.
// The result is stored in the buffer 'result';
// 'size2' is the size of that buffer. Returns NULL on failure.
virtual const char* parse(const char *str, size_t size1,
char *result, size_t size2) = 0;
#endif
// return the internal error code as a string
virtual const char* what() = 0;
virtual ~Tagger() {}
};
/* factory methods */

// Two argument conventions are used throughout:
//   argv form   e.g. argv[] = {"CRF++", "-m", "model", "-v3"};
//   string form e.g. arg = "-m model -v3";

// create a CRFPP::Tagger instance (argv form, then string form)
CRFPP_DLL_EXTERN Tagger* createTagger(int argc, char** argv);
CRFPP_DLL_EXTERN Tagger* createTagger(const char* arg);

// create a CRFPP::Model instance (argv form, then string form)
CRFPP_DLL_EXTERN Model* createModel(int argc, char** argv);
CRFPP_DLL_EXTERN Model* createModel(const char* arg);

// create a CRFPP::Model instance, loading the model from
// [model_buf, model_buf+model_size] (argv form, then string form)
CRFPP_DLL_EXTERN Model* createModelFromArray(int argc, char** argv,
                                             const char* model_buf,
                                             size_t model_size);
CRFPP_DLL_EXTERN Model* createModelFromArray(const char* arg,
                                             const char* model_buf,
                                             size_t model_size);

// return the error code of createTagger() as a string
CRFPP_DLL_EXTERN const char* getTaggerError();
// alias of getTaggerError()
CRFPP_DLL_EXTERN const char* getLastError();
}
#endif
#endif