-
Notifications
You must be signed in to change notification settings - Fork 20
/
doc.h
executable file
·75 lines (54 loc) · 1.13 KB
/
doc.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#ifndef DOCH
#define DOCH
#include <gsl/gsl_vector.h>
#include <assert.h>
#include "utils.h"
#include "topic.h"
#include "typedefs.h"
/*
 * NOTE(review): the use of OFFSET is not visible in this header;
 * presumably an index offset applied while reading corpus data --
 * confirm against the implementation file.
 */
#define OFFSET 0
/*
 * Standard deviations (both 0.05).  Judging by the names these are
 * presumably the proposal widths used by the GEM Metropolis-Hastings
 * updates declared below (corpus_mh_update_gem*) -- confirm against the
 * implementation.  MH_GEM_STDEV is defined exactly once here so the
 * value cannot diverge between two copies of the same #define.
 */
#define MH_GEM_STDEV 0.05
#define MH_GEM_MEAN_STDEV 0.05
/*
 * resample the levels of a document
 *
 * d          - the document whose levels are resampled
 * do_permute - flag; presumably non-zero asks for the word order to be
 *              permuted before sampling -- TODO confirm against the
 *              definition
 * do_remove  - flag; presumably non-zero asks for the document's current
 *              level assignments to be removed from the counts first --
 *              TODO confirm against the definition
 *
 */
void doc_sample_levels(doc* d, short do_permute, short do_remove);
/*
 * update a level count
 *
 * d      - the document whose level count is changed
 * l      - the level to update (presumably an index into the document's
 *          per-level counts -- confirm valid range with the definition)
 * update - the amount applied to the count (presumably added to it,
 *          e.g. +1.0 / -1.0 -- TODO confirm)
 *
 */
void doc_update_level(doc* d, int l, double update);
/*
 * read a corpus from a data file
 *
 * filename - name of the data file to read
 * c        - corpus to read into (presumably already allocated, e.g. by
 *            corpus_new -- TODO confirm)
 * depth    - NOTE(review): meaning not visible in this header;
 *            presumably the number of levels each document is given --
 *            confirm against the definition
 *
 */
void read_corpus(char* filename, corpus* c, int depth);
/*
 * allocate a new corpus
 *
 * mean, scale - presumably the initial GEM mean and scale of the corpus
 *               (compare the gem_mean / gem_scale parameters of
 *               compute_log_p_level) -- TODO confirm
 *
 * returns a pointer to the new corpus; presumably the caller releases it
 * with free_corpus -- confirm ownership against the definition
 *
 */
corpus* corpus_new(double mean, double scale);
/*
 * score the corpus
 *
 * corp - the corpus to score
 *
 * returns the score as a double; judging by the name this is presumably
 * the (log) score of the GEM part of the model -- TODO confirm scale and
 * sign against the definition
 *
 */
double gem_score(corpus* corp);
/*
 * GEM MH updates
 *
 * The corpus_mh_update_gem* functions each take the corpus to update
 * and return nothing.  Judging by the names, the _mean and _scale
 * variants presumably update the GEM mean and the GEM scale
 * respectively, and corpus_mh_update_gem presumably drives both --
 * TODO confirm against the definitions.
 *
 * compute_log_p_level takes a document plus a GEM mean and scale and
 * returns nothing; presumably it stores per-level log probabilities in
 * the document -- TODO confirm where the result is written.
 *
 */
void corpus_mh_update_gem(corpus* corp);
void corpus_mh_update_gem_mean(corpus* corp);
void corpus_mh_update_gem_scale(corpus* corp);
void compute_log_p_level(doc* d, double gem_mean, double gem_scale);
/*
 * write the document clustering to a file
 *
 * corp - the corpus to write out
 * file - the stream to write to (presumably already opened for writing
 *        by the caller, who also closes it -- TODO confirm)
 *
 * write_corpus_assignment presumably writes the per-document assignments
 * and write_corpus_levels the per-document levels; the output format is
 * not visible in this header.
 *
 * NOTE(review): FILE needs <stdio.h>, which this header does not include
 * directly; presumably it arrives through utils.h -- confirm.
 *
 */
void write_corpus_assignment(corpus* corp, FILE* file);
void write_corpus_levels(corpus* corp, FILE* file);
/*
 * free a corpus / free a single document
 *
 * corp - the corpus to release
 * d    - the document to release
 *
 * NOTE(review): whether free_corpus also frees the documents it holds
 * (e.g. by calling free_doc) is not visible in this header -- confirm
 * before freeing documents separately.
 *
 */
void free_corpus(corpus* corp);
void free_doc(doc* d);
#endif