Skip to content

Commit 974216d

Browse files
authored
Index Builder Properties
1 parent 3ef7994 commit 974216d

File tree

1 file changed

+84
-0
lines changed

1 file changed

+84
-0
lines changed

conf/indexing.tmf.en.test.properties

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
# Wikipedia Dump
2+
# --------------
3+
#Added by Giuseppe Futia (the Wikipedia Dump must be cleaned)
4+
org.dbpedia.spotlight.data.originalWikipediaDump = ../data/tellmefirst/dbpedia/en/original/wikiDump.xml.bz2
5+
6+
org.dbpedia.spotlight.data.wikipediaDump = ../data/tellmefirst/dbpedia/en/original/wikiDump.xml.bz2-modified
7+
8+
# Location for DBpedia resources index
9+
org.dbpedia.spotlight.index.dir = ../data/tellmefirst/dbpedia/en/output/index
10+
org.dbpedia.spotlight.index.minDocsBeforeFlush = 40000
11+
12+
# DBpedia Datasets
13+
# ----------------
14+
org.dbpedia.spotlight.data.labels = ../data/tellmefirst/dbpedia/en/original/labels.nt.bz2
15+
org.dbpedia.spotlight.data.redirects = ../data/tellmefirst/dbpedia/en/original/redirects.nt.bz2
16+
org.dbpedia.spotlight.data.disambiguations = ../data/tellmefirst/dbpedia/en/original/disambiguations.nt.bz2
17+
org.dbpedia.spotlight.data.instanceTypes = ../data/tellmefirst/dbpedia/en/original/instance_types.nt.bz2
18+
19+
# Files created from DBpedia Datasets
20+
# -----------------------
21+
# Added by Giuseppe Futia (Different encoding between the Wikipedia Dump and the DBpedia Datasets)
22+
org.dbpedia.spotlight.data.conceptURIsToDecode =../data/tellmefirst/dbpedia/en/output/conceptURIsToDecode.list
23+
org.dbpedia.spotlight.data.redirectsTCToDecode = ../data/tellmefirst/dbpedia/en/output/redirects_tcToDecode.tsv
24+
org.dbpedia.spotlight.data.surfaceFormsToDecode = ../data/tellmefirst/dbpedia/en/output/surfaceFormsToDecode.tsv
25+
26+
org.dbpedia.spotlight.data.conceptURIs =../data/tellmefirst/dbpedia/en/output/conceptURIs.list
27+
org.dbpedia.spotlight.data.redirectsTC = ../data/tellmefirst/dbpedia/en/output/redirects_tc.tsv
28+
org.dbpedia.spotlight.data.surfaceForms = ../data/tellmefirst/dbpedia/en/output/surfaceForms.tsv
29+
30+
# Language-specific config
31+
# --------------
32+
org.dbpedia.spotlight.language = English
33+
org.dbpedia.spotlight.language_i18n_code = en
34+
org.dbpedia.spotlight.lucene.analyzer = org.apache.lucene.analysis.en.EnglishAnalyzer
35+
org.dbpedia.spotlight.lucene.version = LUCENE_36
36+
37+
# Internationalization (i18n) support -- work in progress
38+
org.dbpedia.spotlight.default_namespace = http://dbpedia.org/resource/
39+
org.dbpedia.spotlight.default_ontology= http://dbpedia.org/ontology/
40+
41+
# Stop word list
42+
org.dbpedia.spotlight.data.stopWords.english = ../data/tellmefirst/dbpedia/en/original/stopwords.en.list
43+
44+
# URI patterns that should not be indexed. e.g. List_of_*
45+
org.dbpedia.spotlight.data.badURIs.english = ../data/tellmefirst/dbpedia/en/original/blacklistedURIPatterns.en.list
46+
47+
# Discards surface forms that are too long (reduces spotting complexity and the overall size on disk and in memory)
48+
org.dbpedia.spotlight.data.maxSurfaceFormLength = 50
49+
# Will index only the words closest to each resource occurrence
50+
org.dbpedia.spotlight.data.maxContextWindowSize = 200
51+
org.dbpedia.spotlight.data.minContextWindowSize = 0
52+
53+
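# Other files
# NOTE: the priors path below is an absolute, machine-specific path (unlike the relative ../data paths used elsewhere); adjust it for your environment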
54+
org.dbpedia.spotlight.data.priors = /home/pablo/eval/grounder/gold/g1b_spotlight.words.uris.counts
55+
56+
# Yahoo! Boss properties
57+
# ----------------------
58+
# application ID
59+
org.dbpedia.spotlight.yahoo.appID =
60+
# number of results returned for one query (maximum: 50)
61+
org.dbpedia.spotlight.yahoo.maxResults = 50
62+
# number of iterations; each iteration returns YahooBossResults results
63+
org.dbpedia.spotlight.yahoo.maxIterations = 100
64+
## important for Yahoo! Boss query string: both language and region must be set according to
65+
## http://developer.yahoo.com/search/boss/boss_guide/supp_regions_lang.html
66+
org.dbpedia.spotlight.yahoo.language = en
67+
org.dbpedia.spotlight.yahoo.region = us
68+
69+
#Index with types
70+
tellmefirst.index_with_types = ../data/tellmefirst/dbpedia/en/output/index-withTypes
71+
72+
#Index with titles
73+
tellmefirst.index_with_titles = ../data/tellmefirst/dbpedia/en/output/index-withTypesTitles
74+
75+
#Index with images
76+
tellmefirst.index_with_titles_images = ../data/tellmefirst/dbpedia/en/output/index-withTypesTitlesImages
77+
#images
78+
tellmefirst.images = ../data/tellmefirst/dbpedia/en/original/images.nt.bz2
79+
80+
# kb and residual kb index
81+
tellmefirst.kb = ../data/tellmefirst/dbpedia/en/output/kb
82+
tellmefirst.residualkb = ../data/tellmefirst/dbpedia/en/output/residualkb
83+
#wikilinks
84+
tellmefirst.wikilinks = ../data/tellmefirst/dbpedia/en/original/page_links.nt.bz2

0 commit comments

Comments
 (0)