|
| 1 | +# Wikipedia Dump |
| 2 | +# -------------- |
| 3 | +#Added by Giuseppe Futia (the Wikipedia Dump must be cleaned) |
| 4 | +org.dbpedia.spotlight.data.originalWikipediaDump = ../data/tellmefirst/dbpedia/en/original/wikiDump.xml.bz2 |
| 5 | + |
| 6 | +org.dbpedia.spotlight.data.wikipediaDump = ../data/tellmefirst/dbpedia/en/original/wikiDump.xml.bz2-modified |
| 7 | + |
| 8 | +# Location for DBpedia resources index |
| 9 | +org.dbpedia.spotlight.index.dir = ../data/tellmefirst/dbpedia/en/output/index |
| 10 | +org.dbpedia.spotlight.index.minDocsBeforeFlush = 40000 |
| 11 | + |
| 12 | +# DBpedia Datasets |
| 13 | +# ---------------- |
| 14 | +org.dbpedia.spotlight.data.labels = ../data/tellmefirst/dbpedia/en/original/labels.nt.bz2 |
| 15 | +org.dbpedia.spotlight.data.redirects = ../data/tellmefirst/dbpedia/en/original/redirects.nt.bz2 |
| 16 | +org.dbpedia.spotlight.data.disambiguations = ../data/tellmefirst/dbpedia/en/original/disambiguations.nt.bz2 |
| 17 | +org.dbpedia.spotlight.data.instanceTypes = ../data/tellmefirst/dbpedia/en/original/instance_types.nt.bz2 |
| 18 | + |
| 19 | +# Files created from DBpedia Datasets |
| 20 | +# ----------------------- |
| 21 | +# Added by Giuseppe Futia (Different encoding between the Wikipedia Dump and the DBpedia Datasets) |
| 22 | +org.dbpedia.spotlight.data.conceptURIsToDecode =../data/tellmefirst/dbpedia/en/output/conceptURIsToDecode.list |
| 23 | +org.dbpedia.spotlight.data.redirectsTCToDecode = ../data/tellmefirst/dbpedia/en/output/redirects_tcToDecode.tsv |
| 24 | +org.dbpedia.spotlight.data.surfaceFormsToDecode = ../data/tellmefirst/dbpedia/en/output/surfaceFormsToDecode.tsv |
| 25 | + |
| 26 | +org.dbpedia.spotlight.data.conceptURIs =../data/tellmefirst/dbpedia/en/output/conceptURIs.list |
| 27 | +org.dbpedia.spotlight.data.redirectsTC = ../data/tellmefirst/dbpedia/en/output/redirects_tc.tsv |
| 28 | +org.dbpedia.spotlight.data.surfaceForms = ../data/tellmefirst/dbpedia/en/output/surfaceForms.tsv |
| 29 | + |
| 30 | +# Language-specific config |
| 31 | +# -------------- |
| 32 | +org.dbpedia.spotlight.language = English |
| 33 | +org.dbpedia.spotlight.language_i18n_code = en |
| 34 | +org.dbpedia.spotlight.lucene.analyzer = org.apache.lucene.analysis.en.EnglishAnalyzer |
| 35 | +org.dbpedia.spotlight.lucene.version = LUCENE_36 |
| 36 | + |
| 37 | +# Internationalization (i18n) support -- work in progress |
| 38 | +org.dbpedia.spotlight.default_namespace = http://dbpedia.org/resource/ |
| 39 | +org.dbpedia.spotlight.default_ontology= http://dbpedia.org/ontology/ |
| 40 | + |
| 41 | +# Stop word list |
| 42 | +org.dbpedia.spotlight.data.stopWords.english = ../data/tellmefirst/dbpedia/en/original/stopwords.en.list |
| 43 | + |
| 44 | +# URI patterns that should not be indexed. e.g. List_of_* |
| 45 | +org.dbpedia.spotlight.data.badURIs.english = ../data/tellmefirst/dbpedia/en/original/blacklistedURIPatterns.en.list |
| 46 | + |
| 47 | +# Discards surface forms that are too long (reduces the complexity of spotting and, in general, the size on disk and in memory) |
| 48 | +org.dbpedia.spotlight.data.maxSurfaceFormLength = 50 |
| 49 | +# Will index only words closest to resource occurrence |
| 50 | +org.dbpedia.spotlight.data.maxContextWindowSize = 200 |
| 51 | +org.dbpedia.spotlight.data.minContextWindowSize = 0 |
| 52 | + |
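| 53 | +# Other files (NOTE: the priors path below is absolute and machine-specific, unlike the relative paths used elsewhere in this file; adjust it for your environment) |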
| 54 | +org.dbpedia.spotlight.data.priors = /home/pablo/eval/grounder/gold/g1b_spotlight.words.uris.counts |
| 55 | + |
| 56 | +# Yahoo! Boss properties |
| 57 | +# ---------------------- |
| 58 | +# application ID |
| 59 | +org.dbpedia.spotlight.yahoo.appID = |
| 60 | +# number of results returned for one query (maximum: 50) |
| 61 | +org.dbpedia.spotlight.yahoo.maxResults = 50 |
| 62 | +# number of iterations; each iteration returns YahooBossResults results |
| 63 | +org.dbpedia.spotlight.yahoo.maxIterations = 100 |
| 64 | +## important for Yahoo! Boss query string: both language and region must be set according to |
| 65 | +## http://developer.yahoo.com/search/boss/boss_guide/supp_regions_lang.html |
| 66 | +org.dbpedia.spotlight.yahoo.language = en |
| 67 | +org.dbpedia.spotlight.yahoo.region = us |
| 68 | + |
| 69 | +#Index with types |
| 70 | +tellmefirst.index_with_types = ../data/tellmefirst/dbpedia/en/output/index-withTypes |
| 71 | + |
| 72 | +#Index with titles |
| 73 | +tellmefirst.index_with_titles = ../data/tellmefirst/dbpedia/en/output/index-withTypesTitles |
| 74 | + |
| 75 | +#Index with images |
| 76 | +tellmefirst.index_with_titles_images = ../data/tellmefirst/dbpedia/en/output/index-withTypesTitlesImages |
| 77 | +#images |
| 78 | +tellmefirst.images = ../data/tellmefirst/dbpedia/en/original/images.nt.bz2 |
| 79 | + |
| 80 | +# kb and residual kb index |
| 81 | +tellmefirst.kb = ../data/tellmefirst/dbpedia/en/output/kb |
| 82 | +tellmefirst.residualkb = ../data/tellmefirst/dbpedia/en/output/residualkb |
| 83 | +#wikilinks |
| 84 | +tellmefirst.wikilinks = ../data/tellmefirst/dbpedia/en/original/page_links.nt.bz2 |
0 commit comments