Skip to content

Commit

Permalink
fix #34
Browse files Browse the repository at this point in the history
  • Loading branch information
fhamborg committed May 20, 2019
1 parent ac503f6 commit b9de435
Show file tree
Hide file tree
Showing 7 changed files with 44 additions and 7 deletions.
2 changes: 1 addition & 1 deletion .idea/Giveme5W1H.iml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion .idea/misc.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Giveme5W1H/examples/extracting/parse_documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
"""

# don't forget to start up core_nlp_host
# java -mx4g -cp "*" edu.stanford.nlp.pipeline.StanfordCoreNLPServer -port 9000 -timeout 15000
# giveme5w1h-corenlp

if __name__ == '__main__':
# helper to setup a correct path
Expand Down
28 changes: 28 additions & 0 deletions Giveme5W1H/examples/extracting/parse_from_newsplease.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
"""
This is a simple example of how to use the extractor in combination with news-please, a news crawler and scraper (https://github.com/fhamborg/news-please).
- Nothing is cached
"""

# don't forget to start up core_nlp_host
# giveme5w1h-corenlp

from newsplease import NewsPlease

from Giveme5W1H.extractor.document import Document
from Giveme5W1H.extractor.extractor import MasterExtractor

extractor = MasterExtractor()


def main():
    """Download one news article and print its top-ranked 'who' answer.

    The article is fetched with news-please, converted into a Giveme5W1H
    ``Document`` and run through the module-level ``extractor``.

    NOTE(review): per the comment at the top of this file, a CoreNLP server
    (``giveme5w1h-corenlp``) must be started before running this example.
    """
    article = NewsPlease.from_url(
        'https://www.foxnews.com/politics/house-democrat-subpoenas-mnuchin-irs-for-trumps-tax-returns')
    doc = Document.from_newsplease(article)
    doc = extractor.parse(doc)

    # Show the result; previously the answer was computed and then dropped,
    # so running the example produced no visible output.
    top_who_answer = doc.get_top_answer('who').get_parts_as_text()
    print(top_who_answer)


if __name__ == '__main__':
main()
7 changes: 4 additions & 3 deletions Giveme5W1H/examples/extracting/parse_single_from_code.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,9 @@
"""

# don't forget to start up core_nlp_host
# java -mx4g -cp "*" edu.stanford.nlp.pipeline.StanfordCoreNLPServer -port 9000 -timeout 15000
# giveme5w1h-corenlp

titleshort = "Barack Obama was born in Hawaii. He is the president. Obama was elected in 2008."

title = "Taliban attacks German consulate in northern Afghan city of Mazar-i-Sharif with truck bomb"
lead = "The death toll from a powerful Taliban truck bombing at the German consulate in Afghanistan's Mazar-i-Sharif city rose to at least six Friday, with more than 100 others wounded in a major militant assault."
Expand Down Expand Up @@ -74,8 +75,8 @@

# giveme5w setup - with defaults
extractor = MasterExtractor()
Document()
doc = Document(title, lead, text, date_publish)
doc = Document.from_text(titleshort, date_publish)

doc = extractor.parse(doc)

top_who_answer = doc.get_top_answer('who').get_parts_as_text()
Expand Down
8 changes: 8 additions & 0 deletions Giveme5W1H/extractor/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,14 @@ def __init__(self, title='', desc='', text='', date=None, raw_data=None):
def from_text(cls, text, date=None, raw_data=None):
return cls(title=text, date=date, raw_data=raw_data)

@classmethod
def from_newsplease(cls, article):
    """Build a document from a news-please article object.

    Reads ``title``, ``text``, ``description`` and ``date_publish`` from
    *article*. Text fields that are missing (``None``) are replaced by the
    empty string, matching the ``''`` defaults of the constructor, so the
    resulting document never carries ``None`` where text is expected.

    :param article: object exposing the attributes ``title``, ``text``,
        ``description`` and ``date_publish`` (presumably a news-please
        article; verify against caller).
    :return: a new instance of ``cls``; ``date`` is ``str(date_publish)``
        when a publish date is present, otherwise ``None``. ``raw_data`` is
        always ``None``.
    """
    # The date is handed on in its string form; a missing date stays None.
    date = str(article.date_publish) if article.date_publish else None

    return cls(
        title=article.title or '',
        text=article.text or '',
        desc=article.description or '',
        date=date,
        raw_data=None,
    )

def is_preprocessed(self, preprocessed=None):
if preprocessed is True or preprocessed is False:
self._preprocessed = preprocessed
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from setuptools import setup, find_packages

setup(name='giveme5w1h',
version='1.0.15',
version='1.0.16',
description="Extraction of the journalistic five W and one H questions (5W1H) from news articles.",
long_description="""Giveme5W1H is an open source, easy-to-use system to that extracts phrases answering the journalist 5W1H questions to describe an article's main event: who did what, when, where, why, and how?""",
classifiers=[
Expand Down

0 comments on commit b9de435

Please sign in to comment.