clicheio · tkiapril · May 9, 2015 · Mar 7, 2015 · Apr 21, 2015 · Apr 21, 2015
diff --git a/cliche/celery.py b/cliche/celery.py
@@ -227,6 +227,7 @@ def setup_raven_logging(conf=None, **kwargs):
 
 
 @task_failure.connect
-def report_task_failure(task_id, exception, args, kwargs, traceback, einfo):
+def report_task_failure(task_id, exception, args, kwargs,
+                        traceback, einfo, sender):
     client = get_raven_client()
     client.captureException(einfo.exc_info)
diff --git a/cliche/services/tvtropes/crawler.py b/cliche/services/tvtropes/crawler.py
@@ -132,23 +132,22 @@ def fetch_link(url, session, *, log_prefix=''):
         return False, None, None, None, final_url
     tree = document_fromstring(r.text)
     try:
-        namespace = tree.xpath('//div[@class="pagetitle"]')[0] \
-            .text.strip()[:-1]
+        name = tree.find_class('article_title')[0].text_content()
     except (AttributeError, AssertionError, IndexError):
         logger.warning('%sWarning on url %s: '
                        'There is no pagetitle on this page. Ignoring.',
                        log_prefix, url)
         return False, tree, None, None, final_url
-    if namespace == '':
-        namespace = 'Main'
-    name = tree.xpath('//div[@class="pagetitle"]/span')[0].text.strip()
-
-    type = determine_type(namespace)
-    if type == 'Administrivia':
-        return False, tree, namespace, name, final_url
-    upsert_entity(session, namespace, name, type, final_url)
-    process_redirections(session, url, final_url, namespace, name)
-    return True, tree, namespace, name, final_url
+    else:
+        *namespace, name = name.split(':')
+        name = name.strip()
+        namespace = 'Main' if not namespace else namespace[0]
+        type = determine_type(namespace)
+        if type == 'Administrivia':
+            return False, tree, namespace, name, final_url
+        upsert_entity(session, namespace, name, type, final_url)
+        process_redirections(session, url, final_url, namespace, name)
+        return True, tree, namespace, name, final_url
 
 
 def recently_crawled(current_time, url, session):

diff --git a/tests/tvtropes_crawler_test.py b/tests/tvtropes_crawler_test.py
@@ -0,0 +1,21 @@
+import requests
+
+from cliche.services.tvtropes.crawler import fetch_link
+
+
+def test_fetch_link(monkeypatch, fx_session, fx_celery_app):
+    # Runs fetch_link against a canned page; requests.get is stubbed below.
+    url = 'http://tvtropes.org/pmwiki/pmwiki.php/Main/GodJob'
+    text = '<div class="pagetitle"><div class="article_title"><h1>' \
+           '<span>God Job</span></h1></div></div>'
+
+    def mockreturn(path):  # stub for requests.get; returns the canned page
+        req = requests.Request()  # used here as a stand-in response object
+        req.url = url
+        req.text = text
+        return req
+
+    monkeypatch.setattr(requests, "get", mockreturn)
+    # Title has no ':' so namespace should be 'Main'; check the last 3 items.
+    result = fetch_link(url, fx_session)
+    assert result[-3:] == ('Main', 'God Job', url)