Skip to content

Commit

Permalink
Merge pull request #284 from ibnesayeed/osagnostic
Browse files Browse the repository at this point in the history
More efficient and accurate memento and archive counts
  • Loading branch information
machawk1 authored Dec 15, 2017
2 parents 135a58b + b741f71 commit 48d94f6
Showing 1 changed file with 13 additions and 20 deletions.
33 changes: 13 additions & 20 deletions bundledApps/WAIL.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@
# from wx import *
import waybackConfigWriter
from subprocess import Popen, PIPE
from subprocess import check_output

# For a more asynchronous UI, esp with accessible()s
from multiprocessing import Pool as Thread
Expand Down Expand Up @@ -334,19 +333,6 @@ def __init__(self, parent):
self.uri.Bind(wx.EVT_KEY_UP, self.uriChanged) # Call memgator on URI change


def getHosts(self, tm):
    """Count the mementos contributed by each archive host in a TimeMap.

    Args:
        tm: Link-format TimeMap text whose memento entries look like
            ``<URI-M>; rel="... memento"``.

    Returns:
        A dict mapping the network location (``host[:port]``) of each
        memento URI to the number of mementos found at that location.
        Empty when the TimeMap holds no memento links.
    """
    # Bound the URI by its angle brackets and the rel value by its quotes.
    # A greedy ``.*`` would swallow several links that share one line into
    # a single match and could capture a non-memento link (e.g. the
    # "original") in place of the memento that follows it.
    memento_uris = re.findall(r'<([^>\n]*)>; rel="[^"]*memento"', tm)

    hosts = {}
    for uri in memento_uris:
        host = urlparse(uri).netloc
        hosts[host] = hosts.get(host, 0) + 1
    return hosts


def setMementoCount(self, mCount, aCount=''):
ui_mementoCountMessage_pos = (105, 85)
ui_mementoCountMessage_size = (150, 20)
Expand Down Expand Up @@ -384,20 +370,27 @@ def setMessage(self, msg):
def fetchMementos(self):
# TODO: Use CDXJ for counting the mementos
currentURIValue = self.uri.GetValue()
out = check_output([memGatorPath, "-a", archivesJSON,
print('MEMGATOR checking {0}'.format(currentURIValue))
mg = Popen([memGatorPath,
'--arcs', archivesJSON,
'--format', 'cdxj',
'--restimeout', '0m3s',
'--hdrtimeout', '3s',
'--contimeout', '3s',
currentURIValue])
print('MEMGATOR checking {0}'.format(currentURIValue))
currentURIValue], stdout=PIPE)

# TODO: bug, on Gogo internet MemGator cannot hit aggregator, which
# results in 0 mementos, for which MemGator throws exception

mCount = out.count("memento")
aCount = len(self.getHosts(out))
mCount = 0
archHosts = set()
for line in mg.stdout:
l = line.strip()
if l[:1].isdigit():
mCount += 1
archHosts.add(l.split('/')[2])

self.setMementoCount(mCount, aCount) # UI not updated on Windows
self.setMementoCount(mCount, len(archHosts)) # UI not updated on Windows

print('MEMGATOR counted {0} {1}'.format(currentURIValue, mCount))
# TODO: cache the TM
Expand Down

0 comments on commit 48d94f6

Please sign in to comment.