From 9b2575cdbc99966e1e368ff07e53e54cc7ee8c31 Mon Sep 17 00:00:00 2001
From: Mahmoud Hashemi <mahmoud@hatnote.com>
Date: Sun, 4 Sep 2016 02:34:51 -0700
Subject: [PATCH] refactor data testing and take the french data for a spin

---
 montage/__init__.py                           |  0
 montage/rdb.py                                | 82 +++----------------
 run_data_test.py                              | 62 ++++++++++++++
 ...lm2015_fr_11874.csv => wlm2015_fr_12k.csv} |  0
 4 files changed, 74 insertions(+), 70 deletions(-)
 create mode 100644 montage/__init__.py
 create mode 100644 run_data_test.py
 rename test_data/{wlm2015_fr_11874.csv => wlm2015_fr_12k.csv} (100%)

diff --git a/montage/__init__.py b/montage/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/montage/rdb.py b/montage/rdb.py
index e712ca7a..2e7a8418 100644
--- a/montage/rdb.py
+++ b/montage/rdb.py
@@ -1,6 +1,7 @@
 # Relational database models for Montage
 import random
 import itertools
+from math import ceil
 
 from sqlalchemy import (Column,
                         String,
@@ -173,8 +174,9 @@ class Entry(Base, DictableBase):
 class RoundEntry(Base, DictableBase):
     __tablename__ = 'round_entries'
 
-    entry_id = Column(Integer, ForeignKey('entries.id'), primary_key=True)
-    round_id = Column(Integer, ForeignKey('rounds.id'), primary_key=True)
+    id = Column(Integer, primary_key=True)
+    entry_id = Column(Integer, ForeignKey('entries.id'))
+    round_id = Column(Integer, ForeignKey('rounds.id'))
 
     entry = relationship(Entry, back_populates='entered_rounds')
     round = relationship(Round, back_populates='round_entries')
@@ -210,12 +212,10 @@ class Task(Base, DictableBase):
     __tablename__ = 'tasks'
     id = Column(Integer, primary_key=True)
     user_id = Column(Integer, ForeignKey('users.id'))
-    entry_id = Column(Integer, ForeignKey('entries.id'))
-    round_id = Column(Integer, ForeignKey('rounds.id'))
+    entry_id = Column(Integer, ForeignKey('round_entries.id'))
 
     user = relationship('User')
-    round = relationship('Round')  # , back_populates='tasks')
-    entry = relationship('Entry')
+    round_entry = relationship('RoundEntry')  # , back_populates='tasks')
 
 
 class UserDAO(object):
@@ -306,26 +306,6 @@ def get_all_rounds(self):
         return ret
 
 
-import os.path
-
-CUR_PATH = os.path.dirname(os.path.abspath(__file__))
-DATA_PATH = os.path.join(os.path.dirname(CUR_PATH), 'test_data')
-
-
-def make_rdb_session(db_url='sqlite:///tmp_montage.db'):
-    from sqlalchemy import create_engine
-    from sqlalchemy.orm import sessionmaker
-
-    # echo="debug" also prints results of selects, etc.
-    engine = create_engine(db_url, echo=True)
-    Base.metadata.create_all(engine)
-
-    session_type = sessionmaker()
-    session_type.configure(bind=engine)
-    session = session_type()
-    return session
-
-
 def create_initial_tasks(rdb_session, round):
     """this creates the initial tasks.
 
@@ -354,62 +334,24 @@ def create_initial_tasks(rdb_session, round):
                                   .order_by(rand_func).all()
 
     to_process = itertools.chain.from_iterable([shuffled_entries] * quorum)
-    per_juror_count = len(shuffled_entries) * (float(quorum) / len(jurors))
+    per_juror = int(ceil(len(shuffled_entries) * (float(quorum) / len(jurors))))
 
-    juror_iters = [itertools.repeat(j, per_juror_count) for j in jurors]
+    juror_iters = itertools.chain.from_iterable([itertools.repeat(j, per_juror)
+                                                 for j in jurors])
 
-    for entry, juror in itertools.izip_longest(to_process, juror_iters, None):
+    pairs = itertools.izip_longest(to_process, juror_iters, fillvalue=None)
+    for entry, juror in pairs:
         if juror is None:
             raise RuntimeError('should never run out of jurors first')
         if entry is None:
             break
 
-        task = Task(user=juror, entry=entry, round=round)
+        task = Task(user=juror, round_entry=entry)
         ret.append(task)
 
     return ret
 
 
-def make_fake_data(debug=True):
-    from loaders import load_full_csv
-
-    rdb_session = make_rdb_session()
-    coord = User(username='Slaporte')
-    juror = User(username='MahmoudHashemi')
-
-    campaign = Campaign(name='Test Campaign 2016')
-    rdb_session.add(campaign)
-
-    campaign.coords.append(coord)
-    round = Round(name='Test Round 1', quorum=1)
-    campaign.rounds.append(round)
-    round.jurors.append(juror)
-
-    CSV_PATH = DATA_PATH + '/wlm2015_ir_5.csv'
-
-    with open(CSV_PATH) as f:
-        entries = load_full_csv(f)
-
-    for entry in entries:
-        round.entries.append(entry)
-
-    create_initial_tasks(rdb_session, round)
-
-    rdb_session.commit()
-    if debug:
-        import pdb;pdb.set_trace()
-    return
-
-
-def main():
-    make_fake_data()
-    return
-
-
-if __name__ == '__main__':
-    main()
-
-
 """
 * Indexes
 * db session management, engine creation, and schema creation separation
diff --git a/run_data_test.py b/run_data_test.py
new file mode 100644
index 00000000..bcbb4c54
--- /dev/null
+++ b/run_data_test.py
@@ -0,0 +1,62 @@
+
+import os.path
+
+from montage.rdb import User, Campaign, Round, Base, create_initial_tasks
+from montage.loaders import load_full_csv
+
+CUR_PATH = os.path.dirname(os.path.abspath(__file__))  # dir of this script
+DATA_PATH = os.path.join(CUR_PATH, 'test_data')  # CSV test data directory
+
+
+def make_rdb_session(db_url='sqlite:///tmp_montage.db', echo=True):
+    """Create Base's tables at db_url and return a session bound to it."""
+    from sqlalchemy import create_engine
+    from sqlalchemy.orm import sessionmaker
+
+    # passing echo="debug" additionally logs the rows returned by selects
+    db_engine = create_engine(db_url, echo=echo)
+    Base.metadata.create_all(db_engine)
+
+    # bind at construction rather than through a later configure() call
+    session_factory = sessionmaker(bind=db_engine)
+    return session_factory()
+
+
+def make_fake_data(debug=True, echo=True):
+    """Load test_data/wlm2015_fr_12k.csv into a test round and create tasks.
+
+    debug -- when true, drop into pdb after the final commit
+    echo -- forwarded to make_rdb_session
+    """
+    rdb_session = make_rdb_session(echo=echo)
+    coord = User(username='Slaporte')
+    juror = User(username='MahmoudHashemi')
+
+    campaign = Campaign(name='Test Campaign 2016')
+    rdb_session.add(campaign)
+    campaign.coords.append(coord)
+    round = Round(name='Test Round 1', quorum=1)
+    campaign.rounds.append(round)
+    round.jurors.append(juror)
+
+    # this patch renames the data file to wlm2015_fr_12k.csv (was _11874)
+    csv_path = os.path.join(DATA_PATH, 'wlm2015_fr_12k.csv')
+    with open(csv_path) as f:
+        entries = load_full_csv(f)
+    for entry in entries:
+        round.entries.append(entry)
+    rdb_session.commit()  # store the entries before tasks are built
+    rdb_session.add_all(create_initial_tasks(rdb_session, round))
+    rdb_session.commit()
+    if debug:
+        import pdb;pdb.set_trace()
+    return
+
+
+def main():
+    """Script entry point: build the fake data with debug and echo off."""
+    make_fake_data(debug=False, echo=False)
+
+
+if __name__ == '__main__':
+    main()  # only run the data test when executed as a script
diff --git a/test_data/wlm2015_fr_11874.csv b/test_data/wlm2015_fr_12k.csv
similarity index 100%
rename from test_data/wlm2015_fr_11874.csv
rename to test_data/wlm2015_fr_12k.csv