From 9b2575cdbc99966e1e368ff07e53e54cc7ee8c31 Mon Sep 17 00:00:00 2001 From: Mahmoud Hashemi Date: Sun, 4 Sep 2016 02:34:51 -0700 Subject: [PATCH] refactor data testing and take the french data for a spin --- montage/__init__.py | 0 montage/rdb.py | 82 +++---------------- run_data_test.py | 62 ++++++++++++++ ...lm2015_fr_11874.csv => wlm2015_fr_12k.csv} | 0 4 files changed, 74 insertions(+), 70 deletions(-) create mode 100644 montage/__init__.py create mode 100644 run_data_test.py rename test_data/{wlm2015_fr_11874.csv => wlm2015_fr_12k.csv} (100%) diff --git a/montage/__init__.py b/montage/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/montage/rdb.py b/montage/rdb.py index e712ca7a..2e7a8418 100644 --- a/montage/rdb.py +++ b/montage/rdb.py @@ -1,6 +1,7 @@ # Relational database models for Montage import random import itertools +from math import ceil from sqlalchemy import (Column, String, @@ -173,8 +174,9 @@ class Entry(Base, DictableBase): class RoundEntry(Base, DictableBase): __tablename__ = 'round_entries' - entry_id = Column(Integer, ForeignKey('entries.id'), primary_key=True) - round_id = Column(Integer, ForeignKey('rounds.id'), primary_key=True) + id = Column(Integer, primary_key=True) + entry_id = Column(Integer, ForeignKey('entries.id')) + round_id = Column(Integer, ForeignKey('rounds.id')) entry = relationship(Entry, back_populates='entered_rounds') round = relationship(Round, back_populates='round_entries') @@ -210,12 +212,10 @@ class Task(Base, DictableBase): __tablename__ = 'tasks' id = Column(Integer, primary_key=True) user_id = Column(Integer, ForeignKey('users.id')) - entry_id = Column(Integer, ForeignKey('entries.id')) - round_id = Column(Integer, ForeignKey('rounds.id')) + entry_id = Column(Integer, ForeignKey('round_entries.id')) user = relationship('User') - round = relationship('Round') # , back_populates='tasks') - entry = relationship('Entry') + round_entry = relationship('RoundEntry') # , back_populates='tasks') class 
UserDAO(object): @@ -306,26 +306,6 @@ def get_all_rounds(self): return ret -import os.path - -CUR_PATH = os.path.dirname(os.path.abspath(__file__)) -DATA_PATH = os.path.join(os.path.dirname(CUR_PATH), 'test_data') - - -def make_rdb_session(db_url='sqlite:///tmp_montage.db'): - from sqlalchemy import create_engine - from sqlalchemy.orm import sessionmaker - - # echo="debug" also prints results of selects, etc. - engine = create_engine(db_url, echo=True) - Base.metadata.create_all(engine) - - session_type = sessionmaker() - session_type.configure(bind=engine) - session = session_type() - return session - - def create_initial_tasks(rdb_session, round): """this creates the initial tasks. @@ -354,62 +334,24 @@ def create_initial_tasks(rdb_session, round): .order_by(rand_func).all() to_process = itertools.chain.from_iterable([shuffled_entries] * quorum) - per_juror_count = len(shuffled_entries) * (float(quorum) / len(jurors)) + per_juror = int(ceil(len(shuffled_entries) * (float(quorum) / len(jurors)))) - juror_iters = [itertools.repeat(j, per_juror_count) for j in jurors] + juror_iters = itertools.chain.from_iterable([itertools.repeat(j, per_juror) + for j in jurors]) - for entry, juror in itertools.izip_longest(to_process, juror_iters, None): + pairs = itertools.izip_longest(to_process, juror_iters, fillvalue=None) + for entry, juror in pairs: if juror is None: raise RuntimeError('should never run out of jurors first') if entry is None: break - task = Task(user=juror, entry=entry, round=round) + task = Task(user=juror, round_entry=entry) ret.append(task) return ret -def make_fake_data(debug=True): - from loaders import load_full_csv - - rdb_session = make_rdb_session() - coord = User(username='Slaporte') - juror = User(username='MahmoudHashemi') - - campaign = Campaign(name='Test Campaign 2016') - rdb_session.add(campaign) - - campaign.coords.append(coord) - round = Round(name='Test Round 1', quorum=1) - campaign.rounds.append(round) - round.jurors.append(juror) - - 
CSV_PATH = DATA_PATH + '/wlm2015_ir_5.csv' - - with open(CSV_PATH) as f: - entries = load_full_csv(f) - - for entry in entries: - round.entries.append(entry) - - create_initial_tasks(rdb_session, round) - - rdb_session.commit() - if debug: - import pdb;pdb.set_trace() - return - - -def main(): - make_fake_data() - return - - -if __name__ == '__main__': - main() - - """ * Indexes * db session management, engine creation, and schema creation separation diff --git a/run_data_test.py b/run_data_test.py new file mode 100644 index 00000000..bcbb4c54 --- /dev/null +++ b/run_data_test.py @@ -0,0 +1,62 @@ + +import os.path + +from montage.rdb import User, Campaign, Round, Base, create_initial_tasks +from montage.loaders import load_full_csv + +CUR_PATH = os.path.dirname(os.path.abspath(__file__)) +DATA_PATH = os.path.join(CUR_PATH, 'test_data') + + +def make_rdb_session(db_url='sqlite:///tmp_montage.db', echo=True): + from sqlalchemy import create_engine + from sqlalchemy.orm import sessionmaker + + # echo="debug" also prints results of selects, etc. 
+ engine = create_engine(db_url, echo=echo) + Base.metadata.create_all(engine) + + session_type = sessionmaker() + session_type.configure(bind=engine) + session = session_type() + return session + + +def make_fake_data(debug=True, echo=True): + rdb_session = make_rdb_session(echo=echo) + coord = User(username='Slaporte') + juror = User(username='MahmoudHashemi') + + campaign = Campaign(name='Test Campaign 2016') + rdb_session.add(campaign) + + campaign.coords.append(coord) + round = Round(name='Test Round 1', quorum=1) + campaign.rounds.append(round) + round.jurors.append(juror) + + CSV_PATH = DATA_PATH + '/wlm2015_fr_12k.csv' + + with open(CSV_PATH) as f: + entries = load_full_csv(f) + + for entry in entries: + round.entries.append(entry) + rdb_session.commit() + + tasks = create_initial_tasks(rdb_session, round) + rdb_session.add_all(tasks) + + rdb_session.commit() + if debug: + import pdb;pdb.set_trace() + return + + +def main(): + make_fake_data(False, False) + return + + +if __name__ == '__main__': + main() diff --git a/test_data/wlm2015_fr_11874.csv b/test_data/wlm2015_fr_12k.csv similarity index 100% rename from test_data/wlm2015_fr_11874.csv rename to test_data/wlm2015_fr_12k.csv