Commit

Tester: Add script for pre-running all tests
andrewhead committed Oct 17, 2016
1 parent 28cb2d4 commit a113334
Showing 6 changed files with 418 additions and 134 deletions.
8 changes: 8 additions & 0 deletions db/clear.sql
@@ -0,0 +1,8 @@
delete from entries;
delete from users;
delete from submissions;
delete from grades;
delete from notes;
delete from gradenotes;
delete from codeedits;
delete from fixes;
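
These delete statements wipe every table while leaving the schema in place. A minimal sketch of how such a script could be applied with Python's built-in sqlite3 module follows; the database filename and helper name are assumptions for illustration, not part of this commit.

import sqlite3

DB_PATH = 'grader.db'  # hypothetical database file; the project may use a different name

def run_sql_script(db_path, script_path):
    # Read a .sql file and execute every statement it contains.
    with open(script_path) as script_file:
        script = script_file.read()
    connection = sqlite3.connect(db_path)
    try:
        connection.executescript(script)
        connection.commit()
    finally:
        connection.close()

# Build the schema once, then wipe the data between test runs:
# run_sql_script(DB_PATH, 'db/init.sql')
# run_sql_script(DB_PATH, 'db/clear.sql')
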
94 changes: 94 additions & 0 deletions db/init.sql
@@ -0,0 +1,94 @@
create table if not exists entries (
id integer primary key,
title text not null,
cluster_id integer not null,
question_number integer not null,
tab_id integer not null,
"text" text not null
);

create table if not exists users (
id integer primary key,
username text not null,
session_id integer, /* A null session ID means one has to be generated */
unique(username)
);
insert or ignore into users (username) values
('admin1'),
('admin2'),
('admin3'),
('admin4'),
('user1'),
('user2'),
('user3'),
('user4'),
('user5'),
('user6'),
('user7'),
('user8'),
('user9'),
('user10');

create table if not exists testresults (
id integer primary key,
submission_id integer not null,
test_case_index integer not null,
input_values text not null,
success boolean not null,
expected text not null,
observed text,
foreign key (submission_id) references submissions(id),
unique(submission_id, test_case_index)
);

create table if not exists submissions (
id integer primary key,
question_number integer not null,
submission_id integer not null,
code text not null,
unique(question_number, submission_id)
);

create table if not exists grades (
id integer primary key,
session_id integer not null,
question_number integer not null,
submission_id integer not null,
grade float not null,
unique(session_id, question_number, submission_id)
);

create table if not exists notes (
id integer primary key,
session_id integer not null,
"text" text not null,
unique(id, "text")
);

create table if not exists gradenotes (
id integer primary key,
note_id integer not null,
grade_id integer not null,
foreign key (note_id) references notes(id),
foreign key (grade_id) references grades(id),
unique(note_id, grade_id)
);

create table if not exists codeedits (
id integer primary key,
question_number integer not null,
submission_id integer not null,
code text not null,
unique(question_number, submission_id)
);

create table if not exists fixes (
id integer primary key,
session_id integer not null,
question_number integer not null,
submission_id integer not null,
fixed_submission_id integer not null, /* which submission was fixed to produce this fix */
before text not null,
after text not null,
unique(session_id, question_number, submission_id, fixed_submission_id)
);
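
Because testresults is unique on (submission_id, test_case_index), re-running a submission can simply overwrite its cached rows. A minimal sketch of recording one outcome, assuming an open sqlite3 connection and that testresults.submission_id stores the id of the matching submissions row:

import sqlite3

def save_test_result(connection, submission_id, test_case_index,
                     input_values, success, expected, observed=None):
    # Insert or refresh the cached outcome of one test case for one submission.
    connection.execute(
        'insert or replace into testresults '
        '(submission_id, test_case_index, input_values, success, expected, observed) '
        'values (?, ?, ?, ?, ?, ?)',
        (submission_id, test_case_index, input_values, success, expected, observed))
    connection.commit()
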
260 changes: 260 additions & 0 deletions evaluate.py
@@ -0,0 +1,260 @@
import sys
import signal
from multiprocessing import Manager, Process
from io import StringIO
import time
import inspect


PROCESS_TIMEOUT = .5


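# Maps each question number to the graded function's name, the argument tuples to
# call it with, and the return value expected for each call.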
TEST_CONDITIONS = {
    0: {
        'function_name': 'accumulate',
        'input_value_tuples': [
            (lambda x, y: x + y, 11, 5, lambda x: x),
            (lambda x, y: x + y, 0, 5, lambda x: x),
            (lambda x, y: x * y, 2, 3, lambda x: x * x),
            (lambda x, y: x + y, 11, 0, lambda x: x),
            (lambda x, y: x + y, 11, 3, lambda x: x * x),
        ],
        'expected_outputs': [
            26,
            15,
            72,
            11,
            25,
        ],
    },
    1: {
        'function_name': 'product',
        'input_value_tuples': [
            (3, lambda x: x),
            (5, lambda x: x),
            (3, lambda x: x * x),
            (5, lambda x: x * x),
        ],
        'expected_outputs': [
            6,
            120,
            36,
            14400,
        ],
    },
}

class timeout(object):
    ''' REUSE: Adapted from source code for UC Berkeley CS 61A auto-grader. '''
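    # Note: this relies on signal.SIGALRM and setitimer, so it only works on
    # Unix-like systems and only in the main thread of the main interpreter.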

    def __init__(self, seconds, error_message="Timeout"):
        self.seconds = seconds
        self.error_message = error_message

    def handle_timeout(self, signum, frame):
        raise TimeoutError(self.error_message)

    def __enter__(self):
        signal.signal(signal.SIGALRM, self.handle_timeout)
        signal.setitimer(signal.ITIMER_REAL, self.seconds)

    def __exit__(self, type, value, traceback):
        signal.alarm(0)



"""
def repeated(f, n):\n \"\"\"Return the function that computes the nth application of f.\n\n >>> add_three = repeated(increment, 3)\n >>> add_three(5)\n 8\n >>> repeated(triple, 5)(1) # 3 * 3 * 3 * 3 * 3 * 1\n 243\n >>> repeated(square, 2)(5) # square(square(5))\n 625\n >>> repeated(square, 4)(5) # square(square(square(square(5))))\n 152587890625\n >>> repeated(square, 0)(5)\n 5\n \"\"\"\n d
'function_name': 'repeated',
'input_values_tuples': [
(lambda x: x + 1, 3),
],
'expected_outputs': [
],
}
}
"""


def stringify_input(input_values):
    ''' Expects input_values to be provided as a list. '''

    for input_value in input_values:
        # We use a heuristic if there are lambdas:
        # Just find the source code of the line where this set of examples
        # was defined, and add that source code line here.
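        # For example, (lambda x, y: x + y, 11, 5, lambda x: x) is rendered as the
        # source line on which that tuple appears in TEST_CONDITIONS.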
        if callable(input_value):
            return inspect.getsource(input_value).strip()

    return str(input_values)


def stringify_output(result):
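    # Render a test-case result as a short display string: the returned value on
    # success, 'N/A' when the code did not compile, 'Timeout' when it ran too long,
    # and the exception class name when definition or execution raised.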

    if not result['compile_success']:
        pass
    elif result['timeout']:
        pass
    elif not result['exec_success']:
        result['exec_exception']['type'] = result['exec_exception']['type'].__name__
    elif not result['runtime_success']:
        result['runtime_exception']['type'] = result['runtime_exception']['type'].__name__

    if result['runtime_success']:
        stringified = result['returned']
    elif not result['compile_success']:
        stringified = 'N/A'
    elif result['timeout']:
        stringified = 'Timeout'
    elif not result['exec_success']:
        stringified = result['exec_exception']['type']
    elif not result['runtime_success']:
        stringified = result['runtime_exception']['type']

    return stringified


def evaluate_function(code_text, function_name, input_value_tuples, expected_outputs):

    results = {
        'overall_success': False,
        'test_cases': []
    }

    # Compute the result of each individual test
    for test_index in range(len(input_value_tuples)):
        results['test_cases'].append(evaluate_function_once(
            code_text=code_text,
            function_name=function_name,
            input_values=input_value_tuples[test_index],
            expected_output=expected_outputs[test_index],
        ))

    # Compute the overall success across all tests
    results['overall_success'] = all(r['success'] for r in results['test_cases'])

    return results


def evaluate_function_once(code_text, function_name, input_values, expected_output):
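    # Compile and exec one submission, call the named function once with
    # input_values under a timeout, and report compile/exec/runtime status
    # alongside the value it returned.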

    cant_run_code = False
    result = {
        'input_values': input_values,
        'expected': expected_output,
        'success': False,  # we assume the test case failed until it completely succeeds
        'exec_success': False,
        'timeout': False,
        'runtime_success': False,
    }

    # Save the original stdout so we can restore it after this function runs
    original_stdout = sys.stdout

    # Compile the code to runnable form
    try:
        code = compile(code_text, '<string>', 'exec')
        result['compile_success'] = True
    except SyntaxError as s:
        result['compile_success'] = False
        result['syntax_error'] = {
            'lineno': s.lineno,
            'offset': s.offset,
            'msg': s.msg,
            'text': s.text,
        }
        cant_run_code = True

    # Set up fresh scopes for the code to run within
    local_scope = {}
    global_scope = {}

    if cant_run_code is False:

        # Run the code to capture the function definition
        try:
            exec(code, global_scope, local_scope)
            result['exec_success'] = True
        except Exception as e:
            result['exec_exception'] = {
                'type': type(e),
                'args': e.args,
            }
            cant_run_code = True

    if cant_run_code is False:

        def run_function(function, function_name, input_values, result):

            # It's critical to do a few things here:
            # 1. Transfer the input values into the sandbox scope
            # 2. Transfer the function name into the sandbox global scope so it can be called
            #    recursively (otherwise, it can't be found for recursive calls)
            # 3. Store the output in a sandbox variable and retrieve it later.
            local_scope['input_values'] = input_values
            global_scope[function_name] = local_scope[function_name]

            # Create a new version of stdout to capture what gets printed
            capturable_stdout = StringIO()
            sys.stdout = capturable_stdout

            try:
                exec('output = ' + function_name + '(*input_values)', global_scope, local_scope)
                output = local_scope['output']
                result['returned'] = output
                result['stdout'] = capturable_stdout.getvalue()
                result['runtime_success'] = True
            # If we catch a TimeoutError, this is probably from the timer that we built
            # to keep programs from executing for too long. Propagate it up to the next
            # level, where we can catch it and include the exception in the results.
            except TimeoutError as te:
                raise te
            except Exception as e:
                result['runtime_exception'] = {
                    'type': type(e),
                    'args': e.args,
                }

        # Run the test function under a timer so we can stop it if it loops forever
        result_shared_data = {}
        try:
            with timeout(seconds=.5):
                run_function(local_scope[function_name], function_name, input_values, result_shared_data)
        except TimeoutError:
            result['timeout'] = True

        '''
        result_shared_data = Manager().dict()
        process = Process(
            target=run_function,
            args=(
                local_scope[function_name],
                function_name,
                input_values,
                result_shared_data,
            )
        )
        # Run the function, and terminate it if it runs too long
        process.start()
        start = time.time()
        while time.time() < start + PROCESS_TIMEOUT:
            if process.is_alive():
                time.sleep(.1)
            else:
                result['timeout'] = False
                break
        process.terminate()
        '''

        # Merge the results from the function that was run with the main results
        result.update(result_shared_data)
        result['success'] = (result['returned'] == expected_output) if 'returned' in result else False

    # Return stdout to original
    sys.stdout = original_stdout

    return result
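
The commit message promises a script for pre-running all tests. A plausible driver, sketched here only as an editor's illustration, would loop over the submissions table, call evaluate_function for each stored program, and cache the per-test-case outcomes in testresults. The database filename and this exact table usage are assumptions, not code from this commit.

import sqlite3

import evaluate

DB_PATH = 'grader.db'  # hypothetical filename

def prerun_all_tests(db_path=DB_PATH):
    # Evaluate every stored submission and cache the per-test-case results.
    connection = sqlite3.connect(db_path)
    try:
        rows = connection.execute(
            'select id, question_number, code from submissions').fetchall()
        for submission_id, question_number, code in rows:
            conditions = evaluate.TEST_CONDITIONS.get(question_number)
            if conditions is None:
                continue
            results = evaluate.evaluate_function(
                code_text=code,
                function_name=conditions['function_name'],
                input_value_tuples=conditions['input_value_tuples'],
                expected_outputs=conditions['expected_outputs'],
            )
            for index, case in enumerate(results['test_cases']):
                # testresults.submission_id references submissions(id) in the schema above.
                connection.execute(
                    'insert or replace into testresults '
                    '(submission_id, test_case_index, input_values, success, expected, observed) '
                    'values (?, ?, ?, ?, ?, ?)',
                    (submission_id, index,
                     evaluate.stringify_input(case['input_values']),
                     case['success'],
                     str(case['expected']),
                     str(evaluate.stringify_output(case))))
        connection.commit()
    finally:
        connection.close()

if __name__ == '__main__':
    prerun_all_tests()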