-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Tester: Add script for pre-running all tests
- Loading branch information
1 parent
28cb2d4
commit a113334
Showing
6 changed files
with
418 additions
and
134 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
/*
 * Reset script: empties the application tables and leaves the schema in place.
 * gradenotes holds foreign keys into notes and grades, so it is emptied before
 * them; this keeps the script valid when foreign-key enforcement is enabled.
 * NOTE(review): testresults is not cleared here even though it references
 * submissions(id) -- confirm whether stored test results should survive a reset.
 */
delete from gradenotes;
delete from entries;
delete from users;
delete from submissions;
delete from grades;
delete from notes;
delete from codeedits;
delete from fixes;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
/* entries: a titled block of text tagged with a cluster, a question number and a tab. */
create table if not exists entries (
    id integer primary key,
    title text not null,
    cluster_id integer not null,
    question_number integer not null,
    tab_id integer not null,
    /* Double quotes are the standard way to quote an identifier; single quotes denote a string literal. */
    "text" text not null
);
|
||
/* users: one row per account name; usernames are unique. */
create table if not exists users (
    id integer primary key,
    username text not null,
    session_id integer, /* A null session ID means one has to be generated */
    unique(username)
);

/*
 * Seed four admin and ten user accounts. "insert or ignore" together with
 * unique(username) makes this safe to run again: existing names are skipped.
 */
insert or ignore into users (username) values
    ('admin1'),
    ('admin2'),
    ('admin3'),
    ('admin4'),
    ('user1'),
    ('user2'),
    ('user3'),
    ('user4'),
    ('user5'),
    ('user6'),
    ('user7'),
    ('user8'),
    ('user9'),
    ('user10');
|
||
/*
 * testresults: the outcome of one test case for one submission; at most one
 * row per (submission_id, test_case_index). "observed" may be null.
 * NOTE(review): submission_id references submissions(id), the row key, whereas
 * other tables pair submission_id with question_number -- confirm which
 * numbering callers store here.
 */
create table if not exists testresults (
    id integer primary key,
    submission_id integer not null,
    test_case_index integer not null,
    input_values text not null,
    success boolean not null,
    expected text not null,
    observed text,
    foreign key (submission_id) references submissions(id),
    unique(submission_id, test_case_index)
);
|
||
/* submissions: the code of one submission, unique per (question_number, submission_id). */
create table if not exists submissions (
    id integer primary key,
    question_number integer not null,
    submission_id integer not null,
    /* Was "text note null": the typo made "text note" the type name and left the column nullable. */
    code text not null,
    unique(question_number, submission_id)
);
|
||
/* grades: one grade per (session_id, question_number, submission_id). */
create table if not exists grades (
    id integer primary key,
    /* Was "integer note null": the typo left the column nullable. */
    session_id integer not null,
    question_number integer not null,
    submission_id integer not null,
    grade float not null,
    unique(session_id, question_number, submission_id)
);
|
||
/* notes: a piece of note text recorded under a session. */
create table if not exists notes (
    id integer primary key,
    session_id integer not null,
    "text" text not null,
    /*
     * NOTE(review): id is already the primary key, so this constraint can never
     * reject a row. Possibly unique(session_id, "text") was intended -- confirm
     * before changing it.
     */
    unique(id, "text")
);
|
||
/* gradenotes: links a note to a grade; each (note_id, grade_id) pair appears at most once. */
create table if not exists gradenotes (
    id integer primary key,
    note_id integer not null,
    grade_id integer not null,
    foreign key (note_id) references notes(id),
    foreign key (grade_id) references grades(id),
    unique(note_id, grade_id)
);
|
||
/* codeedits: code text stored per (question_number, submission_id), one row per pair. */
create table if not exists codeedits (
    id integer primary key,
    question_number integer not null,
    submission_id integer not null,
    code text not null,
    unique(question_number, submission_id)
);
|
||
/*
 * fixes: the before/after text of a fix, unique per
 * (session_id, question_number, submission_id, fixed_submission_id).
 */
create table if not exists fixes (
    id integer primary key,
    session_id integer not null,
    question_number integer not null,
    submission_id integer not null,
    fixed_submission_id integer not null, /* which submission was fixed to produce this fix */
    before text not null,
    after text not null,
    unique(session_id, question_number, submission_id, fixed_submission_id)
);
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,260 @@ | ||
import inspect
import signal
import sys
import time
from io import StringIO
from multiprocessing import Manager, Process
|
||
|
||
# Wall-clock budget, in seconds, for running a single test case.
PROCESS_TIMEOUT = .5


# Test fixtures, keyed by an integer (presumably the question number -- confirm
# against callers).  Each entry names the function under test and pairs the
# tuple at index i of 'input_value_tuples' with index i of 'expected_outputs'.
# The key names match the keyword parameters of evaluate_function(); entry 1
# previously spelled the key 'input_values_tuples', which no single consumer
# could read alongside entry 0.
#
# Keep every input tuple on its own line: stringify_input() describes tuples
# containing lambdas by quoting the source line they were defined on.
TEST_CONDITIONS = {
    0: {
        'function_name': 'accumulate',
        'input_value_tuples': [
            (lambda x, y: x + y, 11, 5, lambda x: x),
            (lambda x, y: x + y, 0, 5, lambda x: x),
            (lambda x, y: x * y, 2, 3, lambda x: x * x),
            (lambda x, y: x + y, 11, 0, lambda x: x),
            (lambda x, y: x + y, 11, 3, lambda x: x * x),
        ],
        'expected_outputs': [
            26,
            15,
            72,
            11,
            25,
        ],
    },
    1: {
        'function_name': 'product',
        'input_value_tuples': [
            (3, lambda x: x),
            (5, lambda x: x),
            (3, lambda x: x * x),
            (5, lambda x: x * x),
        ],
        'expected_outputs': [
            6,
            120,
            36,
            14400,
        ],
    },
}
|
||
class timeout(object):
    ''' Context manager that raises TimeoutError if its body runs too long.

    REUSE: Adapted from source code for UC Berkeley CS 61A auto-grader.

    On entry a SIGALRM handler is installed and a real-time interval timer is
    armed for `seconds` (fractions allowed).  On exit the timer is cancelled
    and the handler that was installed beforehand is put back.  Because it
    relies on signal.signal(), it has to be used from the main thread.
    '''

    def __init__(self, seconds, error_message="Timeout"):
        self.seconds = seconds
        self.error_message = error_message
        self._previous_handler = None

    def handle_timeout(self, signum, frame):
        ''' SIGALRM handler: abort the running body with TimeoutError. '''
        raise TimeoutError(self.error_message)

    def __enter__(self):
        # signal.signal() returns the handler it replaces; keep it for __exit__.
        self._previous_handler = signal.signal(signal.SIGALRM, self.handle_timeout)
        signal.setitimer(signal.ITIMER_REAL, self.seconds)

    def __exit__(self, exc_type, exc_value, traceback):
        # Cancel through the same API that armed the timer rather than alarm(0).
        signal.setitimer(signal.ITIMER_REAL, 0)
        # A previous handler of None means it was not installed from Python and
        # cannot be passed back to signal.signal(); fall back to the default.
        previous = self._previous_handler
        signal.signal(signal.SIGALRM, signal.SIG_DFL if previous is None else previous)
|
||
|
||
|

# Disabled draft of a further TEST_CONDITIONS entry (for `repeated`), kept as an
# inert string literal.  It is incomplete -- no expected outputs were filled in --
# and nothing reads it.
"""
def repeated(f, n):\n \"\"\"Return the function that computes the nth application of f.\n\n >>> add_three = repeated(increment, 3)\n >>> add_three(5)\n 8\n >>> repeated(triple, 5)(1) # 3 * 3 * 3 * 3 * 3 * 1\n 243\n >>> repeated(square, 2)(5) # square(square(5))\n 625\n >>> repeated(square, 4)(5) # square(square(square(square(5))))\n 152587890625\n >>> repeated(square, 0)(5)\n 5\n \"\"\"\n d
'function_name': 'repeated',
'input_values_tuples': [
(lambda x: x + 1, 3),
],
'expected_outputs': [
],
}
}
"""
|
||
|
||
def stringify_input(input_values):
    ''' Return a printable description of one test case's input values.

    Expects input_values to be provided as a list.

    If any value is callable, the source text of the first callable is
    returned (stripped) in place of the whole list.  When that source cannot
    be retrieved -- inspect.getsource raises TypeError for builtins and
    OSError when no source is available -- str(input_values) is returned
    instead of letting the error escape.
    '''
    for input_value in input_values:
        # We use a heuristic if there are lambdas:
        # Just find the source code of the line where this set of examples
        # was defined, and add that source code line here.
        if callable(input_value):
            try:
                return inspect.getsource(input_value).strip()
            except (OSError, TypeError):
                # No source to quote; use the generic representation below.
                break

    return str(input_values)
|
||
|
||
def stringify_output(result):
    ''' Reduce a test-case result dict to the single value shown as its output.

    Returns, in order of precedence:
      * result['returned'] (unchanged, not converted to str) if the call succeeded;
      * 'N/A' if the code did not compile;
      * 'Timeout' if the call timed out;
      * the exception class name if defining the function raised;
      * the exception class name if calling the function raised.

    Side effect kept from the original: in the two exception cases the
    exception dict's 'type' entry is replaced by the class name.  The function
    is safe to call more than once on the same result; an entry that is
    already a string is left alone.
    '''
    if result['runtime_success']:
        return result['returned']
    if not result['compile_success']:
        return 'N/A'
    if result['timeout']:
        return 'Timeout'

    # The code compiled and did not time out, so an exception was recorded
    # either while defining the function or while calling it.
    key = 'runtime_exception' if result['exec_success'] else 'exec_exception'
    exception = result[key]
    if not isinstance(exception['type'], str):
        exception['type'] = exception['type'].__name__
    return exception['type']
|
||
|
||
def evaluate_function(code_text, function_name, input_value_tuples, expected_outputs):
    ''' Run every test case for one piece of submitted code.

    code_text: source code expected to define `function_name`.
    function_name: name of the function to call.
    input_value_tuples: one tuple of positional arguments per test case.
    expected_outputs: expected return value for the test case at the same index.

    Returns a dict with:
      'test_cases': the evaluate_function_once() result of each case, in order;
      'overall_success': True when every case succeeded (also True when there
          are no cases at all).

    Raises IndexError if expected_outputs has fewer entries than
    input_value_tuples.  The check is made up front so that no submitted code
    runs before the mismatch is reported; surplus expected outputs are ignored.
    '''
    if len(expected_outputs) < len(input_value_tuples):
        raise IndexError('expected_outputs has fewer entries than input_value_tuples')

    # Compute the result of each individual test
    test_cases = [
        evaluate_function_once(
            code_text=code_text,
            function_name=function_name,
            input_values=input_values,
            expected_output=expected_output,
        )
        for input_values, expected_output in zip(input_value_tuples, expected_outputs)
    ]

    # Compute the overall success across all tests
    return {
        'overall_success': all(case['success'] for case in test_cases),
        'test_cases': test_cases,
    }
|
||
|
||
def evaluate_function_once(code_text, function_name, input_values, expected_output):
    ''' Run one test case against submitted code and report what happened.

    The code goes through three stages, and the first one that fails ends the
    evaluation:
      1. compile   -> 'compile_success'; on failure 'syntax_error' holds
                      lineno / offset / msg / text.
      2. define    -> 'exec_success'; on failure 'exec_exception' holds the
                      exception type and args raised while executing the code.
      3. call      -> 'runtime_success'; on success 'returned' and 'stdout'
                      are recorded, on failure 'runtime_exception' holds the
                      exception type and args, and 'timeout' is True if the
                      call exceeded PROCESS_TIMEOUT seconds.

    'success' is True only when the call completed and its return value
    equals expected_output.  'input_values' and 'expected' echo the arguments.

    NOTE: the submitted code is executed in this process with exec(); there is
    no sandbox.  The timeout is signal based, so it has to run in the main
    thread, and code that catches TimeoutError itself can outlive the limit.
    '''
    result = {
        'input_values': input_values,
        'expected': expected_output,
        'success': False,  # we assume the test case failed until it completely succeeds
        'exec_success': False,
        'timeout': False,
        'runtime_success': False,
    }

    # Stage 1: compile the code to runnable form
    try:
        code = compile(code_text, '<string>', 'exec')
    except SyntaxError as error:
        result['compile_success'] = False
        result['syntax_error'] = {
            'lineno': error.lineno,
            'offset': error.offset,
            'msg': error.msg,
            'text': error.text,
        }
        return result
    result['compile_success'] = True

    # Stage 2: run the code to capture the function definition.  A single
    # fresh namespace serves as both globals and locals: with two separate
    # dicts the code runs as if inside a class body, so the defined functions
    # could not see each other, themselves (recursion) or module-level imports.
    scope = {}
    try:
        exec(code, scope)
    except Exception as error:
        result['exec_exception'] = {
            'type': type(error),
            'args': error.args,
        }
        return result
    result['exec_success'] = True

    # Stage 3: call the function with stdout captured and a time limit, so a
    # looping submission is interrupted instead of hanging the caller.
    original_stdout = sys.stdout
    captured_stdout = StringIO()
    sys.stdout = captured_stdout
    try:
        with timeout(seconds=PROCESS_TIMEOUT):
            if function_name not in scope:
                # Report a missing definition like any other runtime failure
                # rather than crashing the evaluator with a KeyError.
                raise NameError("name '%s' is not defined" % function_name)
            returned = scope[function_name](*input_values)
    except TimeoutError:
        # Most likely raised by our own timer (TimeoutError is an Exception
        # subclass, so this clause has to come first).
        result['timeout'] = True
    except Exception as error:
        result['runtime_exception'] = {
            'type': type(error),
            'args': error.args,
        }
    else:
        result['returned'] = returned
        result['stdout'] = captured_stdout.getvalue()
        result['runtime_success'] = True
    finally:
        # Return stdout to original, whatever happened above
        sys.stdout = original_stdout

    if result['runtime_success']:
        result['success'] = (result['returned'] == expected_output)

    return result
Oops, something went wrong.