Skip to content

Commit d749a40

Browse files
authored
Merge pull request #187 from galaxyproject/dev
Release 2.3.0
2 parents 19f901e + 8d183ad commit d749a40

8 files changed

Lines changed: 178 additions & 40 deletions

File tree

abm/VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
2.2.0
1+
2.3.0

abm/lib/benchmark.py

Lines changed: 55 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import logging
66
import argparse
77
from lib import Keys, INVOCATIONS_DIR, METRICS_DIR
8-
from lib.common import connect, Context
8+
from lib.common import connect, Context, print_json
99
from bioblend.galaxy import GalaxyInstance
1010

1111
log = logging.getLogger('abm')
@@ -122,19 +122,37 @@ def run(context: Context, workflow_path, history_prefix: str, experiment: str):
122122
print(f'ERROR: Invalid input specification for {spec[Keys.NAME]}')
123123
return False
124124

125-
dsname = spec[Keys.DATASET_ID]
126-
input_names.append(dsname)
127-
#inputs.append(dsname)
128-
# dsid = find_dataset_id(gi, dsname)
129-
dsdata = _get_dataset_data(gi, dsname)
130-
if dsdata is None:
131-
raise Exception(f"ERROR: unable to resolve {dsname} to a dataset.")
132-
dsid = dsdata['id']
133-
dssize = dsdata['size']
134-
input_data_size.append(dssize)
135-
print(f"Input dataset ID: {dsname} [{dsid}] {dssize}")
136-
inputs[input[0]] = {'id': dsid, 'src': 'hda', 'size': dssize}
137-
125+
if 'value' in spec:
126+
inputs[input[0]] = spec['value']
127+
print(f"Input data value: {spec['value']}")
128+
elif 'collection' in spec:
129+
dsname = spec['collection']
130+
input_names.append(dsname)
131+
#inputs.append(dsname)
132+
# dsid = find_dataset_id(gi, dsname)
133+
dsdata = _get_dataset_data(gi, dsname)
134+
if dsdata is None:
135+
raise Exception(f"ERROR: unable to resolve {dsname} to a dataset.")
136+
dsid = dsdata['id']
137+
dssize = dsdata['size']
138+
input_data_size.append(dssize)
139+
print(f"Input dataset ID: {dsname} [{dsid}] {dssize}")
140+
inputs[input[0]] = {'id': dsid, 'src': 'hdca', 'size': dssize}
141+
elif Keys.DATASET_ID in spec:
142+
dsname = spec[Keys.DATASET_ID]
143+
input_names.append(dsname)
144+
#inputs.append(dsname)
145+
# dsid = find_dataset_id(gi, dsname)
146+
dsdata = _get_dataset_data(gi, dsname)
147+
if dsdata is None:
148+
raise Exception(f"ERROR: unable to resolve {dsname} to a dataset.")
149+
dsid = dsdata['id']
150+
dssize = dsdata['size']
151+
input_data_size.append(dssize)
152+
print(f"Input dataset ID: {dsname} [{dsid}] {dssize}")
153+
inputs[input[0]] = {'id': dsid, 'src': 'hda', 'size': dssize}
154+
else:
155+
raise Exception(f'Invalid input value')
138156
print(f"Running workflow {wfid}")
139157
new_history_name = output_history_name
140158
if history_prefix is not None:
@@ -415,12 +433,33 @@ def make_result(data):
415433
try:
416434
datasets = gi.datasets.get_datasets(name=name_or_id) # , deleted=True, purged=True)
417435
for ds in datasets:
418-
if ds['state'] == 'ok' and not ds['deleted'] and ds['visible']:
436+
print_json(ds)
437+
state = True
438+
if 'state' in ds:
439+
state = ds['state'] == 'ok'
440+
if state and not ds['deleted'] and ds['visible']:
419441
# The dict returned by get_datasets does not include the input
420442
# file sizes so we need to make another call to show_dataset
421443
return make_result(gi.datasets.show_dataset(ds['id']))
444+
# if ds['state'] == 'ok':
445+
# print('state is ok')
446+
# if ds['deleted']:
447+
# print('dataset deleted')
448+
# else:
449+
# print('dataset not deleted')
450+
# if ds['visible']:
451+
# print('dataset visible')
452+
# else:
453+
# print('dataset not visible')
422454
except Exception as e:
423-
print(e)
455+
pass
424456

425457
return None
426458

459+
460+
from pprint import pprint
461+
def test(context:Context, args:list):
462+
id = 'c90fffcf98b31cd3'
463+
gi = connect(context)
464+
inputs = gi.workflows.get_workflow_inputs(id, 'PE fastq input')
465+
pprint(inputs)

abm/lib/common.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,8 @@ def __init__(self, *args):
4141

4242

4343

44-
def print_json(obj):
45-
print(json.dumps(obj, indent=2))
44+
def print_json(obj, indent=2):
45+
print(json.dumps(obj, indent=indent))
4646

4747

4848
def print_yaml(obj):

abm/lib/dataset.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
import os
88
import yaml
99

10-
1110
def list(context: Context, args: list):
1211
gi = connect(context)
1312
kwargs = {
@@ -29,11 +28,10 @@ def list(context: Context, args: list):
2928
print('No datasets found')
3029
return
3130
print(f'Found {len(datasets)} datasets')
32-
print('ID\tHistory\tDeleted\tState\tName')
31+
print('ID\tHistory\tType\tDeleted\tState\tName')
3332
for dataset in datasets:
3433
state = dataset['state'] if 'state' in dataset else 'unknown'
35-
print(f"{dataset['id']}\t{dataset['history_id']}\t{dataset['deleted']}\t{state}\t{dataset['name']}")
36-
#pprint(dataset)
34+
print(f"{dataset['id']}\t{dataset['history_id']}\t{dataset['history_content_type']}\t{dataset['deleted']}\t{state}\t{dataset['name']}")
3735

3836

3937
def clean(context: Context, args: list):
@@ -62,6 +60,15 @@ def show(context: Context, args: list):
6260
print(json.dumps(result, indent=4))
6361

6462

63+
def get(context: Context, args: list):
    """Print the datasets matching ``args[0]`` as indented JSON.

    Args:
        context: connection context handed to ``connect`` to obtain the
            Galaxy client.
        args: command-line arguments; ``args[0]`` is the dataset name to
            search for. Any further arguments are ignored.

    Returns:
        None. The matching dataset records are written to stdout; an error
        message is printed instead when no argument is supplied.
    """
    if not args:
        print("ERROR: no dataset ID provided")
        return
    gi = connect(context)
    # get_datasets() takes ``limit`` as its first positional parameter, so the
    # search term must be passed by keyword or it is treated as a page size
    # rather than a name filter.
    # NOTE(review): the menu advertises NAME_OR_ID, but this filter matches on
    # name only -- confirm whether ID lookup should also be supported here.
    result = gi.datasets.get_datasets(name=args[0])
    print(json.dumps(result, indent=4))
70+
71+
6572
def delete(context: Context, args: list):
6673
# gi = connect(context)
6774
print("dataset delete not implemented")

abm/lib/experiment.py

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import helm
77
import benchmark
88
import logging
9+
import traceback
910
from common import load_profiles, Context
1011
from time import perf_counter
1112
from datetime import timedelta
@@ -113,20 +114,24 @@ def summarize(context: Context, args: list):
113114
if separator is not None:
114115
print('ERROR: The output format is specified more than once')
115116
return
117+
print('tsv')
116118
separator = '\t'
117119
elif arg in ['-c', '--csv']:
118120
if separator is not None:
119121
print('ERROR: The output format is specified more than once')
120122
return
121123
separator = ','
124+
print('csv')
122125
elif arg in ['-m', '--model']:
123126
if separator is not None:
124127
print('ERROR: The output format is specified more than once')
125128
return
129+
print('making a model')
126130
separator = ','
127131
make_row = make_model_row
128132
header_row = "job_id,tool_id,tool_version,state,memory.max_usage_in_bytes,cpuacct.usage,process_count,galaxy_slots,runtime_seconds,ref_data_size,input_data_size_1,input_data_size_2"
129133
else:
134+
print(f"Input dir {arg}")
130135
input_dirs.append(arg)
131136

132137
if len(input_dirs) == 0:
@@ -142,32 +147,35 @@ def summarize(context: Context, args: list):
142147
if not os.path.isfile(input_path) or not input_path.endswith('.json'):
143148
continue
144149
try:
150+
print(f"Loading {input_path}")
145151
with open(input_path, 'r') as f:
146152
data = json.load(f)
147-
if data['metrics']['tool_id'] == 'upload1':
153+
if data['job_metrics']['tool_id'] == 'upload1':
154+
print('Ignoring upload tool')
148155
continue
149156
row = make_row(data)
150157
print(separator.join([ str(x) for x in row]))
151158
except Exception as e:
152159
# Report the failure and keep going so the remainder of the table can be generated.
153-
# print(f"Unable to process {input_path}")
154-
# print(e)
155-
pass
160+
print(f"Unable to process {input_path}")
161+
print(e)
162+
traceback.print_exc( )
163+
#pass
156164

157165

158166
accept_metrics = ['galaxy_slots', 'galaxy_memory_mb', 'runtime_seconds', 'cpuacct.usage','memory.limit_in_bytes', 'memory.max_usage_in_bytes'] #,'memory.soft_limit_in_bytes']
159167

160168
def make_table_row(data: dict):
161169
row = [ str(data[key]) for key in ['run', 'cloud', 'job_conf', 'workflow_id', 'history_id', 'inputs']]
162-
row.append(parse_toolid(data['metrics']['tool_id']))
163-
row.append(data['metrics']['state'])
164-
for e in _get_metrics(data['metrics']['job_metrics']):
170+
row.append(parse_toolid(data['job_metrics']['tool_id']))
171+
row.append(data['job_metrics']['state'])
172+
for e in _get_metrics(data['job_metrics']['job_metrics']):
165173
row.append(e)
166174
return row
167175

168176

169177
def make_model_row(data: dict):
170-
metrics = data['metrics']
178+
metrics = data['job_metrics']
171179
row = []
172180
row.append(metrics['id'])
173181
tool_id = metrics['tool_id']

abm/lib/job.py

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
def list(context: Context, args: list):
1010
state = ''
11+
history_id = None
1112
log.debug('Processing args')
1213
log_state = False
1314
while len(args) > 0:
@@ -18,14 +19,16 @@ def list(context: Context, args: list):
1819
return
1920
state = args.pop(0)
2021
log_state = True
21-
22+
elif arg in ['-h', '--history']:
23+
history_id = args.pop(0)
24+
log.debug(f"Getting jobs from history {history_id}")
2225
log.debug('Connecting to the Galaxy server')
2326
gi = connect(context)
2427
if log_state:
2528
log.debug(f"Getting jobs with state {state}")
2629
else:
2730
log.debug("Getting job list")
28-
job_list = gi.jobs.get_jobs(state=state)
31+
job_list = gi.jobs.get_jobs(state=state, history_id=history_id)
2932
log.debug(f"Iterating over job list with {len(job_list)} items")
3033
for job in job_list:
3134
print(f"{job['id']}\t{job['state']}\t{job['update_time']}\t{job['tool_id']}")
@@ -51,7 +54,30 @@ def metrics(context: Context, args: list):
5154
print("ERROR: no job ID provided")
5255
return
5356
gi = connect(context)
54-
metrics = gi.jobs.get_metrics(args[0])
57+
if len(args) > 1:
58+
arg = args.pop(0)
59+
if arg in ['-h', '--history']:
60+
history_id = args.pop(0)
61+
log.debug(f"Getting metrics for jobs from history {history_id}")
62+
job_list = gi.jobs.get_jobs(history_id=history_id)
63+
metrics = []
64+
for job in job_list:
65+
metrics.append({
66+
'job_id': job['id'],
67+
'job_state': job['state'],
68+
'tool_id': job['tool_id'],
69+
'job_metrics': gi.jobs.get_metrics(job['id'])
70+
})
71+
else:
72+
print(f"ERROR: Unrecognized argument {arg}")
73+
else:
74+
job = gi.jobs.show_job(args[0])
75+
metrics = [{
76+
'job_id': job['id'],
77+
'job_state': job['state'],
78+
'tool_id': job['tool_id'],
79+
'job_metrics': gi.jobs.get_metrics(args[0])
80+
}]
5581
print(json.dumps(metrics, indent=4))
5682
# metrics = {}
5783
# for m in gi.jobs.get_metrics(args[0]):
@@ -78,4 +104,4 @@ def problems(context: Context, args: list):
78104
print('ERROR: no job ID provided.')
79105
return
80106
gi = connect(context)
81-
pprint(gi.jobs.get_common_problems(args[0]))
107+
pprint(gi.jobs.get_common_problems(args[0]))

abm/lib/menu.yml

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
- name:
1+
- name:
22
- benchmark
33
- bench
44
help: 'manage benchmarks'
@@ -15,6 +15,10 @@
1515
handler: benchmark.validate
1616
help: validate that workflow and dataset names can be translated into IDs
1717
params: PATH
18+
- name: [test]
19+
handler: benchmark.test
20+
help: experimental code
21+
params: VARIES
1822
- name:
1923
- workflow
2024
- wf
@@ -55,6 +59,14 @@
5559
handler: workflow.rename
5660
params: "ID 'new workflow name'"
5761
help: "rename a workflow on the Galaxy server"
62+
- name: ['invocation']
63+
handler: workflow.invocation
64+
params: "--workflow WORKFLOW_ID --invocation INVOCATION_ID"
65+
help: show details about a specific workflow invocation
66+
- name: ['inputs']
67+
help: list inputs required by a workflow
68+
params: WORKFLOW_ID
69+
handler: workflow.inputs
5870
- name: ['test']
5971
handler: workflow.test
6072
help: run some test code
@@ -86,6 +98,10 @@
8698
handler: dataset.show
8799
params: ID
88100
help: show detailed information on a dataset
101+
- name: ['get']
102+
handler: dataset.get
103+
params: NAME_OR_ID
104+
help: show information for a given dataset name or ID
89105
- name: [cleanup, clean, clear]
90106
handler: dataset.clean
91107
params: "[STATE [STATE...]]"
@@ -158,9 +174,9 @@
158174
help: manage jobs on the server
159175
menu:
160176
- name: [ list, ls ]
161-
help: list all jobs, or jobs in a particular state
177+
help: list all jobs, or jobs in a particular state. Can filter by a history.
162178
handler: job.list
163-
params: "[-s|--state ok|running|error|waiting]"
179+
params: "[-s|--state ok|running|error|waiting] [-h|--history_id historyID]"
164180
- name: [ show ]
165181
help: show detailed information about a job
166182
handler: job.show
@@ -174,9 +190,9 @@
174190
handler: job.cancel
175191
params: ID
176192
- name: [ metrics, stats ]
177-
help: display runtime metrics for the job
193+
help: display runtime metrics for the job, or a list of jobs contained in a history
178194
handler: job.metrics
179-
params: ID
195+
params: "[ID | -h|--history historyID]"
180196
- name: [users, user]
181197
help: manage users on the Galaxy instance
182198
menu:

0 commit comments

Comments
 (0)