Skip to content

Commit 3a8465a

Browse files
donaldcampbelljrvreuterkhoroshevskyi
authored
0.14.0 Release (#200)
* try getting just stage name, but fall back to str representation of stage; close #197 * version 0.13.3a1 for pipestat 0.6.0a1 * updated to pipestat 0.6.0 * updated requirements * testing, drop python 3.7 * fix f-string quote issue for python 3.10 * minor refactor to use pipestat properties instead of cfg dict * update changelog and version number * update v0.13.3 and changelog * fix _refresh_stats bug and change version to 0.14.0 * potential fix for #201 * changelog * v0.14.0a1 prerelease * report_object -> change message_raw to be a values dict to conform with pipestat output schemas * self.pipestat_results_file should take priority over self.pipeline_stats_file related to databio/pepatac#257 * make pipestat_results_file = pipeline_stats_file if it is not provided * set pipeline_stats_file if pipestat_results_file IS provided, remove checking for the first record_identifier during get_stat * add pipestat_pipeline_type, defaulting to sample * pipestat req version bump, v0.14.0a2 bump for pre-release * v0.14.0 release prep --------- Co-authored-by: Vince Reuter <vince.reuter@gmail.com> Co-authored-by: Khoroshevskyi <sasha99250@gmail.com>
1 parent 8aaede5 commit 3a8465a

File tree

9 files changed

+103
-77
lines changed

9 files changed

+103
-77
lines changed

.github/workflows/run-pytest.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ jobs:
1111
runs-on: ${{ matrix.os }}
1212
strategy:
1313
matrix:
14-
python-version: ["3.7", "3.8", "3.9", "3.10"]
14+
python-version: ["3.8", "3.10"]
1515
os: [ubuntu-latest]
1616

1717
steps:

docs/changelog.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,17 @@
11
# Changelog
22

3+
## [0.14.0] -- 2023-12-22
4+
### Changed
5+
- refactor for pipestat v0.6.0 release
6+
- drop python 2.7
7+
- updated requirements
8+
- changed message_raw to be a value_dict when reporting to conform to pipestat
9+
- ### Fixed
10+
- fixed #196 and #197
11+
- ### Added
12+
- added `force_overwrite` to `report_result` and `report_object`
13+
- added pipestat_pipeline_type, defaulting to sample-level
14+
315
## [0.13.2] -- 2023-08-02
416
### Fixed
517
- fixed self.new_start overriding checkpoints.

pypiper/_version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.13.2"
1+
__version__ = "0.14.0"

pypiper/manager.py

Lines changed: 74 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,7 @@ def __init__(
141141
pipestat_schema=None,
142142
pipestat_results_file=None,
143143
pipestat_config=None,
144+
pipestat_pipeline_type=None,
144145
pipestat_result_formatter=None,
145146
**kwargs,
146147
):
@@ -329,32 +330,35 @@ def __init__(
329330
signal.signal(signal.SIGTERM, self._signal_term_handler)
330331

331332
# pipestat setup
332-
self.pipestat_sample_name = pipestat_sample_name or DEFAULT_SAMPLE_NAME
333-
# getattr(self, "sample_name", DEFAULT_SAMPLE_NAME)
333+
self.pipestat_record_identifier = pipestat_sample_name or DEFAULT_SAMPLE_NAME
334+
self.pipestat_pipeline_type = pipestat_pipeline_type or "sample"
334335

335336
# don't force default pipestat_results_file value unless
336337
# pipestat config not provided
337338
if pipestat_config is None and pipestat_results_file is None:
338-
pipestat_results_file = pipeline_filepath(
339-
self, filename="pipestat_results.yaml"
340-
)
339+
self.pipestat_results_file = self.pipeline_stats_file
340+
elif pipestat_results_file:
341+
self.pipestat_results_file = pipestat_results_file
342+
self.pipeline_stats_file = self.pipestat_results_file
341343

342344
def _get_arg(args_dict, arg_name):
343345
"""safely get argument from arg dict -- return None if doesn't exist"""
344346
return None if arg_name not in args_dict else args_dict[arg_name]
345347

346348
self._pipestat_manager = PipestatManager(
347-
sample_name=self.pipestat_sample_name
349+
record_identifier=self.pipestat_record_identifier
348350
or _get_arg(args_dict, "pipestat_sample_name")
349351
or DEFAULT_SAMPLE_NAME,
350352
pipeline_name=self.name,
351353
schema_path=pipestat_schema
352354
or _get_arg(args_dict, "pipestat_schema")
353355
or default_pipestat_output_schema(sys.argv[0]),
354-
results_file_path=self.pipeline_stats_file
355-
or _get_arg(args_dict, "pipestat_results_file"),
356+
results_file_path=self.pipestat_results_file
357+
or _get_arg(args_dict, "pipestat_results_file")
358+
or self.pipeline_stats_file,
356359
config_file=pipestat_config or _get_arg(args_dict, "pipestat_config"),
357360
multi_pipelines=multi,
361+
pipeline_type=self.pipestat_pipeline_type,
358362
)
359363

360364
self.start_pipeline(args, multi)
@@ -437,7 +441,7 @@ def _completed(self):
437441
:return bool: Whether the managed pipeline is in a completed state.
438442
"""
439443
return (
440-
self.pipestat.get_status(self._pipestat_manager.sample_name)
444+
self.pipestat.get_status(self._pipestat_manager.record_identifier)
441445
== COMPLETE_FLAG
442446
)
443447

@@ -448,7 +452,10 @@ def _failed(self):
448452
449453
:return bool: Whether the managed pipeline is in a failed state.
450454
"""
451-
return self.pipestat.get_status(self._pipestat_manager.sample_name) == FAIL_FLAG
455+
return (
456+
self.pipestat.get_status(self._pipestat_manager.record_identifier)
457+
== FAIL_FLAG
458+
)
452459

453460
@property
454461
def halted(self):
@@ -457,7 +464,8 @@ def halted(self):
457464
:return bool: Whether the managed pipeline is in a paused/halted state.
458465
"""
459466
return (
460-
self.pipestat.get_status(self._pipestat_manager.sample_name) == PAUSE_FLAG
467+
self.pipestat.get_status(self._pipestat_manager.record_identifier)
468+
== PAUSE_FLAG
461469
)
462470

463471
@property
@@ -720,11 +728,12 @@ def start_pipeline(self, args=None, multi=False):
720728
results = self._pipestat_manager.__str__().split("\n")
721729
for i in results:
722730
self.info("* " + i)
723-
self.info("* Sample name: " + self.pipestat_sample_name + "\n")
731+
self.info("* Sample name: " + self.pipestat_record_identifier + "\n")
724732
self.info("\n----------------------------------------\n")
725733
self.status = "running"
726734
self.pipestat.set_status(
727-
sample_name=self._pipestat_manager.sample_name, status_identifier="running"
735+
record_identifier=self._pipestat_manager.record_identifier,
736+
status_identifier="running",
728737
)
729738

730739
# Record the start in PIPE_profile and PIPE_commands output files so we
@@ -770,7 +779,8 @@ def _set_status_flag(self, status):
770779
prev_status = self.status
771780
self.status = status
772781
self.pipestat.set_status(
773-
sample_name=self._pipestat_manager.sample_name, status_identifier=status
782+
record_identifier=self._pipestat_manager.record_identifier,
783+
status_identifier=status,
774784
)
775785
self.debug("\nChanged status from {} to {}.".format(prev_status, self.status))
776786

@@ -786,8 +796,8 @@ def _flag_file_path(self, status=None):
786796
"""
787797

788798
flag_file_name = "{}_{}_{}".format(
789-
self._pipestat_manager["_pipeline_name"],
790-
self.pipestat_sample_name,
799+
self._pipestat_manager.pipeline_name,
800+
self.pipestat_record_identifier,
791801
flag_name(status or self.status),
792802
)
793803
return pipeline_filepath(self, filename=flag_file_name)
@@ -1419,7 +1429,7 @@ def _wait_for_lock(self, lock_file):
14191429
)
14201430
# self._set_status_flag(WAIT_FLAG)
14211431
self.pipestat.set_status(
1422-
sample_name=self._pipestat_manager.sample_name,
1432+
record_identifier=self._pipestat_manager.record_identifier,
14231433
status_identifier="waiting",
14241434
)
14251435
first_message_flag = True
@@ -1443,7 +1453,7 @@ def _wait_for_lock(self, lock_file):
14431453
self.timestamp("File unlocked.")
14441454
# self._set_status_flag(RUN_FLAG)
14451455
self.pipestat.set_status(
1446-
sample_name=self._pipestat_manager.sample_name,
1456+
record_identifier=self._pipestat_manager.record_identifier,
14471457
status_identifier="running",
14481458
)
14491459

@@ -1582,7 +1592,9 @@ def _report_profile(
15821592
with open(self.pipeline_profile_file, "a") as myfile:
15831593
myfile.write(message_raw + "\n")
15841594

1585-
def report_result(self, key, value, nolog=False, result_formatter=None):
1595+
def report_result(
1596+
self, key, value, nolog=False, result_formatter=None, force_overwrite=False
1597+
):
15861598
"""
15871599
Writes a key:value pair to self.pipeline_stats_file.
15881600
@@ -1592,6 +1604,7 @@ def report_result(self, key, value, nolog=False, result_formatter=None):
15921604
logfile. Use sparingly in case you will be printing the result in a
15931605
different format.
15941606
:param str result_formatter: function for formatting via pipestat backend
1607+
:param bool force_overwrite: overwrite results if they already exist?
15951608
:return str reported_result: the reported result is returned as a list of formatted strings.
15961609
15971610
"""
@@ -1602,13 +1615,19 @@ def report_result(self, key, value, nolog=False, result_formatter=None):
16021615

16031616
reported_result = self.pipestat.report(
16041617
values={key: value},
1605-
sample_name=self.pipestat_sample_name,
1618+
record_identifier=self.pipestat_record_identifier,
16061619
result_formatter=rf,
1620+
force_overwrite=force_overwrite,
16071621
)
16081622

16091623
if not nolog:
1610-
for r in reported_result:
1611-
self.info(r)
1624+
if isinstance(
1625+
reported_result, bool
1626+
): # Pipestat can return False if results are NOT reported.
1627+
self.info("Result successfully reported? " + str(reported_result))
1628+
else:
1629+
for r in reported_result:
1630+
self.info(r)
16121631

16131632
return reported_result
16141633

@@ -1621,6 +1640,7 @@ def report_object(
16211640
annotation=None,
16221641
nolog=False,
16231642
result_formatter=None,
1643+
force_overwrite=False,
16241644
):
16251645
"""
16261646
Writes a key:value pair to self.pipeline_stats_file. Note: this function
@@ -1641,6 +1661,7 @@ def report_object(
16411661
logfile. Use sparingly in case you will be printing the result in a
16421662
different format.
16431663
:param str result_formatter: function for formatting via pipestat backend
1664+
:param bool force_overwrite: overwrite results if they already exist?
16441665
:return str reported_result: the reported result is returned as a list of formatted strings.
16451666
"""
16461667
warnings.warn(
@@ -1659,37 +1680,30 @@ def report_object(
16591680
anchor_text = str(key).strip()
16601681
# better to use a relative path in this file
16611682
# convert any absolute paths into relative paths
1662-
relative_filename = (
1663-
os.path.relpath(filename, self.outfolder)
1664-
if os.path.isabs(filename)
1665-
else filename
1666-
)
1667-
1668-
if anchor_image:
1669-
relative_anchor_image = (
1670-
os.path.relpath(anchor_image, self.outfolder)
1671-
if os.path.isabs(anchor_image)
1672-
else anchor_image
1673-
)
1674-
else:
1675-
relative_anchor_image = "None"
16761683

1677-
message_raw = "{filename}\t{anchor_text}\t{anchor_image}\t{annotation}".format(
1678-
filename=relative_filename,
1679-
anchor_text=anchor_text,
1680-
anchor_image=relative_anchor_image,
1681-
annotation=annotation,
1682-
)
1683-
1684-
val = {key: message_raw.replace("\t", " ")}
1684+
values = {
1685+
"path": filename,
1686+
"thumbnail_path": anchor_image,
1687+
"title": anchor_text,
1688+
"annotation": annotation,
1689+
}
1690+
val = {key: values}
16851691

16861692
reported_result = self.pipestat.report(
1687-
values=val, sample_name=self.pipestat_sample_name, result_formatter=rf
1693+
values=val,
1694+
record_identifier=self.pipestat_record_identifier,
1695+
result_formatter=rf,
1696+
force_overwrite=force_overwrite,
16881697
)
1698+
16891699
if not nolog:
1690-
for r in reported_result:
1691-
self.info(r)
1692-
return reported_result
1700+
if isinstance(
1701+
reported_result, bool
1702+
): # Pipestat can return False if results are NOT reported.
1703+
self.info("Result successfully reported? " + str(reported_result))
1704+
else:
1705+
for r in reported_result:
1706+
self.info(r)
16931707

16941708
def _safe_write_to_file(self, file, message):
16951709
"""
@@ -1849,15 +1863,11 @@ def _refresh_stats(self):
18491863

18501864
if os.path.isfile(self.pipeline_stats_file):
18511865
_, data = read_yaml_data(path=self.pipeline_stats_file, what="stats_file")
1852-
print(data)
1853-
pipeline_key = list(
1854-
data[self.pipestat["_pipeline_name"]][self.pipestat["_pipeline_type"]]
1855-
)[0]
1856-
if self.name == pipeline_key:
1857-
for key, value in data[self.pipestat["_pipeline_name"]][
1858-
self.pipestat["_pipeline_type"]
1859-
][pipeline_key].items():
1860-
self.stats_dict[key] = value.strip()
1866+
1867+
for key, value in data[self._pipestat_manager.pipeline_name][
1868+
self._pipestat_manager.pipeline_type
1869+
][self._pipestat_manager.record_identifier].items():
1870+
self.stats_dict[key] = value
18611871

18621872
def get_stat(self, key):
18631873
"""
@@ -1989,12 +1999,12 @@ def complete(self):
19891999
"""Stop a completely finished pipeline."""
19902000
self.stop_pipeline(status=COMPLETE_FLAG)
19912001

1992-
def fail_pipeline(self, exc, dynamic_recover=False):
2002+
def fail_pipeline(self, exc: Exception, dynamic_recover: bool = False):
19932003
"""
19942004
If the pipeline does not complete, this function will stop the pipeline gracefully.
19952005
It sets the status flag to failed and skips the normal success completion procedure.
19962006
1997-
:param Exception e: Exception to raise.
2007+
:param Exception exc: Exception to raise.
19982008
:param bool dynamic_recover: Whether to recover e.g. for job termination.
19992009
"""
20002010
# Take care of any active running subprocess
@@ -2024,9 +2034,8 @@ def fail_pipeline(self, exc, dynamic_recover=False):
20242034
total_time = datetime.timedelta(seconds=self.time_elapsed(self.starttime))
20252035
self.info("Total time: " + str(total_time))
20262036
self.info("Failure reason: " + str(exc))
2027-
# self._set_status_flag(FAIL_FLAG)
20282037
self.pipestat.set_status(
2029-
sample_name=self._pipestat_manager.sample_name,
2038+
record_identifier=self._pipestat_manager.record_identifier,
20302039
status_identifier="failed",
20312040
)
20322041

@@ -2087,7 +2096,8 @@ def stop_pipeline(self, status=COMPLETE_FLAG):
20872096
"""
20882097
# self._set_status_flag(status)
20892098
self.pipestat.set_status(
2090-
sample_name=self._pipestat_manager.sample_name, status_identifier=status
2099+
record_identifier=self._pipestat_manager.record_identifier,
2100+
status_identifier=status,
20912101
)
20922102
self._cleanup()
20932103
elapsed_time_this_run = str(
@@ -2457,8 +2467,8 @@ def _cleanup(self, dry_run=False):
24572467
for fn in glob.glob(self.outfolder + flag_name("*"))
24582468
if COMPLETE_FLAG not in os.path.basename(fn)
24592469
and not "{}_{}_{}".format(
2460-
self._pipestat_manager["_pipeline_name"],
2461-
self.pipestat_sample_name,
2470+
self._pipestat_manager.pipeline_name,
2471+
self.pipestat_record_identifier,
24622472
run_flag,
24632473
)
24642474
== os.path.basename(fn)

pypiper/pipeline.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -330,7 +330,7 @@ def run(self, start_point=None, stop_before=None, stop_after=None):
330330
# between results from different stages.
331331
skip_mode = False
332332

333-
print("Running stage: {}".format(stage))
333+
print(f"Running stage: {getattr(stage, 'name', str(stage))}")
334334

335335
stage.run()
336336
self.executed.append(stage)

pypiper/utils.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -387,20 +387,20 @@ def split_by_pipes(cmd):
387387
cmdlist = []
388388
newcmd = str()
389389
for char in cmd:
390-
if char is "{":
390+
if char == "{":
391391
stack_brace.append("{")
392-
elif char is "}":
392+
elif char == "}":
393393
stack_brace.pop()
394-
elif char is "(":
394+
elif char == "(":
395395
stack_paren.append("(")
396-
elif char is ")":
396+
elif char == ")":
397397
stack_paren.pop()
398398

399399
if len(stack_brace) > 0 or len(stack_paren) > 0:
400400
# We are inside a parenthetic of some kind; emit character
401401
# no matter what it is
402402
newcmd += char
403-
elif char is "|":
403+
elif char == "|":
404404
# if it's a pipe, finish the command and start a new one
405405
cmdlist.append(newcmd)
406406
newcmd = str()
@@ -1110,9 +1110,11 @@ def _add_args(parser, args, required):
11101110
return parser
11111111

11121112

1113-
def result_formatter_markdown(pipeline_name, sample_name, res_id, value) -> str:
1113+
def result_formatter_markdown(pipeline_name, record_identifier, res_id, value) -> str:
11141114
"""
11151115
Returns Markdown formatted value as string
1116+
1117+
# Pipeline_name and record_identifier should be kept because pipestat needs it
11161118
"""
11171119

11181120
message_markdown = "\n> `{key}`\t{value}\t_RES_".format(key=res_id, value=value)

0 commit comments

Comments
 (0)