Merge pull request #219 from databio/dev
v0.14.1 release
donaldcampbelljr authored Apr 19, 2024
2 parents 3a8465a + 703580d commit 2682369
Showing 13 changed files with 104 additions and 114 deletions.
3 changes: 3 additions & 0 deletions .gitignore
@@ -86,3 +86,6 @@ piper.egg-info/

*ipynb_checkpoints*
*.egg-info*


example_pipelines/pipeline_output
2 changes: 1 addition & 1 deletion README.md
@@ -4,7 +4,7 @@

[![Documentation Status](https://readthedocs.org/projects/pypiper/badge/?version=latest)](http://pypiper.readthedocs.org/en/latest/?badge=latest)
[![Build Status](https://github.com/databio/pypiper/actions/workflows/run-pytest.yml/badge.svg)](https://github.com/databio/pypiper/actions/workflows/run-pytest.yml)
[![PEP compatible](http://pepkit.github.io/img/PEP-compatible-green.svg)](http://pepkit.github.io)
[![PEP compatible](https://pepkit.github.io/img/PEP-compatible-green.svg)](http://pepkit.github.io)
[![pypi-badge](https://img.shields.io/pypi/v/piper)](https://pypi.org/project/piper)
[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)

8 changes: 8 additions & 0 deletions docs/changelog.md
@@ -1,5 +1,13 @@
# Changelog

## [0.14.1] -- 2024-04-19
### Changed
- remove `pipestat_project_name` from `PipelineManager` parameters
- rename `pipestat_sample_name` to `pipestat_record_identifier` in `PipelineManager` parameters (usage sketched below this file's diff)
- update requirements to pipestat 0.9.0, ubiquerg 0.8.0, and yacman 0.9.3
- set `force_overwrite` to default to `True` (Issue #209)


## [0.14.0] -- 2023-12-22
### Changed
- refactor for pipestat v0.6.0 release
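Not part of the diff: a minimal sketch of what the 0.14.1 parameter changes listed in the changelog above look like from a pipeline author's perspective. The pipeline name, output folder, record identifier, and result key below are placeholders, not values taken from this PR.

```python
import pypiper

# Hypothetical pipeline setup after 0.14.1: the old pipestat_sample_name
# keyword is now pipestat_record_identifier, and pipestat_project_name is gone.
pm = pypiper.PipelineManager(
    name="example_pipeline",
    outfolder="pipeline_output/",
    pipestat_record_identifier="sample_1",
)

# force_overwrite now defaults to True, so re-reporting a result replaces
# the previously recorded value instead of refusing to overwrite it.
pm.report_result("alignment_rate", 0.97)
pm.stop_pipeline()
```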
4 changes: 2 additions & 2 deletions example_pipelines/basic.py
@@ -26,15 +26,15 @@
tgt = "pipeline_output/test.out"

# build the command
cmd = "shuf -i 1-500000000 -n 10000000 > " + tgt
cmd = f"shuf -i 1-500000000 -n 10000000 > {tgt}"

# and run with run().
pm.run(cmd, target=tgt)

# Now copy the data into a new file.
# first specify target file and build command:
tgt = "pipeline_output/copied.out"
cmd = "cp pipeline_output/test.out " + tgt
cmd = f"cp pipeline_output/test.out {tgt}"
pm.run(cmd, target=tgt)

# You can also string multiple commands together, which will execute
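The comment above is truncated where the diff view cuts off. As a hedged illustration of the pattern it introduces, `run()` also accepts a list of commands executed in sequence against a single target; the file names below are placeholders in the same style as the example pipeline.

```python
# Hypothetical continuation of the example above: several shell commands
# grouped as one unit; the target is only considered complete when all run.
tgt = "pipeline_output/merged.out"
cmd1 = f"cat pipeline_output/test.out pipeline_output/copied.out > {tgt}"
cmd2 = f"wc -l {tgt}"
pm.run([cmd1, cmd2], target=tgt)
```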
2 changes: 1 addition & 1 deletion example_pipelines/hello_pypiper.py
@@ -12,7 +12,7 @@

# Now build a command-line command however you like, and pass it to pm.run()
target_file = "hello_pypiper_results/output.txt"
cmd = "echo 'Hello, Pypiper!' > " + target_file
cmd = f"echo 'Hello, Pypiper!' > {target_file}"
pm.run(cmd, target_file)

pm.stop_pipeline()
2 changes: 1 addition & 1 deletion pypiper/_version.py
@@ -1 +1 @@
__version__ = "0.14.0"
__version__ = "0.14.1"
1 change: 0 additions & 1 deletion pypiper/const.py
@@ -1,6 +1,5 @@
""" Pypiper constants. """


CHECKPOINT_EXTENSION = ".checkpoint"
DEFAULT_SAMPLE_NAME = "DEFAULT_SAMPLE_NAME"
PIPELINE_CHECKPOINT_DELIMITER = "_"
162 changes: 64 additions & 98 deletions pypiper/manager.py
@@ -52,12 +52,13 @@
default_pipestat_output_schema,
result_formatter_markdown,
)
from pipestat.helpers import read_yaml_data
from yacman import load_yaml

__all__ = ["PipelineManager"]


LOCK_PREFIX = "lock."
LOGFILE_SUFFIX = "_log.md"


class Unbuffered(object):
@@ -112,6 +113,12 @@ class PipelineManager(object):
protect from a case in which a restart begins upstream of a stage
for which a checkpoint file already exists, but that depends on the
upstream stage and thus should be rerun if its "parent" is rerun.
:param str pipestat_record_identifier: record_identifier to report results via pipestat
:param str pipestat_schema: output schema used by pipestat to report results
:param str pipestat_results_file: path to file backend for reporting results
:param str pipestat_config_file: path to pipestat configuration file
:param str pipestat_pipeline_type: Sample or Project level pipeline
:param pipestat_result_formatter: function used to style reported results, defaults to result_formatter_markdown
:raise TypeError: if start or stop point(s) are provided both directly and
via args namespace, or if both stopping types (exclusive/prospective
and inclusive/retrospective) are provided.
@@ -136,8 +143,7 @@ def __init__(
output_parent=None,
overwrite_checkpoints=False,
logger_kwargs=None,
pipestat_project_name=None,
pipestat_sample_name=None,
pipestat_record_identifier=None,
pipestat_schema=None,
pipestat_results_file=None,
pipestat_config=None,
@@ -193,10 +199,7 @@ def __init__(
# If no starting point was specified, assume that the pipeline's
# execution is to begin right away and set the internal flag so that
# run() is let loose to execute instructions given.
if not self.start_point:
self._active = True
else:
self._active = False
self._active = not self.start_point

# Pipeline-level variables to track global state and pipeline stats
# Pipeline settings
@@ -210,26 +213,37 @@ def __init__(
self.output_parent = params["output_parent"]
self.testmode = params["testmode"]

# Establish the log file to check safety with logging keyword arguments.
# Establish the output folder since it's required for the log file.
self.outfolder = os.path.join(outfolder, "") # trailing slash
self.pipeline_log_file = pipeline_filepath(self, suffix=LOGFILE_SUFFIX)

# Set up logger
logger_kwargs = logger_kwargs or {}
if logger_kwargs.get("logfile") == self.pipeline_log_file:
raise ValueError(
f"The logfile given for the pipeline manager's logger matches that which will be used by the manager itself: {self.pipeline_log_file}"
)
default_logname = ".".join([__name__, self.__class__.__name__, self.name])
if not args:
self._logger = None
if args:
logger_builder_method = "logger_via_cli"
try:
self._logger = logger_via_cli(args, **logger_kwargs)
except logmuse.est.AbsentOptionException as e:
# Defer logger construction to init_logger.
self.debug(f"logger_via_cli failed: {e}")
if self._logger is None:
logger_builder_method = "init_logger"
# covers cases of bool(args) being False, or failure of logger_via_cli.
# strict is only for logger_via_cli.
kwds = {k: v for k, v in logger_kwargs.items() if k != "strict"}
logger_kwargs = {k: v for k, v in logger_kwargs.items() if k != "strict"}
try:
name = kwds.pop("name")
name = logger_kwargs.pop("name")
except KeyError:
name = default_logname
self._logger = logmuse.init_logger(name, **kwds)
self.debug("Logger set with logmuse.init_logger")
else:
logger_kwargs.setdefault("name", default_logname)
try:
self._logger = logmuse.logger_via_cli(args)
self.debug("Logger set with logmuse.logger_via_cli")
except logmuse.est.AbsentOptionException:
self._logger = logmuse.init_logger("pypiper", level="DEBUG")
self.debug("logger_via_cli failed; Logger set with logmuse.init_logger")
self._logger = logmuse.init_logger(name, **logger_kwargs)
self.debug(f"Logger set with {logger_builder_method}")

# Keep track of an ID for the number of processes attempted
self.proc_count = 0
@@ -276,10 +290,7 @@ def __init__(
# self.output_parent = os.path.join(os.getcwd(), self.output_parent)

# File paths:
self.outfolder = os.path.join(outfolder, "") # trailing slash
self.make_sure_path_exists(self.outfolder)
self.pipeline_log_file = pipeline_filepath(self, suffix="_log.md")

self.pipeline_profile_file = pipeline_filepath(self, suffix="_profile.tsv")

# Stats and figures are general and so lack the pipeline name.
@@ -330,7 +341,9 @@ def __init__(
signal.signal(signal.SIGTERM, self._signal_term_handler)

# pipestat setup
self.pipestat_record_identifier = pipestat_sample_name or DEFAULT_SAMPLE_NAME
self.pipestat_record_identifier = (
pipestat_record_identifier or DEFAULT_SAMPLE_NAME
)
self.pipestat_pipeline_type = pipestat_pipeline_type or "sample"

# don't force default pipestat_results_file value unless
@@ -631,88 +644,41 @@ def start_pipeline(self, args=None, multi=False):
# Print out a header section in the pipeline log:
# Wrap things in backticks to prevent markdown from interpreting underscores as emphasis.
# print("----------------------------------------")
self.info("### Pipeline run code and environment:\n")
self.info(
"* " + "Command".rjust(20) + ": " + "`" + str(" ".join(sys.argv)) + "`"
)
self.info("* " + "Compute host".rjust(20) + ": " + platform.node())
self.info("* " + "Working dir".rjust(20) + ": " + os.getcwd())
self.info("* " + "Outfolder".rjust(20) + ": " + self.outfolder)
def logfmt(key, value=None, padding=16):
padded_key = key.rjust(padding)
formatted_val = f"`{value}`" if value else ""
return f"* {padded_key}: {formatted_val}"

self.timestamp("* " + "Pipeline started at".rjust(20) + ": ")
self.info("### Pipeline run code and environment:\n")
self.info(logfmt("Command", str(" ".join(sys.argv))))
self.info(logfmt("Compute host", platform.node()))
self.info(logfmt("Working dir", os.getcwd()))
self.info(logfmt("Outfolder", self.outfolder))
self.info(logfmt("Log file", self.pipeline_log_file))
self.timestamp(logfmt("Start time"))

self.info("\n### Version log:\n")
self.info("* " + "Python version".rjust(20) + ": " + platform.python_version())
self.info(logfmt("Python version", platform.python_version()))
try:
self.info(
"* "
+ "Pypiper dir".rjust(20)
+ ": "
+ "`"
+ gitvars["pypiper_dir"].strip()
+ "`"
)
self.info("* " + "Pypiper version".rjust(20) + ": " + __version__)
self.info(
"* " + "Pypiper hash".rjust(20) + ": " + str(gitvars["pypiper_hash"])
)
self.info(
"* "
+ "Pypiper branch".rjust(20)
+ ": "
+ str(gitvars["pypiper_branch"])
)
self.info(
"* " + "Pypiper date".rjust(20) + ": " + str(gitvars["pypiper_date"])
)
self.info(logfmt("Pypiper dir", gitvars["pypiper_dir"].strip()))
self.info(logfmt("Pypiper version", __version__))
self.info(logfmt("Pypiper hash", gitvars["pypiper_hash"]))
self.info(logfmt("Pypiper branch", gitvars["pypiper_branch"]))
self.info(logfmt("Pypiper date", gitvars["pypiper_date"]))
if gitvars["pypiper_diff"]:
self.info(
"* "
+ "Pypiper diff".rjust(20)
+ ": "
+ str(gitvars["pypiper_diff"])
)
self.info(logfmt("Pypiper diff", gitvars["pypiper_diff"]))
except KeyError:
# It is ok if keys aren't set; it means pypiper isn't in a git repo.
pass

try:
self.info(
"* "
+ "Pipeline dir".rjust(20)
+ ": "
+ "`"
+ gitvars["pipe_dir"].strip()
+ "`"
)
self.info(
"* " + "Pipeline version".rjust(20) + ": " + str(self.pl_version)
)
self.info(
"* "
+ "Pipeline hash".rjust(20)
+ ": "
+ str(gitvars["pipe_hash"]).strip()
)
self.info(
"* "
+ "Pipeline branch".rjust(20)
+ ": "
+ str(gitvars["pipe_branch"]).strip()
)
self.info(
"* "
+ "Pipeline date".rjust(20)
+ ": "
+ str(gitvars["pipe_date"]).strip()
)
self.info(logfmt("Pipeline dir", gitvars["pipe_dir"].strip()))
self.info(logfmt("Pipeline version", self.pl_version))
self.info(logfmt("Pipeline hash", gitvars["pipe_hash"]).strip())
self.info(logfmt("Pipeline branch", gitvars["pipe_branch"]).strip())
self.info(logfmt("Pipeline date", gitvars["pipe_date"]).strip())
if gitvars["pipe_diff"] != "":
self.info(
"* "
+ "Pipeline diff".rjust(20)
+ ": "
+ str(gitvars["pipe_diff"]).strip()
)
self.info(logfmt("Pipeline diff", gitvars["pipe_diff"]).strip())
except KeyError:
# It is ok if keys aren't set; it means the pipeline isn't in a git repo.
pass
@@ -1593,7 +1559,7 @@ def _report_profile(
myfile.write(message_raw + "\n")

def report_result(
self, key, value, nolog=False, result_formatter=None, force_overwrite=False
self, key, value, nolog=False, result_formatter=None, force_overwrite=True
):
"""
Writes a key:value pair to self.pipeline_stats_file.
@@ -1640,7 +1606,7 @@ def report_object(
annotation=None,
nolog=False,
result_formatter=None,
force_overwrite=False,
force_overwrite=True,
):
"""
Writes a key:value pair to self.pipeline_stats_file. Note: this function
@@ -1862,7 +1828,7 @@ def _refresh_stats(self):
"""

if os.path.isfile(self.pipeline_stats_file):
_, data = read_yaml_data(path=self.pipeline_stats_file, what="stats_file")
data = load_yaml(filepath=self.pipeline_stats_file)

for key, value in data[self._pipestat_manager.pipeline_name][
self._pipestat_manager.pipeline_type
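Two behavioral changes in manager.py above are worth illustrating. First, the manager now derives its own log file path early and rejects a `logger_kwargs` logfile that collides with it. A hedged sketch of passing logging options under the new check; the keyword names mirror `logmuse.init_logger` and the paths are placeholders:

```python
import pypiper

pm = pypiper.PipelineManager(
    name="example_pipeline",
    outfolder="pipeline_output/",
    # Any logfile here must differ from the manager's own log file,
    # pipeline_output/example_pipeline_log.md, or __init__ raises ValueError.
    logger_kwargs={"level": "DEBUG", "logfile": "pipeline_output/debug.log"},
)
```

Second, the stats-refresh path swaps `pipestat.helpers.read_yaml_data` (which returned a `(key, data)` tuple) for yacman's `load_yaml`, which returns the parsed mapping directly. A hedged standalone sketch, with a placeholder stats file path and pipeline name:

```python
from yacman import load_yaml

# Placeholder path; inside the manager this is self.pipeline_stats_file.
stats = load_yaml(filepath="pipeline_output/stats.yaml")

# _refresh_stats() reads the entries nested under the pipeline name and the
# pipeline type ("sample" or "project"); the names here are placeholders.
for key, value in stats["example_pipeline"]["sample"].items():
    print(key, value)
```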
3 changes: 1 addition & 2 deletions pypiper/ngstk.py
@@ -153,8 +153,7 @@ def get_file_size(self, filenames):
return sum([self.get_file_size(filename) for filename in filenames])

return round(
sum([float(os.stat(f).st_size) for f in filenames.split(" ")])
/ (1024**2),
sum([float(os.stat(f).st_size) for f in filenames.split(" ")]) / (1024**2),
4,
)

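The ngstk hunk above is only a formatting change, but for clarity, here is a standalone equivalent of the expression it reformats (not the NGSTk API itself); the file names in the comment are placeholders:

```python
import os

def total_file_size_mb(filenames: str) -> float:
    # Sum the byte sizes of the space-separated paths, convert to MiB,
    # and round to 4 decimal places, as get_file_size() does above.
    return round(
        sum(float(os.stat(f).st_size) for f in filenames.split(" ")) / (1024**2),
        4,
    )

# e.g. total_file_size_mb("reads_R1.fastq.gz reads_R2.fastq.gz")
```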
4 changes: 1 addition & 3 deletions pypiper/utils.py
@@ -785,12 +785,10 @@ def pipeline_filepath(pm, filename=None, suffix=None):
filename as given or determined by the pipeline name, and suffix
appended if given.
"""

if filename is None and suffix is None:
raise TypeError(
"Provide filename and/or suffix to create " "path to a pipeline file."
"Provide filename and/or suffix to create path to a pipeline file."
)

filename = (filename or pm.name) + (suffix or "")

# Note that Pipeline and PipelineManager define the same outfolder.
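The docstring above describes how the path is assembled. A hedged usage sketch with a stand-in object (a real PipelineManager would normally be passed), assuming, as the truncated body and the outfolder note suggest, that the result is the manager's outfolder joined with the computed filename; names below are placeholders:

```python
from types import SimpleNamespace
from pypiper.utils import pipeline_filepath

# Stand-in carrying the two attributes pipeline_filepath relies on.
pm = SimpleNamespace(name="example_pipeline", outfolder="pipeline_output/")

print(pipeline_filepath(pm, suffix="_profile.tsv"))  # expected: pipeline_output/example_pipeline_profile.tsv
print(pipeline_filepath(pm, filename="custom.log"))  # expected: pipeline_output/custom.log
```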
2 changes: 1 addition & 1 deletion requirements/requirements-docs.txt
@@ -2,5 +2,5 @@ mkdocs>=1.0
markdown-include
pydoc-markdown
piper
pipestat>=0.6.0
pipestat>=0.9.0a1
https://github.com/databio/mkdocs-databio/archive/master.zip
6 changes: 3 additions & 3 deletions requirements/requirements-pypiper.txt
@@ -1,6 +1,6 @@
logmuse>=0.2.4
psutil
pandas
ubiquerg>=0.4.5
yacman
pipestat>=0.6.0
ubiquerg>=0.8.0
yacman>=0.9.3
pipestat>=0.9.0a1