Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allows for file-like objects to be passed to read_molecule_file #84

Merged
merged 12 commits into from
Jul 16, 2020
19 changes: 15 additions & 4 deletions propka/input.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def open_file_for_reading(input_file):
then will attempt seek(0).
IAlibay marked this conversation as resolved.
Show resolved Hide resolved
"""
try:
input_file.fseek(0)
input_file.seek(0)
return input_file
except AttributeError:
pass
Expand All @@ -35,25 +35,36 @@ def open_file_for_reading(input_file):
return file_


def read_molecule_file(input_file, mol_container):
def read_molecule_file(input_file, mol_container, filename=None):
orbeckst marked this conversation as resolved.
Show resolved Hide resolved
"""Read input file (PDB or PROPKA) for a molecular container

Args
input_file: input file to read
IAlibay marked this conversation as resolved.
Show resolved Hide resolved
mol_container: MolecularContainer object
filename: str, optional input filename when using a filestream
Returns
updated MolecularContainer object
Raises
ValueError if invalid input given
"""
input_path = Path(input_file)
try:
input_path = Path(input_file)
except TypeError:
try:
input_path = Path(filename)
except TypeError:
errmsg = ("Path of provided input_file could not be determined "
"if passing a stream-like object, please provide an "
"appropriate string for the filename argument.")
raise TypeError(errmsg) from None

mol_container.name = input_path.stem
input_file_extension = input_path.suffix
if input_file_extension.lower() == '.pdb':
# input is a pdb file. read in atoms and top up containers to make
# sure that all atoms are present in all conformations
conformations, conformation_names = read_pdb(
input_path, mol_container.version.parameters, mol_container)
input_file, mol_container.version.parameters, mol_container)
if len(conformations) == 0:
str_ = ('Error: The pdb file does not seems to contain any '
'molecular conformations')
Expand Down
63 changes: 63 additions & 0 deletions tests/test_basic_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,3 +152,66 @@ def test_regression(pdb, options, tmp_path):
run_propka(options, pdb_path, tmp_path)
if ref_path is not None:
compare_output(pdb, tmp_path, ref_path)


def run_propka_stream(options, input_file, tmp_path):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does it also work with StringIO buffers?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes :) I've moved everything over to a separate test_streamio.py file (and added an `__init__.py`), as it was getting a tad crowded for a file that was really meant to test basic regression. I've also:
a) tested both TextIO and StringIO inputs.
b) reduced the number of test cases (all that needed to be checked was whether it gave the same results and also allowed arguments).
c) switched the usage of tmp_dir over to pytest's tmpdir, which seems a lot cleaner to use.
d) added a test to capture the TypeError that should be thrown if you don't pass a value to filename in read_molecule_file.

"""Run PROPKA software.

Args:
options: list of PROPKA options
input_file: file-like PDB object
tmp_path: path for working directory
"""
options += [input_file.name]
args = loadOptions(options)
try:
_LOGGER.warning(
"Working in tmpdir {0:s} because of PROPKA file output; "
"need to fix this.".format(str(tmp_path)))
cwd = Path.cwd()
os.chdir(tmp_path)
parameters = read_parameter_file(args.parameters, Parameters())
molecule = MolecularContainer(parameters, args)
molecule = read_molecule_file(input_file, molecule,
filename=input_file.name)
molecule.calculate_pka()
molecule.write_pka()
if args.generate_propka_input:
molecule.write_propka()
finally:
os.chdir(cwd)


@pytest.mark.parametrize("pdb, options", [
    pytest.param("1FTJ-Chain-A", [], id="1FTJ-Chain-A: no options"),
    pytest.param('1HPX', [], id="1HPX: no options"),
    pytest.param('4DFR', [], id="4DFR: no options"),
    pytest.param('3SGB', [], id="3SGB: no options"),
    pytest.param('3SGB-subset', [
        "--titrate_only",
        "E:17,E:18,E:19,E:29,E:44,E:45,E:46,E:118,E:119,E:120,E:139"],
        id="3SGB: --titrate_only"),
    pytest.param('1HPX-warn', ['--quiet'], id="1HPX-warn: --quiet")])
def test_filestream_regression(pdb, options, tmp_path):
    """Basic regression but using streams for the input PDB file.

    Args:
        pdb: stem of the PDB test file (without the ``.pdb`` extension)
        options: list of PROPKA command-line options for this case
        tmp_path: pytest-provided temporary working directory
    Raises:
        FileNotFoundError if the PDB input file does not exist
    """
    path_dict = get_test_dirs()
    ref_path = path_dict["results"] / "{0:s}.dat".format(pdb)
    if ref_path.is_file():
        ref_path = ref_path.resolve()
    else:
        # A missing reference is not fatal: the calculation still runs, only
        # the comparison step is skipped.
        _LOGGER.warning("Missing results file for comparison: {0:s}".format(
            str(ref_path)))
        ref_path = None

    pdb_path = path_dict["pdbs"] / "{0:s}.pdb".format(pdb)
    if not pdb_path.is_file():
        # Path objects reject the "s" format spec, so convert explicitly;
        # otherwise a TypeError would mask the intended FileNotFoundError.
        errstr = "Missing PDB file: {0:s}".format(str(pdb_path))
        raise FileNotFoundError(errstr)
    pdb_path = pdb_path.resolve()
    tmp_path = Path(tmp_path).resolve()

    # The context manager guarantees the stream is closed even if the run
    # fails. A copy of ``options`` is passed because run_propka_stream appends
    # to the list in place, which would otherwise mutate the parametrized
    # list object.
    with open(pdb_path) as input_file:
        run_propka_stream(list(options), input_file, tmp_path)

    if ref_path is not None:
        compare_output(pdb, tmp_path, ref_path)