Skip to content

Commit

Permalink
API docs: fix various formatting issues
Browse files Browse the repository at this point in the history
1. Add an extra line between sections and doctests to correctly
render the doctest blocks
2. Add literal blocks using `::` to preserve formatting of example files
3. Correct `Returns` subtitles
  • Loading branch information
joverlee521 committed Apr 20, 2022
1 parent fb46a0b commit 77df332
Show file tree
Hide file tree
Showing 9 changed files with 47 additions and 23 deletions.
1 change: 1 addition & 0 deletions augur/align.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,7 @@ def strip_non_reference(aln, reference, insertion_csv=None):
list
list of trimmed sequences, effectively a multiple alignment
Tests
-----
>>> [s.name for s in strip_non_reference(read_alignment("tests/data/align/test_aligned_sequences.fasta"), "with_gaps")]
Expand Down
36 changes: 18 additions & 18 deletions augur/clades.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,21 +18,21 @@ def read_in_clade_definitions(clade_file):
Inheritance is allowed, but needs to be acyclic. Alleles can be overwritten by inheriting clades.
Sites are 1 indexed in the file, and are converted to 0 indexed in the output
Empty lines are ignored, comments after # are ignored
Format
------
clade gene site alt
Clade_1 ctpE 81 D
Clade_2 nuc 30642 T
Clade_3 nuc 444296 A
Clade_3 S 1 P
\\# Clade_4 inherits from Clade_3
Clade_4 clade Clade_3
Clade_4 pks8 634 T
\\# Inherited allele can be overwritten
Clade_4 S 1 L
Format::
clade gene site alt
Clade_1 ctpE 81 D
Clade_2 nuc 30642 T
Clade_3 nuc 444296 A
Clade_3 S 1 P
# Clade_4 inherits from Clade_3
Clade_4 clade Clade_3
Clade_4 pks8 634 T
# Inherited allele can be overwritten
Clade_4 S 1 L
Parameters
----------
Expand Down Expand Up @@ -74,22 +74,22 @@ def read_in_clade_definitions(clade_file):
# This way all clades can be reached by traversal
for clade in df.clade.unique():
G.add_edge(root, clade)

# Build inheritance graph
# For clades that inherit, disconnect from root
# Add edge from parent
for _, row in clade_inheritance_rows.iterrows():
G.remove_edge(root, row.clade)
G.add_edge(row.site, row.clade)

if not nx.is_directed_acyclic_graph(G):
raise ValueError(f"Clade definitions contain cycles {list(nx.simple_cycles(G))}")

# Traverse graph top down, so that children can inherit from parents and grandparents
# Topological sort ensures parents are visited before children
# islice is used to skip the root node (which has no parent)
for clade in islice(nx.topological_sort(G),1,None):
# Get name of parent clade
# Get name of parent clade
# G.predecessors(clade) returns iterator, thus next() necessary
# despite the fact that there should only be one parent
parent_clade = next(G.predecessors(clade))
Expand All @@ -99,7 +99,7 @@ def read_in_clade_definitions(clade_file):
for _, row in df[(df.clade == clade) & (df.gene != 'clade')].iterrows():
# Overwrite of parent alleles is possible and happens here
clades[clade][(row.gene, int(row.site)-1)] = row.alt

# Convert items from dict[str, dict[(str,int),str]] to dict[str, list[(str,int,str)]]
clades = {
clade: [
Expand All @@ -110,7 +110,7 @@ def read_in_clade_definitions(clade_file):
# If clause prevents the root (helper) from being emitted
if clade != root
}

return clades


Expand Down
2 changes: 2 additions & 0 deletions augur/distance.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,7 @@ def read_distance_map(map_file):
dict :
Python representation of the distance map JSON
>>> sorted(read_distance_map("tests/data/distance_map_weight_per_site.json").items())
[('default', 0), ('map', {'HA1': {144: 1}})]
>>> sorted(read_distance_map("tests/data/distance_map_weight_per_site_and_sequence.json").items())
Expand Down Expand Up @@ -236,6 +237,7 @@ def get_distance_between_nodes(node_a_sequences, node_b_sequences, distance_map,
float :
distance between node sequences based on the given map
>>> node_a_sequences = {"gene": "ACTG"}
>>> node_b_sequences = {"gene": "ACGG"}
>>> distance_map = {"default": 0, "map": {}}
Expand Down
1 change: 1 addition & 0 deletions augur/export_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -565,6 +565,7 @@ def set_data_provenance(data_json, config):
config : dict
config JSON with an expected ``data_provenance`` key
>>> config = {"data_provenance": [{"name": "GISAID"}, {"name": "INSDC"}]}
>>> data_json = {"meta": {}}
>>> set_data_provenance(data_json, config)
Expand Down
15 changes: 15 additions & 0 deletions augur/filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ def filter_by_exclude_all(metadata):
set[str]:
Empty set of strains
>>> metadata = pd.DataFrame([{"region": "Africa"}, {"region": "Europe"}], index=["strain1", "strain2"])
>>> filter_by_exclude_all(metadata)
set()
Expand All @@ -114,6 +115,7 @@ def filter_by_exclude(metadata, exclude_file):
set[str]:
Strains that pass the filter
>>> metadata = pd.DataFrame([{"region": "Africa"}, {"region": "Europe"}], index=["strain1", "strain2"])
>>> with NamedTemporaryFile(delete=False) as exclude_file:
... characters_written = exclude_file.write(b'strain1')
Expand Down Expand Up @@ -143,6 +145,7 @@ def parse_filter_query(query):
str :
Value of column to query
>>> parse_filter_query("property=value")
('property', <built-in function eq>, 'value')
>>> parse_filter_query("property!=value")
Expand Down Expand Up @@ -177,6 +180,7 @@ def filter_by_exclude_where(metadata, exclude_where):
set[str]:
Strains that pass the filter
>>> metadata = pd.DataFrame([{"region": "Africa"}, {"region": "Europe"}], index=["strain1", "strain2"])
>>> filter_by_exclude_where(metadata, "region!=Europe")
{'strain2'}
Expand Down Expand Up @@ -228,6 +232,7 @@ def filter_by_query(metadata, query):
set[str]:
Strains that pass the filter
>>> metadata = pd.DataFrame([{"region": "Africa"}, {"region": "Europe"}], index=["strain1", "strain2"])
>>> filter_by_query(metadata, "region == 'Africa'")
{'strain1'}
Expand Down Expand Up @@ -256,6 +261,7 @@ def filter_by_ambiguous_date(metadata, date_column="date", ambiguity="any"):
set[str]:
Strains that pass the filter
>>> metadata = pd.DataFrame([{"region": "Africa", "date": "2020-01-XX"}, {"region": "Europe", "date": "2020-01-02"}], index=["strain1", "strain2"])
>>> filter_by_ambiguous_date(metadata)
{'strain2'}
Expand Down Expand Up @@ -298,6 +304,7 @@ def filter_by_date(metadata, date_column="date", min_date=None, max_date=None):
set[str]:
Strains that pass the filter
>>> metadata = pd.DataFrame([{"region": "Africa", "date": "2020-01-01"}, {"region": "Europe", "date": "2020-01-02"}], index=["strain1", "strain2"])
>>> filter_by_date(metadata, min_date=numeric_date("2020-01-02"))
{'strain2'}
Expand Down Expand Up @@ -352,6 +359,7 @@ def filter_by_sequence_index(metadata, sequence_index):
set[str]:
Strains that pass the filter
>>> metadata = pd.DataFrame([{"region": "Africa", "date": "2020-01-01"}, {"region": "Europe", "date": "2020-01-02"}], index=["strain1", "strain2"])
>>> sequence_index = pd.DataFrame([{"strain": "strain1", "ACGT": 28000}]).set_index("strain")
>>> filter_by_sequence_index(metadata, sequence_index)
Expand Down Expand Up @@ -381,6 +389,7 @@ def filter_by_sequence_length(metadata, sequence_index, min_length=0):
set[str]:
Strains that pass the filter
>>> metadata = pd.DataFrame([{"region": "Africa", "date": "2020-01-01"}, {"region": "Europe", "date": "2020-01-02"}], index=["strain1", "strain2"])
>>> sequence_index = pd.DataFrame([{"strain": "strain1", "A": 7000, "C": 7000, "G": 7000, "T": 7000}, {"strain": "strain2", "A": 6500, "C": 6500, "G": 6500, "T": 6500}]).set_index("strain")
>>> filter_by_sequence_length(metadata, sequence_index, min_length=27000)
Expand Down Expand Up @@ -417,6 +426,7 @@ def filter_by_non_nucleotide(metadata, sequence_index):
set[str]:
Strains that pass the filter
>>> metadata = pd.DataFrame([{"region": "Africa", "date": "2020-01-01"}, {"region": "Europe", "date": "2020-01-02"}], index=["strain1", "strain2"])
>>> sequence_index = pd.DataFrame([{"strain": "strain1", "invalid_nucleotides": 0}, {"strain": "strain2", "invalid_nucleotides": 1}]).set_index("strain")
>>> filter_by_non_nucleotide(metadata, sequence_index)
Expand Down Expand Up @@ -447,6 +457,7 @@ def include(metadata, include_file):
set[str]:
Strains that pass the filter
>>> metadata = pd.DataFrame([{"region": "Africa"}, {"region": "Europe"}], index=["strain1", "strain2"])
>>> with NamedTemporaryFile(delete=False) as include_file:
... characters_written = include_file.write(b'strain1')
Expand Down Expand Up @@ -479,6 +490,7 @@ def include_by_include_where(metadata, include_where):
set[str]:
Strains that pass the filter
>>> metadata = pd.DataFrame([{"region": "Africa"}, {"region": "Europe"}], index=["strain1", "strain2"])
>>> include_by_include_where(metadata, "region!=Europe")
{'strain1'}
Expand Down Expand Up @@ -664,6 +676,7 @@ def filter_kwargs_to_str(kwargs):
str :
String representation of the kwargs for reporting.
>>> sequence_index = pd.DataFrame([{"strain": "strain1", "ACGT": 28000}, {"strain": "strain2", "ACGT": 26000}, {"strain": "strain3", "ACGT": 5000}]).set_index("strain")
>>> exclude_by = [(filter_by_sequence_length, {"sequence_index": sequence_index, "min_length": 27000})]
>>> filter_kwargs_to_str(exclude_by[0][1])
Expand Down Expand Up @@ -721,6 +734,7 @@ def apply_filters(metadata, exclude_by, include_by):
For example, filter data by minimum date, but force the inclusion of strains
from Africa.
>>> metadata = pd.DataFrame([{"region": "Africa", "date": "2020-01-01"}, {"region": "Europe", "date": "2020-10-02"}, {"region": "North America", "date": "2020-01-01"}], index=["strain1", "strain2", "strain3"])
>>> exclude_by = [(filter_by_date, {"min_date": numeric_date("2020-04-01")})]
>>> include_by = [(include_by_include_where, {"include_where": "region=Africa"})]
Expand Down Expand Up @@ -825,6 +839,7 @@ def get_groups_for_subsampling(strains, metadata, group_by=None):
list :
A list of dictionaries with strains that were skipped from grouping and the reason why (see also: `apply_filters` output).
>>> strains = ["strain1", "strain2"]
>>> metadata = pd.DataFrame([{"strain": "strain1", "date": "2020-01-01", "region": "Africa"}, {"strain": "strain2", "date": "2020-02-01", "region": "Europe"}]).set_index("strain")
>>> group_by = ["region"]
Expand Down
1 change: 1 addition & 0 deletions augur/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ def read_metadata(metadata_file, id_columns=("strain", "name"), chunk_size=None)
KeyError :
When the metadata file does not have any valid index columns.
For standard use, request a metadata file and get a pandas DataFrame.
>>> read_metadata("tests/functional/filter/metadata.tsv").index.values[0]
Expand Down
3 changes: 3 additions & 0 deletions augur/titer_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ def load_from_file(filenames, excluded_sources=None):
tuple (dict, list, list)
tuple of a dict of titer measurements, list of strains, list of sources
>>> measurements, strains, sources = TiterCollection.load_from_file("tests/data/titer_model/h3n2_titers_subset.tsv")
>>> type(measurements)
<class 'dict'>
Expand Down Expand Up @@ -107,6 +108,7 @@ def count_strains(titers):
dict
number of measurements per strain
>>> measurements, strains, sources = TiterCollection.load_from_file("tests/data/titer_model/h3n2_titers_subset.tsv")
>>> titer_counts = TiterCollection.count_strains(measurements)
>>> titer_counts["A/Acores/11/2013"]
Expand Down Expand Up @@ -142,6 +144,7 @@ def filter_strains(titers, strains):
reduced dictionary of titer measurements containing only those where
test and reference virus are part of the strain list
>>> measurements, strains, sources = TiterCollection.load_from_file("tests/data/titer_model/h3n2_titers_subset.tsv")
>>> len(measurements)
11
Expand Down
1 change: 1 addition & 0 deletions augur/tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ def check_conflicting_args(tree_builder_args, defaults):
ConflictingArgumentsException
When any user-provided arguments match those in the defaults.
>>> defaults = ("-nt", "-m", "-s")
>>> check_conflicting_args("-czb -n 2", defaults)
>>> check_conflicting_args("-czb -nt 2", defaults)
Expand Down
10 changes: 5 additions & 5 deletions augur/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -646,8 +646,8 @@ def read_bed_file(bed_file):
bed_file : str
Path to the BED file
Returns:
--------
Returns
-------
list[int]:
Sorted list of unique zero-indexed sites
"""
Expand Down Expand Up @@ -677,8 +677,8 @@ def read_mask_file(mask_file):
mask_file : str
Path to the masking file
Returns:
--------
Returns
-------
list[int]:
Sorted list of unique zero-indexed sites
"""
Expand Down Expand Up @@ -726,7 +726,7 @@ def read_strains(*files, comment_char="#"):
set of distinct strains.
Strain names can be commented with full-line or inline comments. For
example, the following is a valid strain names file:
example, the following is a valid strain names file::
# this is a comment at the top of the file
strain1 # exclude strain1 because it isn't sequenced properly
Expand Down

0 comments on commit 77df332

Please sign in to comment.