Commits (27; changes shown from all commits)
c1f3fae  Add altpaths module for computing path covers with simple naming (glennhickey, Jan 21, 2026)
587cfc1  2nd pass, adapt interface (glennhickey, Jan 21, 2026)
947a0b1  work on deconstruct refactor for altpaths (glennhickey, Jan 21, 2026)
4479e17  start work on call (glennhickey, Jan 21, 2026)
88a0fed  fix formatting for tests (glennhickey, Jan 21, 2026)
6c48a52  fix tests (glennhickey, Jan 21, 2026)
1adf0b1  default to ignoring altpath cover everywhere (glennhickey, Jan 22, 2026)
32a03ae  refactor away from netsed flow caller (glennhickey, Jan 22, 2026)
9874361  do some de-slopping (glennhickey, Jan 22, 2026)
a2d2e40  work on (top-down) nesting (glennhickey, Jan 22, 2026)
91508b1  leave old nested logic accessible in --bottom-up (glennhickey, Jan 22, 2026)
8eb8d12  more tests (glennhickey, Jan 23, 2026)
9024666  fix tests (glennhickey, Jan 23, 2026)
2db6b12  nested callinfo ; more tests! (glennhickey, Jan 23, 2026)
5c195aa  dont use altpaths for anchoring snarls (glennhickey, Jan 23, 2026)
31daa68  make sure refpath has priority for trav finding (glennhickey, Jan 23, 2026)
66f2e49  deslop deconstruct a bit (glennhickey, Jan 26, 2026)
12b7eaa  update (glennhickey, Jan 28, 2026)
5f24841  big naming refactor (glennhickey, Jan 28, 2026)
3c14feb  stop checking for altpaths in other tools (glennhickey, Jan 28, 2026)
f073797  clean out old deconstruct nesting logic (glennhickey, Jan 29, 2026)
c409c27  remove cruft from call (glennhickey, Jan 29, 2026)
067fd1c  Merge remote-tracking branch 'origin/master' into altpaths (glennhickey, Jan 29, 2026)
d326e02  options cleanup (glennhickey, Jan 29, 2026)
5223a7f  clean slop (glennhickey, Jan 29, 2026)
917efd6  deslop again (glennhickey, Jan 29, 2026)
fe2280a  better nested calling (glennhickey, Jan 30, 2026)
1,132 changes: 1,132 additions & 0 deletions src/augref.cpp

Large diffs are not rendered by default.

198 changes: 198 additions & 0 deletions src/augref.hpp
@@ -0,0 +1,198 @@
#ifndef VG_AUGREF_HPP_INCLUDED
#define VG_AUGREF_HPP_INCLUDED

/**
* \file augref.hpp
*
* Interface for computing and querying augmented reference path covers.
*
* An augref cover is a set of path fragments (stored as separate paths) in the graph.
* They are always relative to an existing reference sample (e.g. GRCh38 or CHM13).
* Unlike rGFA paths, which use complex metadata embedding, augref paths use a simple naming
* scheme: {base_path_name}_{N}_alt
*
* For example, if the reference path is "CHM13#0#chr1", augref paths would be named:
* - CHM13#0#chr1_1_alt
* - CHM13#0#chr1_2_alt
* - etc.
*
* The data structures used in this class are always relative to the original paths
* in the graph. The REFERENCE-sense fragments that are used to serialize the
* cover can be created and loaded, but they are not used beyond that.
*/

#include "handle.hpp"
#include "snarls.hpp"
#include "traversal_finder.hpp"

namespace vg {

using namespace std;

class AugRefCover {
public:
    // The suffix used to identify augref paths
    static const string augref_suffix; // "_alt"

    // Create an augref path name from a base reference path name and an index.
    // Example: make_augref_name("CHM13#0#chr1", 1) -> "CHM13#0#chr1_1_alt"
    static string make_augref_name(const string& base_path_name, int64_t augref_index);

    // Test if a path name is an augref path (contains "_{N}_alt" suffix).
    static bool is_augref_name(const string& path_name);

    // Parse an augref path name to extract the base reference path name.
    // Returns the original base path name, or the input if not an augref path.
    // Example: parse_base_path("CHM13#0#chr1_3_alt") -> "CHM13#0#chr1"
    static string parse_base_path(const string& augref_name);

    // Parse an augref path name to extract the augref index.
    // Returns -1 if the path is not an augref path.
    // Example: parse_augref_index("CHM13#0#chr1_3_alt") -> 3
    static int64_t parse_augref_index(const string& augref_name);

public:
    // Clear out any existing augref paths from the graph. Recommended to run this
    // before compute().
    void clear(MutablePathMutableHandleGraph* graph);

    // Compute the augref cover from the graph, starting with a given set of reference paths.
    void compute(const PathHandleGraph* graph,
                 SnarlManager* snarl_manager,
                 const unordered_set<path_handle_t>& reference_paths,
                 int64_t minimum_length);

    // Load existing augref paths from the graph, assuming they've been computed already.
    // The reference_paths should be the rank-0 paths the augref paths extend from.
    void load(const PathHandleGraph* graph,
              const unordered_set<path_handle_t>& reference_paths);

    // Apply the augref cover to a graph (must have been computed first), adding it
    // as a bunch of REFERENCE-sense paths with the simplified naming scheme.
    // If augref_sample_name is set, base paths are first copied to the new sample,
    // and augref paths are created under the new sample name.
    void apply(MutablePathMutableHandleGraph* mutable_graph);

    // Set the sample name for augref paths. When set, apply() will:
    // 1. Copy base reference paths to this new sample (CHM13#0#chr1 -> new_sample#0#chr1)
    // 2. Create augref paths under the new sample (new_sample#0#chr1_1_alt, etc.)
    void set_augref_sample(const string& sample_name);

    // Get the current augref sample name (empty string if not set).
    const string& get_augref_sample() const;

    // Get the rank (level) of a given node (0 if on a reference path).
    int64_t get_rank(nid_t node_id) const;

    // Get the step of a given node in its covering interval.
    step_handle_t get_step(nid_t node_id) const;

    // Get the parent intervals (left and right) of a given interval.
    pair<const pair<step_handle_t, step_handle_t>*,
         const pair<step_handle_t, step_handle_t>*> get_parent_intervals(const pair<step_handle_t, step_handle_t>& interval) const;

    // Get all computed intervals.
    const vector<pair<step_handle_t, step_handle_t>>& get_intervals() const;

    // Get an interval from a node. Returns nullptr if node not in an interval.
    const pair<step_handle_t, step_handle_t>* get_interval(nid_t node_id) const;

    // Get the number of reference intervals (rank-0).
    int64_t get_num_ref_intervals() const;

    // Print out a table of statistics.
    void print_stats(ostream& os);

protected:

    // Compute the cover for the given snarl, by greedily finding the covered paths through it.
    // The cover is added to the two "thread_" structures.
    void compute_snarl(const Snarl& snarl, PathTraversalFinder& path_trav_finder, int64_t minimum_length,
                       vector<pair<step_handle_t, step_handle_t>>& thread_augref_intervals,
                       unordered_map<nid_t, int64_t>& thread_node_to_interval);

    // Get intervals in traversal that are not covered according to this->node_to_interval or
    // the thread_node_to_interval parameter.
    vector<pair<int64_t, int64_t>> get_uncovered_intervals(const vector<step_handle_t>& trav,
                                                           const unordered_map<nid_t, int64_t>& thread_node_to_interval);

    // Add a new interval into the augref_intervals vector and update the node_to_interval map.
    // If the interval can be merged into an existing, contiguous interval, do that instead.
    // Returns true if a new interval was added, false if an existing interval was updated.
    bool add_interval(vector<pair<step_handle_t, step_handle_t>>& thread_augref_intervals,
                      unordered_map<nid_t, int64_t>& thread_node_to_interval,
                      const pair<step_handle_t, step_handle_t>& new_interval,
                      bool global = false);

    // add_interval() can delete an existing interval. This requires a full update at the end.
    void defragment_intervals();

    // Get the total coverage of a traversal (sum of step lengths * path count).
    int64_t get_coverage(const vector<step_handle_t>& trav, const pair<int64_t, int64_t>& uncovered_interval);

    // Make sure all nodes in all augref paths are in forward orientation.
    // This is always possible because they are, by definition, disjoint.
    // This should only be run from inside apply().
    void forwardize_augref_paths(MutablePathMutableHandleGraph* mutable_graph);

    // Second pass: greedily cover any nodes not covered by snarl traversals.
    // This handles nodes that are outside of snarls or in complex regions
    // where the traversal finder couldn't find good coverage.
    void fill_uncovered_nodes(int64_t minimum_length);

    // Search back to the reference and return <distance, node_id> when found.
    // (here distance is the number of intervals crossed, aka rank)
    // "first" toggles returning the first interval found vs all of them.
    vector<pair<int64_t, nid_t>> get_reference_nodes(nid_t node_id, bool first) const;

    // Debug function: verify that every node in the graph is covered by the augref cover.
    // Raises an error for each uncovered node, showing the node id and any paths that touch it.
    void verify_cover() const;

protected:

    const PathHandleGraph* graph = nullptr;

    // Intervals are end-exclusive (like BED).
    vector<pair<step_handle_t, step_handle_t>> augref_intervals;

    // augref_intervals[0, num_ref_intervals-1] are all rank-0 reference intervals.
    int64_t num_ref_intervals = 0;

    // Map from node ID to interval index.
    unordered_map<nid_t, int64_t> node_to_interval;

    // Counter for generating unique augref indices per base path.
    // Using mutable so it can be updated in apply() which is logically const for the cover.
    mutable unordered_map<string, int64_t> base_path_augref_counter;

    // Optional sample name for augref paths. When set, base paths are copied to this
    // sample and augref paths are created under it.
    string augref_sample_name;

    // Map from original reference path handles to their copies under augref_sample_name.
    // Only populated when augref_sample_name is set.
    unordered_map<path_handle_t, path_handle_t> ref_path_to_copy;

    // Copy base reference paths to the augref sample.
    // Creates new paths like "new_sample#0#chr1" from "CHM13#0#chr1".
    // Returns a map from original path handles to new path handles.
    void copy_base_paths_to_sample(MutablePathMutableHandleGraph* mutable_graph,
                                   const unordered_set<path_handle_t>& reference_paths);

    // Used when selecting traversals to make the greedy cover.
    struct RankedFragment {
        int64_t coverage;
        const string* name;
        int64_t trav_idx;
        pair<int64_t, int64_t> fragment;
        bool operator<(const RankedFragment& f2) const {
            // note: name comparison is flipped because we want to select high coverage / low name
            return this->coverage < f2.coverage || (this->coverage == f2.coverage && *this->name > *f2.name);
        }
    };
};

}

#endif
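
To make the declared interface concrete, here is a minimal usage sketch. It is not part of this PR: it assumes the caller has already loaded a mutable graph and built a SnarlManager the usual way, and the function name build_augref_cover, the sample name "CHM13aug", and the 100 bp minimum length are illustrative choices only.

// Illustrative sketch of driving AugRefCover from a tool (not part of this changeset).
#include "augref.hpp"

#include <iostream>
#include <unordered_set>

void build_augref_cover(vg::MutablePathMutableHandleGraph* graph,
                        vg::SnarlManager* snarl_manager,
                        const std::unordered_set<vg::path_handle_t>& reference_paths) {
    vg::AugRefCover cover;

    // Drop any augref paths left over from a previous run before recomputing.
    cover.clear(graph);

    // Greedily cover the graph relative to the reference paths, ignoring
    // fragments shorter than 100 bp (threshold chosen here for illustration).
    cover.compute(graph, snarl_manager, reference_paths, 100);

    // Optionally group the new paths under a dedicated sample; apply() then copies
    // the base paths to that sample and names fragments like "CHM13aug#0#chr1_1_alt".
    cover.set_augref_sample("CHM13aug");

    // Serialize the cover into the graph as REFERENCE-sense paths.
    cover.apply(graph);

    cover.print_stats(std::cerr);
}

// The static naming helpers round-trip the "{base_path_name}_{N}_alt" scheme:
//   vg::AugRefCover::make_augref_name("CHM13#0#chr1", 1)      -> "CHM13#0#chr1_1_alt"
//   vg::AugRefCover::parse_base_path("CHM13#0#chr1_1_alt")    -> "CHM13#0#chr1"
//   vg::AugRefCover::parse_augref_index("CHM13#0#chr1_1_alt") -> 1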