-
Notifications
You must be signed in to change notification settings - Fork 1
Fix counting statistics with shorter trees #125
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -8,6 +8,44 @@ | |
|
|
||
| PVALUE_THRESHOLD_FOR_INTENSITY_BASED_COUNTING = 0.1 | ||
|
|
||
| # Determines at which level missing value testing is performed. | ||
| # Set once based on tree structure, then reused. | ||
| MISSINGVAL_TEST_LEVEL = None | ||
|
|
||
|
|
||
def determine_missingval_test_level(root_node):
    """Determine the appropriate level for missing value statistical testing.

    The result is written to the module-level ``MISSINGVAL_TEST_LEVEL``;
    the function itself returns ``None``.

    Scenarios:
    1) "mod_seq_charge" exists in tree -> test at mod_seq_charge level
    2) "mod_seq" is one level above leaves -> test at base ion level
    3) "seq" is one level above leaves -> test at base ion level
    4) "gene" is one level above leaves -> test at base ion level

    Args:
        root_node: Root of the tree to inspect. Every node must expose
            ``type`` and ``parent``; the root must also expose
            ``descendants`` and ``leaves`` (as anytree nodes do).

    Raises:
        ValueError: If the root has no children, or if the parent of the
            first leaf is not one of "mod_seq", "seq" or "gene" and the tree
            contains no "mod_seq_charge" node.
    """
    global MISSINGVAL_TEST_LEVEL

    # Scenario 1: fragment-level data. The root itself is included in the
    # scan, and any() stops at the first matching node.
    if any(node.type == "mod_seq_charge" for node in (root_node, *root_node.descendants)):
        MISSINGVAL_TEST_LEVEL = "mod_seq_charge"
        return

    # For all other cases, check what is one level above the leaves.
    first_leaf = root_node.leaves[0]
    if first_leaf.parent is None:
        # A childless root is its own leaf and has no parent; fail with a
        # clear message instead of an AttributeError on None.
        raise ValueError("Unexpected tree structure: root node has no children, "
                         "so no level for missing value testing can be determined.")

    leaf_parent_type = first_leaf.parent.type
    # Scenarios 2-4 (charged peptides without fragments, peptides without
    # charge info, leaves directly under gene) all test at the base ion level.
    if leaf_parent_type not in ("mod_seq", "seq", "gene"):
        raise ValueError(f"Unexpected tree structure: leaf parent type is '{leaf_parent_type}'. "
                         f"Expected one of: 'mod_seq', 'seq', 'gene', or tree with 'mod_seq_charge' nodes.")
    MISSINGVAL_TEST_LEVEL = "base"
|
|
||
def create_protnode_from_missingval_ions(gene_name,diffions, normed_c1, normed_c2):
    """Build a MissingValProtNodeCreator from the arguments and return its ``prot_node``."""
    node_creator = MissingValProtNodeCreator(gene_name, diffions, normed_c1, normed_c2)
    return node_creator.prot_node
|
|
||
|
|
@@ -76,11 +114,21 @@ def _assign_properties_to_missingval_base_ions(self, root_node): | |
|
|
||
|
|
||
| @staticmethod | ||
| def _get_nodes_to_test(root_node): #get the nodes in the lowest level that is relevant for the binomial test | ||
| if root_node.leaves[0].parent.type == "mod_seq": #when AlphaQuant works with precursors only (not fragments), the precursors themselves are the "base ions" and the "mod_seq_charge" node does not exist | ||
| return root_node.children | ||
| else: | ||
| def _get_nodes_to_test(root_node): | ||
| """Get the nodes at which to perform the missing value statistical test. | ||
|
|
||
| Uses MISSINGVAL_TEST_LEVEL which is set once based on tree structure. | ||
| """ | ||
| global MISSINGVAL_TEST_LEVEL | ||
|
|
||
| # Set the test level if not already determined | ||
| if MISSINGVAL_TEST_LEVEL is None: | ||
| determine_missingval_test_level(root_node) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. could this be then you don't need to have |
||
|
|
||
| if MISSINGVAL_TEST_LEVEL == "mod_seq_charge": | ||
| return anytree.search.findall(root_node, filter_=lambda node: node.type == "mod_seq_charge") | ||
| else: # "base" | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. or |
||
| return root_node.leaves | ||
|
|
||
|
|
||
| def _propagate_properties_to_nodes_to_test(self,nodes_to_test): #goes through each node to test and merges the properties from it's base to the node itself | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1342,8 +1342,25 @@ diaumpire_precursor_ms1: | |
| ion_cols: | ||
| - Peptide Key | ||
|
|
||
| fragpipe_precursor: | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. just noting: l.1363 was "fragpipe_precursors" |
||
| format: widetable | ||
| quant_pre_or_suffix: " Intensity" | ||
| protein_cols: | ||
| - Protein | ||
| ion_hierarchy: | ||
| sequence_int: | ||
| order: [SEQ, MOD, CHARGE] | ||
| mapping: | ||
| SEQ: | ||
| - Peptide Sequence | ||
| MOD: | ||
| - Modified Sequence | ||
| CHARGE: | ||
| - Charge | ||
| use_iontree: False | ||
| ml_level: SEQ | ||
|
|
||
| fragpipe_precursors: | ||
| fragpipe_modseq: | ||
| format: widetable | ||
| quant_pre_or_suffix: " Intensity" | ||
| protein_cols: | ||
|
|
@@ -1358,3 +1375,6 @@ fragpipe_precursors: | |
| - Modified Sequence | ||
| use_iontree: False | ||
| ml_level: SEQ | ||
|
|
||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
private?