Skip to content

Commit cf47cf2

Browse files
authored
Merge pull request #145 from wilhelm-lab/fix/protein_id_percolator
Add proper protein ids to percolator
2 parents 878a94c + 4e4ea3f commit cf47cf2

File tree

3 files changed

+11
-13
lines changed

3 files changed

+11
-13
lines changed

spectrum_fundamentals/metrics/percolator.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -336,9 +336,7 @@ def add_percolator_metadata_columns(self):
336336
else:
337337
spec_id_cols = ["RAW_FILE", "SCAN_NUMBER", "MODIFIED_SEQUENCE", "PRECURSOR_CHARGE"]
338338
self.metrics_val["Peptide"] = self.metadata["MODIFIED_SEQUENCE"].apply(lambda x: "_." + x + "._")
339-
self.metrics_val["Proteins"] = self.metadata[
340-
"MODIFIED_SEQUENCE"
341-
] # we don't need the protein ID to get PSM / peptide results, fill with peptide sequence
339+
self.metrics_val["Proteins"] = self.metadata["PROTEINS"]
342340

343341
if "SCAN_EVENT_NUMBER" in self.metadata.columns:
344342
spec_id_cols.append("SCAN_EVENT_NUMBER")
Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
1-
,RAW_FILE,SCAN_NUMBER,MODIFIED_SEQUENCE,SEQUENCE,PRECURSOR_CHARGE,MASS,CALCULATED_MASS,SCORE,REVERSE,FRAGMENTATION,MASS_ANALYZER,SCAN_EVENT_NUMBER,RETENTION_TIME,PREDICTED_IRT,COLLISION_ENERGY
2-
0,20210122_0263_TMUCLHan_Peiru_DDA_IP_C797S_02,7978,AAIGEATRL,AAIGEATRL,2,900.50345678,900.50288029264,60.43600000000001,False,HCD,FTMS,1,0.5000000183883155,0.5000000323590892,30.0
3-
1,20210122_0263_TMUCLHan_Peiru_DDA_IP_C797S_02,12304,AAVPRAAFL,AAVPRAAFL,2,914.53379,914.53379,34.006,True,HCD,FTMS,2,1.000000038995633,1.5000000246189773,30.0
4-
2,20210122_0263_TMUCLHan_Peiru_DDA_IP_C797S_02,12398,AAYFGVYDTAK,AAYFGVYDTAK,2,1204.5764,1204.5764,39.97399999999999,True,HCD,FTMS,3,1.5000000993570157,2.500000056694346,30.0
5-
3,20210122_0263_TMUCLHan_Peiru_DDA_IP_C797S_02,11716,AAYYHPSYL,AAYYHPSYL,2,1083.5025,1083.5025,99.919,False,HCD,FTMS,4,2.0000000098074193,3.5000000203256407,30.0
6-
4,20210122_0263_TMUCLHan_Peiru_DDA_IP_C797S_02,5174,AEDLNTRVA,AEDLNTRVA,2,987.49852,987.49852,87.802,False,HCD,FTMS,5,2.500000083793533,4.5000000747038005,30.0
7-
5,20210122_0263_TMUCLHan_Peiru_DDA_IP_C797S_02,5174,AEDLNTRVA,AEDLNTRVA,2,987.49852,987.49852,62.802,False,HCD,FTMS,6,3.0000000338727677,5.5000000960095505,30.0
8-
6,20210122_0263_TMUCLHan_Peiru_DDA_IP_C797S_02,5174,AEDLNTRVA,AEDLNTRVA,2,987.49852,987.49852,79.802,False,HCD,FTMS,7,3.500000001673834,6.5000000134594345,30.0
9-
7,20210122_0263_TMUCLHan_Peiru_DDA_IP_C797S_02,5174,AEDLNTRVA,AEDLNTRVA,2,987.49852,987.49852,79.802,False,HCD,FTMS,8,4.000000066050141,7.500000085284446,30.0
1+
,RAW_FILE,SCAN_NUMBER,MODIFIED_SEQUENCE,SEQUENCE,PRECURSOR_CHARGE,MASS,CALCULATED_MASS,SCORE,REVERSE,FRAGMENTATION,MASS_ANALYZER,SCAN_EVENT_NUMBER,RETENTION_TIME,PREDICTED_IRT,COLLISION_ENERGY,PROTEINS
2+
0,20210122_0263_TMUCLHan_Peiru_DDA_IP_C797S_02,7978,AAIGEATRL,AAIGEATRL,2,900.5034568,900.5028803,60.436,FALSE,HCD,FTMS,1,0.500000018,0.500000032,30,sp|O23523|RGGA_ARATH
3+
1,20210122_0263_TMUCLHan_Peiru_DDA_IP_C797S_02,12304,AAVPRAAFL,AAVPRAAFL,2,914.53379,914.53379,34.006,TRUE,HCD,FTMS,2,1.000000039,1.500000025,30,sp|O23523|RGGA_ARATH
4+
2,20210122_0263_TMUCLHan_Peiru_DDA_IP_C797S_02,12398,AAYFGVYDTAK,AAYFGVYDTAK,2,1204.5764,1204.5764,39.974,TRUE,HCD,FTMS,3,1.500000099,2.500000057,30,sp|O23523|RGGA_ARATH
5+
3,20210122_0263_TMUCLHan_Peiru_DDA_IP_C797S_02,11716,AAYYHPSYL,AAYYHPSYL,2,1083.5025,1083.5025,99.919,FALSE,HCD,FTMS,4,2.00000001,3.50000002,30,sp|O23523|RGGA_ARATH
6+
4,20210122_0263_TMUCLHan_Peiru_DDA_IP_C797S_02,5174,AEDLNTRVA,AEDLNTRVA,2,987.49852,987.49852,87.802,FALSE,HCD,FTMS,5,2.500000084,4.500000075,30,sp|O23523|RGGA_ARATH
7+
5,20210122_0263_TMUCLHan_Peiru_DDA_IP_C797S_02,5174,AEDLNTRVA,AEDLNTRVA,2,987.49852,987.49852,62.802,FALSE,HCD,FTMS,6,3.000000034,5.500000096,30,sp|O23523|RGGA_ARATH
8+
6,20210122_0263_TMUCLHan_Peiru_DDA_IP_C797S_02,5174,AEDLNTRVA,AEDLNTRVA,2,987.49852,987.49852,79.802,FALSE,HCD,FTMS,7,3.500000002,6.500000013,30,sp|O23523|RGGA_ARATH
9+
7,20210122_0263_TMUCLHan_Peiru_DDA_IP_C797S_02,5174,AEDLNTRVA,AEDLNTRVA,2,987.49852,987.49852,79.802,FALSE,HCD,FTMS,8,4.000000066,7.500000085,30,sp|O23523|RGGA_ARATH

tests/unit_tests/test_percolator.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -395,7 +395,7 @@ def test_calc(self):
395395
# np.testing.assert_almost_equal(percolator.metrics_val['ExpMass'][0], 900.50345678)
396396
np.testing.assert_string_equal(percolator.metrics_val["Peptide"][0], "_.AAIGEATRL._")
397397
np.testing.assert_string_equal(
398-
percolator.metrics_val["Proteins"][0], "AAIGEATRL"
398+
percolator.metrics_val["Proteins"][0], "sp|O23523|RGGA_ARATH"
399399
) # Proteins is now filled from the PROTEINS metadata column instead of the peptide sequence
400400

401401
# features

0 commit comments

Comments
 (0)