forked from Nirmal2310/MIP_ORACLE
-
Notifications
You must be signed in to change notification settings - Fork 0
/
MIP_5.py
59 lines (49 loc) · 1.46 KB
/
MIP_5.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# -*- coding: utf-8 -*-
"""
@author: Sakshi
"""
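# Flag each BLAST hit by whether the organism named in its definition line
# also appears in the alignment definition, then keep only the matching hits.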
import pandas as pd
import os
# Workbook read by the script and the two workbooks it writes.
INPUT_WORKBOOK = 'MIP parsed(NEW).xlsx'
ALL_MATCHES_WORKBOOK = 'BLAST Organism Check.xlsx'
PERFECT_MATCHES_WORKBOOK = 'BLAST Organism Check(Only perfect).xlsx'

# Labels written to the "Organism Check" column.
MATCH_LABEL = "Perfect Match Found"
MISMATCH_LABEL = "Different Organism Found"


def _as_text(cell: object) -> str:
    """Return *cell* as a stripped string, or "" when it is not text.

    Empty spreadsheet cells are read by pandas as float NaN; treating every
    non-string value as empty text keeps the string handling below from
    raising on such rows.
    """
    return cell.strip() if isinstance(cell, str) else ""


def _organism_from_def_line(def_line: object) -> str:
    """Return the text after the last ``|`` of a definition line.

    When the line has no ``|`` the whole (stripped) line is returned.
    """
    # Strip again after partitioning so "acc| Genus species" still matches.
    return _as_text(def_line).rpartition("|")[2].strip()


def _leading_words(alignment_definition: object, count: int = 2) -> str:
    """Return the first *count* whitespace-separated words, space-joined."""
    return ' '.join(_as_text(alignment_definition).split()[:count])


def organism_check_labels(def_lines, alignment_definitions):
    """Label each row by whether its organism appears in the alignment.

    Args:
        def_lines: Per-row definition lines; the organism is the text after
            the last ``|``.
        alignment_definitions: Per-row alignment definitions; only the first
            two words are compared.

    Returns:
        A list with exactly one label per input pair: ``MATCH_LABEL`` when
        the organism is a substring of the first two words of the alignment
        definition, otherwise ``MISMATCH_LABEL``. Rows whose organism is
        missing or empty are labelled as mismatches, because an empty string
        is a substring of everything and would otherwise always "match".
    """
    labels = []
    # zip keeps both columns in lockstep, so a blank cell can never shift the
    # pairing of later rows.
    for def_line, alignment_definition in zip(def_lines, alignment_definitions):
        organism = _organism_from_def_line(def_line)
        matched = bool(organism) and organism in _leading_words(alignment_definition)
        labels.append(MATCH_LABEL if matched else MISMATCH_LABEL)
    return labels


def main():
    """Read the parsed workbook, add the organism check and write both outputs."""
    df = pd.read_excel(INPUT_WORKBOOK, index_col=None)
    # One label is produced per row, so the new column always fits the frame.
    df["Organism Check"] = organism_check_labels(
        df['Def Line'].tolist(),
        df['Alignment Definition'].tolist(),
    )
    # Output files generated for all matches and for perfect matches only.
    df.to_excel(ALL_MATCHES_WORKBOOK, index=False)
    perfect_only = df[df['Organism Check'] != MISMATCH_LABEL]
    perfect_only.to_excel(PERFECT_MATCHES_WORKBOOK, index=False)


if __name__ == "__main__":
    main()