# Split the records of a FASTA file into multiple separate FASTA files, grouped by
# a particular ID (e.g. a time point) found in a particular field of each record's
# description, for a given delimiter.
# To run, navigate to the file's location in a command prompt and enter:
#   python split_fasta_by_collections.py infile.fasta
import os
import sys
from collections import defaultdict

from Bio import SeqIO
# Separator used inside each record description, and the zero-based index of
# the field whose value identifies the group a record belongs to.
DELIMITER = "_"
FIELD_INDEX = 0

# Fail with a usage hint instead of a bare IndexError when no input file is given.
if len(sys.argv) < 2:
    sys.exit("usage: python split_fasta_by_collections.py infile.fasta")

print('started')

# Parse the FASTA file named by the first command-line argument.
records = SeqIO.parse(sys.argv[1], "fasta")

# Group the records by the selected description field: {field value: [records]}.
grouped = defaultdict(list)
for record in records:
    descr = record.description.split(DELIMITER)[FIELD_INDEX].strip()
    grouped[descr].append(record)

# Hand later code a plain dict so that looking up an unknown group still
# raises KeyError rather than silently creating an empty entry.
collected = dict(grouped)

# Output file name template; presumably "%s" is filled in with the group ID
# by the code that follows -- confirm against the rest of the script.
file_name = "%s.fasta"
file_path = os.getcwd()  # sets the output file path to your current working directory