Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
103 changes: 83 additions & 20 deletions annotate.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
#!/usr/bin/python

import sys,os,commands
from ftplib import FTP

# Must import storage before utils
import update_settings as settings
from storage import storage

storage.uri = settings.MONGODB_URI
storage.db_name = settings.MONGODB_DB_NAME
storage.connect()
storage.authenticate ('whynotadmin', 'waivuy8N')
#storage.authenticate ('whynotadmin', 'waivuy8N')

from utils import entries_by_pdbid, get_unannotated_entries, get_missing_entries, read_http

Expand All @@ -27,8 +29,9 @@
# <databank name>, <pdbid 5>
# etc.


# Returns a list of triples: (comment, databank name, pdbid)
def parse_comments (lines):
def parse_comments(lines):

if len(lines) < 2:
return {}
Expand All @@ -43,6 +46,7 @@ def parse_comments (lines):
elif ',' in line:

databank_name, pdbid = line.strip ().replace (' ','').split (',')
databank_name.replace('-', '_')
d.append ((comment, databank_name, pdbid))

elif len (line.strip ()) > 0:
Expand All @@ -67,12 +71,13 @@ def parse_comment(lines, entry):

for line in lines[1:]:

line = line.replace (' ','').strip ()
line = line.replace (' ','').replace('-', '_').strip ()
if line == '%s,%s' % (entry ['databank_name'], entry ['pdbid']):
return comment

return ''


def update_entry (entry):

databank_name = entry ['databank_name']
Expand All @@ -84,6 +89,7 @@ def update_entry (entry):
else:
storage.insert ('entries', entry)


# This function gets all comment information from a whynot
# file and updates the corresponding entries with it.
def annotate_from_file (path):
Expand Down Expand Up @@ -116,8 +122,7 @@ def annotate_from_file (path):
# else just check all other sources of information...



# Check the files in the whynot comments directory:
print 'Check the files in the whynot comments directory'

whynotdir = os.path.dirname (sys.argv [0])
commentsdir = os.path.join (whynotdir, 'comment')
Expand All @@ -143,14 +148,16 @@ def annotate_from_file (path):
# A pdb entry can contain only carbohydrates or only nucleic acids, in
# which case no DSSP can be made.

pdbidscarbonly = Set ()
pdbidsnuconly = Set ()
pdbidsnmr = Set ()
pdbidsem = Set ()
pdbidsother = Set ()
pdbidsdiff = Set ()
pdbidscarbonly = Set()
pdbidsnuconly = Set()
pdbidsnmr = Set()
pdbidsem = Set()
pdbidsother = Set()
pdbidsdiff = Set()
pdbidssf = Set()
pdbidsnmrr = Set()

# Parse wwpdb entry type record
print 'Parse wwpdb entry type record'
for line in read_http('ftp://ftp.wwpdb.org/pub/pdb/derived_data/pdb_entry_type.txt').split('\n'):
if len(line.strip()) <= 0:
continue
Expand All @@ -171,8 +178,26 @@ def annotate_from_file (path):
elif method=='other':
pdbidsother.add(pdbid)

# Generate comments for missing structure factors.
# Do this wherever the experimental method is not diffraction:

print 'Listing deposited structure factor files'
ftp = FTP('ftp.wwpdb.org')
ftp.login()
ftp.cwd('/pub/pdb/data/structures/divided/structure_factors/')
for part in ftp.nlst():
for filename in ftp.nlst(part):
pdbid = filename[1: 5]
pdbidssf.add(pdbid)


print 'Listing deposited nmr restraints files'
ftp.cwd('/pub/pdb/data/structures/divided/nmr_restraints/')
for part in ftp.nlst():
for filename in ftp.nlst(part):
pdbid = filename[0: 4]
pdbidsnmrr.add(pdbid)


print 'Generate comments for missing structure factors'
for entry in get_unannotated_entries('STRUCTUREFACTORS'):

pdbid = entry['pdbid']
Expand All @@ -191,12 +216,16 @@ def annotate_from_file (path):
entry['comment'] = 'Not a Diffraction experiment'
entry['mtime'] = time()

elif pdbid not in pdbidssf:

entry['comment'] = 'Not deposited'
entry['mtime'] = time()

if 'comment' in entry:
update_entry (entry)


# Generate comments for missing nmr data.
# Do this wherever the experimental method is not nmr:
print 'Generate comments for missing nmr data'
for entry in get_unannotated_entries('NMR'):

pdbid = entry['pdbid']
Expand All @@ -215,9 +244,16 @@ def annotate_from_file (path):
entry['comment'] = 'Not an NMR experiment'
entry['mtime'] = time()

elif pdbid not in pdbidsnmrr:

entry['comment'] = 'Not deposited'
entry['mtime'] = time()

if 'comment' in entry:
update_entry (entry)


print 'Generate comments for missing hssp files'
# To find out why HSSP entries are missing, check the error output that
# mkhssp produced when it ran. That output is stored in a reserved directory:
for entry in get_unannotated_entries('HSSP'):
Expand All @@ -243,6 +279,8 @@ def annotate_from_file (path):
entry ['mtime'] = time()
update_entry (entry)


print 'Generate comments for missing dssp files'
# DSSP files can be missing for multiple reasons:
# 1 the structure has no protein, carbohydrates/nucleic acids only
# 2 the structure has no backbone, only alpha carbon atoms
Expand Down Expand Up @@ -277,14 +315,35 @@ def annotate_from_file (path):
continue

# Run dsspcmbi and catch stderr:
lines = commands.getoutput('%s %s /tmp/%s.dssp 2>&1 >/dev/null' % (mkdssp, inputfile, pdbid)).split('\n')
dsspfile = '/tmp/%s.dssp' % pdbid
lines = commands.getoutput('%s %s %s 2>&1 >/dev/null' % (mkdssp, inputfile, dsspfile)).split('\n')
if os.path.isfile(dsspfile):
os.remove(dsspfile)
if lines [-1].strip () == 'empty protein, or no valid complete residues':
entry['comment'] = 'No residues with complete backbone' # for backwards compatibility
entry['mtime'] = time()

if 'comment' in entry:
update_entry (entry)


print 'Generate comments for missing pdbredo entries'
for entry in get_missing_entries('PDB_REDO'):

pdbid = entry['pdbid']
whynotfile = '/srv/data/pdb_redo/whynot/%s.txt' % pdbid
if not os.path.isfile(whynotfile):
continue

lines = open(whynotfile, 'r').readlines()
comment = parse_comment(lines, entry)
if len(comment) > 0:
entry['comment'] = comment
entry['mtime'] = time()
update_entry(entry)


print 'Generate comments for missing bdb files'
# BDB comments are simply stored in a file, generated by the bdb script.
for entry in get_missing_entries('BDB'):

Expand All @@ -297,10 +356,12 @@ def annotate_from_file (path):
lines = open(whynotfile, 'r').readlines()
comment = parse_comment(lines, entry)
if len(comment) > 0:
entry ['comment'] = comment
entry ['mtime'] = time()
update_entry (entry)
entry['comment'] = comment
entry['mtime'] = time()
update_entry(entry)


print 'Generate comments for whatif lists'
# WHATIF list comments are simply stored in a file, generated by the script.
for lis in ['acc', 'cal', 'cc1', 'cc2', 'cc3', 'chi', 'dsp', 'iod', 'sbh', 'sbr', 'ss1', 'ss2', 'tau', 'wat']:
for src in ['pdb', 'redo']:
Expand All @@ -320,6 +381,8 @@ def annotate_from_file (path):
entry['mtime'] = time()
update_entry (entry)


print 'Generate comments for scenes'
# WHATIF scene comments are simply stored in a file, generated by the script.
for lis in ['iod', 'ss2']:
for src in ['pdb', 'redo']:
Expand Down
50 changes: 25 additions & 25 deletions install.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,53 +41,53 @@ def create_databanks():
docs.append(_create_databank('PDB','http://www.wwpdb.org/',
'ftp://ftp.wwpdb.org/pub/pdb/data/structures/divided/pdb/${PART}/pdb${PDBID}.ent.gz',
re.compile(r'.*/pdb([\w]{4})\.ent(\.gz)?'),FILE,'MMCIF'))
docs.append(_create_databank('BDB','http://www.cmbi.ru.nl/bdb/',
'ftp://ftp.cmbi.ru.nl/pub/molbio/data/bdb/${PART}/${PDBID}/${PDBID}.bdb',
docs.append(_create_databank('BDB','http://www.cmbi.umcn.nl/bdb/',
'ftp://ftp.cmbi.umcn.nl/pub/molbio/data/bdb/${PART}/${PDBID}/${PDBID}.bdb',
re.compile(r'.*/([\w]{4})\.bdb'),FILE,'PDB'))
docs.append(_create_databank('DSSP','http://swift.cmbi.ru.nl/gv/dssp/',
'ftp://ftp.cmbi.ru.nl/pub/molbio/data/dssp/${PDBID}.dssp',
docs.append(_create_databank('DSSP','http://swift.cmbi.umcn.nl/gv/dssp/',
'ftp://ftp.cmbi.umcn.nl/pub/molbio/data/dssp/${PDBID}.dssp',
re.compile(r'.*/([\w]{4})\.dssp'),FILE,'MMCIF'))
docs.append(_create_databank('HSSP','http://swift.cmbi.ru.nl/gv/hssp/',
'ftp://ftp.cmbi.ru.nl/pub/molbio/data/hssp/${PDBID}.hssp.bz2',
docs.append(_create_databank('HSSP','http://swift.cmbi.umcn.nl/gv/hssp/',
'ftp://ftp.cmbi.umcn.nl/pub/molbio/data/hssp/${PDBID}.hssp.bz2',
re.compile(r'.*/([\w]{4})\.hssp.bz2'),FILE,'DSSP'))
docs.append(_create_databank('PDBFINDER','http://swift.cmbi.ru.nl/gv/pdbfinder/',
'ftp://ftp.cmbi.ru.nl/pub/molbio/data/pdbfinder/PDBFIND.TXT.gz',
re.compile(r'ID : ([\w]{4})'),LINE,'PDB'))
docs.append(_create_databank('PDBFINDER2','http://swift.cmbi.ru.nl/gv/pdbfinder/',
'ftp://ftp.cmbi.ru.nl/pub/molbio/data/pdbfinder2/PDBFIND2.TXT.gz',
docs.append(_create_databank('PDBFINDER','http://swift.cmbi.umcn.nl/gv/pdbfinder/',
'ftp://ftp.cmbi.umcn.nl/pub/molbio/data/pdbfinder/PDBFIND.TXT.gz',
re.compile(r'ID : ([\w]{4})'),LINE,'HSSP'))
docs.append(_create_databank('PDBFINDER2','http://swift.cmbi.umcn.nl/gv/pdbfinder/',
'ftp://ftp.cmbi.umcn.nl/pub/molbio/data/pdbfinder2/PDBFIND2.TXT.gz',
re.compile(r'ID : ([\w]{4})'),LINE,'PDBFINDER'))
docs.append(_create_databank('NMR','http://www.bmrb.wisc.edu/',
'ftp://ftp.wwpdb.org/pub/pdb/data/structures/all/nmr_restraints/${PDBID}.mr.gz',
re.compile(r'.*/([\w]{4}).mr.gz'),FILE,'PDB'))
docs.append(_create_databank('STRUCTUREFACTORS','http://www.pdb.org/',
'ftp://ftp.wwpdb.org/pub/pdb/data/structures/divided/structure_factors/${PART}/r${PDBID}sf.ent.gz',
re.compile(r'.*/r([\w]{4})sf\.ent\.gz'),FILE,'MMCIF'))
docs.append(_create_databank('PDBREPORT','http://swift.cmbi.ru.nl/gv/pdbreport/',
'http://www.cmbi.ru.nl/pdbreport/cgi-bin/nonotes?PDBID=${PDBID}',
docs.append(_create_databank('PDBREPORT','http://swift.cmbi.umcn.nl/gv/pdbreport/',
'http://www.cmbi.umcn.nl/pdbreport/cgi-bin/nonotes?PDBID=${PDBID}',
re.compile(r'pdbreport\/\w{2}\/(\w{4})\/pdbout\.txt'),FILE,'PDB'))
docs.append(_create_databank('PDB_REDO','http://www.cmbi.ru.nl/pdb_redo/',
'http://www.cmbi.ru.nl/pdb_redo/cgi-bin/redir2.pl?pdbCode=${PDBID}',
docs.append(_create_databank('PDB_REDO','http://www.cmbi.umcn.nl/pdb_redo/',
'http://www.cmbi.umcn.nl/pdb_redo/cgi-bin/redir2.pl?pdbCode=${PDBID}',
re.compile(r'\/\w{2}\/\w{4}\/(\w{4})_final\.pdb'),FILE,'STRUCTUREFACTORS'))
docs.append(_create_databank('DSSP_REDO','http://swift.cmbi.ru.nl/gv/dssp/',
'ftp://ftp.cmbi.ru.nl/pub/molbio/data/dssp_redo/${PDBID}.dssp',
docs.append(_create_databank('DSSP_REDO','http://swift.cmbi.umcn.nl/gv/dssp/',
'ftp://ftp.cmbi.umcn.nl/pub/molbio/data/dssp_redo/${PDBID}.dssp',
re.compile(r'.*/([\w]{4})\.dssp'),FILE,'PDB_REDO'))

for lis in ['dsp','iod','sbh','sbr','ss1','ss2','tau','acc','cal','wat',
'cc1','cc2','cc3','chi']:
docs.append(_create_databank('WHATIF_PDB_%s' % lis, 'http://swift.cmbi.ru.nl/whatif/',
'ftp://ftp.cmbi.ru.nl/pub/molbio/data/wi-lists/pdb/%s/${PDBID}/${PDBID}.%s.bz2' % (lis, lis),
docs.append(_create_databank('WHATIF_PDB_%s' % lis, 'http://swift.cmbi.umcn.nl/whatif/',
'ftp://ftp.cmbi.umcn.nl/pub/molbio/data/wi-lists/pdb/%s/${PDBID}/${PDBID}.%s.bz2' % (lis, lis),
re.compile(r'.*/([\w]{4})\.' + lis + r'(\.bz2)?$'),FILE,'PDB'))
docs.append(_create_databank('WHATIF_REDO_%s' % lis, 'http://swift.cmbi.ru.nl/whatif/',
'ftp://ftp.cmbi.ru.nl/pub/molbio/data/wi-lists/redo/%s/${PDBID}/${PDBID}.%s.bz2' % (lis, lis),
docs.append(_create_databank('WHATIF_REDO_%s' % lis, 'http://swift.cmbi.umcn.nl/whatif/',
'ftp://ftp.cmbi.umcn.nl/pub/molbio/data/wi-lists/redo/%s/${PDBID}/${PDBID}.%s.bz2' % (lis, lis),
re.compile(r'.*/([\w]{4})\.' + lis + r'(\.bz2)?$'),FILE,'PDB_REDO'))

scenames = { 'ss2': 'sym-contacts', 'iod': 'ion-sites'}
for lis in scenames:
docs.append(_create_databank('PDB_SCENES_%s' % lis, 'http://www.cmbi.ru.nl/pdb-vis/',
'ftp://ftp.cmbi.ru.nl/pub/molbio/data/wi-lists/pdb/scenes/%s/${PDBID}/${PDBID}_%s.sce' % (lis, scenames[lis]),
docs.append(_create_databank('PDB_SCENES_%s' % lis, 'http://www.cmbi.umcn.nl/pdb-vis/',
'ftp://ftp.cmbi.umcn.nl/pub/molbio/data/wi-lists/pdb/scenes/%s/${PDBID}/${PDBID}_%s.sce' % (lis, scenames[lis]),
re.compile(r'.*/([\w]{4})_' + scenames[lis] + r'\.sce'),FILE,'WHATIF_PDB_%s' % lis))
docs.append(_create_databank('REDO_SCENES_%s' % lis, 'http://www.cmbi.ru.nl/pdb-vis/',
'ftp://ftp.cmbi.ru.nl/pub/molbio/data/wi-lists/redo/scenes/%s/${PDBID}/${PDBID}_%s.sce' % (lis, scenames[lis]),
docs.append(_create_databank('REDO_SCENES_%s' % lis, 'http://www.cmbi.umcn.nl/pdb-vis/',
'ftp://ftp.cmbi.umcn.nl/pub/molbio/data/wi-lists/redo/scenes/%s/${PDBID}/${PDBID}_%s.sce' % (lis, scenames[lis]),
re.compile(r'.*/([\w]{4})_' + scenames[lis] + r'\.sce'),FILE,'WHATIF_REDO_%s' % lis))

return docs
2 changes: 1 addition & 1 deletion whynot_web/default_settings.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
# mongo
MONGODB_URI = "mongodb://whynot_mongo_1"
MONGODB_URI = "mongodb://chelonium.cmbi.umcn.nl:27017"
MONGODB_DB_NAME = "whynot"