Skip to content

Commit

Permalink
Merge pull request ENCODE-DCC#117 from ENCODE-DCC/1059-1114-1091-edw-sync-new
Browse files Browse the repository at this point in the history

Squashed patch for issues 1059 1115 1091
  • Loading branch information
lrowe committed Jan 22, 2014
2 parents 5f1d1bb + aff5186 commit fd7da8a
Show file tree
Hide file tree
Showing 14 changed files with 290 additions and 317 deletions.
352 changes: 219 additions & 133 deletions src/encoded/commands/sync_edw.py

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/encoded/contentbase.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# See http://docs.pylonsproject.org/projects/pyramid/en/latest/narr/resources.html


import logging
import venusian
from abc import ABCMeta
Expand Down
5 changes: 3 additions & 2 deletions src/encoded/edw_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,8 +124,8 @@ def make_edw(data_host=None):

# Create db engine
sys.stderr.write('Connecting to %s://%s/%s...' % (engine, host, db))
edw_db = create_engine('%s://%s:%s@%s/%s' %
(engine, user, password, host, db))
cnx_str = '%s://%s:%s@%s/%s' % (engine, user, password, host, db)
edw_db = create_engine(cnx_str)

# TODO: A nice-to-have suggested by Laurence -- have MySQL directly read conf file.
# Something like the commented-out code below should do the trick.
Expand Down Expand Up @@ -267,6 +267,7 @@ def get_edw_fileinfo(edw, limit=None, experiment=True, start_id=0,
v.c.ucscDb.label('assembly'),
f.c.md5.label('md5sum'),
u.c.email.label('submitted_by'),
v.c.pairedEnd.label('paired_end'),
# either of these two error fields will cause status to be OBSOLETE
f.c.deprecated.label('lab_error_message'),
f.c.errorMessage.label('edw_error_message')])
Expand Down
9 changes: 9 additions & 0 deletions src/encoded/schemas/file.json
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,15 @@
"signal"
]
},
"paired_end": {
"title": "Paired End Identifier",
"description": "Which pair the file belongs to (if paired end library)",
"type": "string",
"enum": [
"1",
"2"
]
},
"dataset": {
"title": "Dataset",
"description": "The experiment or dataset the file belongs to.",
Expand Down
32 changes: 1 addition & 31 deletions src/encoded/tests/data/edw_file/edw_file_mock.tsv

Large diffs are not rendered by default.

3 changes: 1 addition & 2 deletions src/encoded/tests/data/inserts/dataset.tsv
Original file line number Diff line number Diff line change
@@ -1,2 +1 @@
test uuid accession dataset_type description status submitted_by award lab encode2_dbxrefs:array geo_dbxrefs:array references:array documents:array files:array
fake dataset d2470afe-ac68-4489-8f51-90ddfbc8e00b ENCSR999BLA composite Testing dataset CURRENT facilisi.tristique@potenti.vivamus U54HG004576 richard-myers
test uuid accession dataset_type description status submitted_by award lab aliases:array geo_dbxrefs:array references:array documents:array files:arrayfake dataset d2470afe-ac68-4489-8f51-90ddfbc8e00b ENCSR999BLA composite Testing dataset CURRENT facilisi.tristique@potenti.vivamus U54HG004576 richard-myers ec3 dataset e52a0b01-2fb8-4015-bcad-c902f1d786fb ENCSR000AJW project RNA Working Group Evaluation Dataset CURRENT facilisi.tristique@potenti.vivamus U54HG004592 john-stamatoyannopoulos ec2 dataset ce459d50-bea1-407e-acfa-95c1b543cd60 ENCSR847FDT composite Data from UCSC Genome Browser composite hg19-wgEncodeUwAffyExonArray CURRENT facilisi.tristique@potenti.vivamus U54HG004592 john-stamatoyannopoulos ucsc_encode_db:hg19-wgEncodeUwAffyExonArray;random-key:FakeyName PMID:19966280
Expand Down
2 changes: 1 addition & 1 deletion src/encoded/tests/data/inserts/experiment.tsv
Original file line number Diff line number Diff line change
@@ -1 +1 @@
test uuid accession biosample_term_id biosample_term_name biosample_type assay_term_name assay_term_id target dataset_type description status submitted_by award lab encode2_dbxrefs:array geo_dbxrefs:array references:array documents:array possible_controls:array files:arrayshared biosample 5a6d5a57-e62d-44b9-a1bd-5d1815247348 ENCSR000AER EFO:0002067 K562 immortalized cell line RAMPAGE OBI:0001864 experiment RNA Evaluation K562 Long Total RAMPAGE from Gingeras CURRENT dignissim.euismod@amet.habitant U54HG007004 thomas-gingeras ENCFF001RDR;ENCFF001RDX;ENCFF001RES;ENCFF001RETshared biosample a357b33b-cdaa-4312-91c0-086bfec24181 ENCSR000AEM EFO:0002067 K562 immortalized cell line RNA-seq OBI:0001271 experiment RNA Evaluation K562 Long Poly-A+ RNA-seq from Gingeras CURRENT dignissim.euismod@amet.habitant U54HG007004 thomas-gingeras ENCFF001RDZ;ENCFF001RED;ENCFF001REF;ENCFF001REGshared biosample d8e3c296-ae36-417b-855a-8bfbe3d33e3b ENCSR000AES EFO:0002067 K562 immortalized cell line RNA-seq OBI:0001271 experiment RNA Evaluation K562 Small Total RNA-seq from Gingeras CURRENT dignissim.euismod@amet.habitant U54HG007004 thomas-gingeras ENCFF001REL;ENCFF001REQcontrol chip-seq with files df02f3f9-a6d4-40e4-a8dc-c1c7387e4556 ENCSR000ADH BTO:0002493 CH12.LX cell immortalized cell line ChIP-seq OBI:0000716 Control-mouse experiment CH12 Control Histone Mods by ChIP-seq performed by Yale CURRENT amet.fusce@est.fermentum RC2HG005602 sherman-weissman wgEncodeEM002001 GSM798326 ;GSM798326;GSE32218 ENCODE:Snyder_Histone_Mouse_protocol ENCFF001MXE;ENCFF001MXG;ENCFF001MYMrefers to the control dataset c05a9d79-713e-4d54-8d8e-6daae94368b3 ENCSR000ADI BTO:0002493 CH12.LX cell immortalized cell line ChIP-seq OBI:0000716 H3K4me3-mouse experiment CH12 H3K4me3 Histone Mods by ChIP-seq performed by Yale CURRENT amet.fusce@est.fermentum RC2HG005602 sherman-weissman wgEncodeEM002004 GSM798327 ;GSM798327;GSE32218 ENCODE:Snyder_Histone_Mouse_protocol df02f3f9-a6d4-40e4-a8dc-c1c7387e4556 
ENCFF001MWZ;ENCFF001MXA;ENCFF001MXB;ENCFF001MXD;ENCFF001MXF;ENCFF001MXHhuman chip-seq 8f8085dd-8e9c-4b75-a187-21dbafe46de1 ENCSR000AHF BTO:0000093 MCF-7 cell immortalized cell line ChIP-seq OBI:0000716 TAF1-human experiment HAIB ChIP TAF1 in MCF-7 CURRENT facilisi.tristique@potenti.vivamus U54HG004576 richard-myers wgEncodeEH003317 GSM1010811 ENCODE:Myers_Lab_ChIP-seq_Protocol_v042211 methy array b29de469-ed4b-4acf-8b4d-f1af02def8e8 ENCSR000ACY CL:1000350 basal cell of epithelium of terminal bronchiole primary cell line MethylArray OBI:0001332 experiment UW SAEC CURRENT facilisi.tristique@potenti.vivamus U54HG004576 richard-myers wgEncodeEH002229 GSM999346 ;GSE40699 ENCODE:Myers_Methyl450_protocol ENCFF000LSPNTR for assay and no replicates 355ffbf8-0bc9-4545-ad3c-1ae71b4e2d85 ENCSR000AJK Nanostring NTR:0000020 experiment Irvine Nanostring GM12878 CURRENT netus.lorem@risus.lobortis U54HG006998 ali-mortazavi shared biosample 0cb85395-da45-47e5-84c7-888f90c983fc ENCSR000AEN EFO:0002067 K562 immortalized cell line RNA-seq OBI:0001271 experiment RNA Evaluation K562 Long Total from Graveley CURRENT platea.a@volutpat.viverra U54HG007005 brenton-graveley ENCFF001RCT;ENCFF001RCU;ENCFF001RDB;ENCFF001RDCno replicates ddd11381-7958-4d58-af02-fc56255c838b ENCSR000AAL EFO:0002067 K562 immortalized cell line RNA-seq OBI:0001271 experiment RNA Evaluation K562 Small Total RNA-seq from Gingeras CURRENT dignissim.euismod@amet.habitant U54HG007004 thomas-gingeras ENCFF001REL;ENCFF001REQreplicates no files 223af596-e5ca-4048-9ea8-9a41c240181d ENCSR999NOF EFO:0002067 K562 immortalized cell line RNA-seq OBI:0001271 experiment RNA Evaluation K562 Small Total RNA-seq from Gingeras CURRENT dignissim.euismod@amet.habitant U54HG007004 thomas-gingeras
test uuid accession biosample_term_id biosample_term_name biosample_type assay_term_name assay_term_id target dataset_type description status submitted_by award lab encode2_dbxrefs:array geo_dbxrefs:array references:array documents:array possible_controls:array files:arrayshared biosample 5a6d5a57-e62d-44b9-a1bd-5d1815247348 ENCSR000AER EFO:0002067 K562 immortalized cell line RAMPAGE OBI:0001864 experiment RNA Evaluation K562 Long Total RAMPAGE from Gingeras CURRENT dignissim.euismod@amet.habitant U54HG007004 thomas-gingeras ENCFF001RDR;ENCFF001RDX;ENCFF001RES;ENCFF001RETshared biosample a357b33b-cdaa-4312-91c0-086bfec24181 ENCSR000AEM EFO:0002067 K562 immortalized cell line RNA-seq OBI:0001271 experiment RNA Evaluation K562 Long Poly-A+ RNA-seq from Gingeras CURRENT dignissim.euismod@amet.habitant U54HG007004 thomas-gingeras ENCFF001RDZ;ENCFF001RED;ENCFF001REF;ENCFF001REGshared biosample d8e3c296-ae36-417b-855a-8bfbe3d33e3b ENCSR000AES EFO:0002067 K562 immortalized cell line RNA-seq OBI:0001271 experiment RNA Evaluation K562 Small Total RNA-seq from Gingeras CURRENT dignissim.euismod@amet.habitant U54HG007004 thomas-gingeras ENCFF001REL;ENCFF001REQcontrol chip-seq with files df02f3f9-a6d4-40e4-a8dc-c1c7387e4556 ENCSR000ADH BTO:0002493 CH12.LX cell immortalized cell line ChIP-seq OBI:0000716 Control-mouse experiment CH12 Control Histone Mods by ChIP-seq performed by Yale CURRENT amet.fusce@est.fermentum RC2HG005602 sherman-weissman wgEncodeEM002001 GSM798326 ;GSM798326;GSE32218 ENCODE:Snyder_Histone_Mouse_protocol ENCFF001MXE;ENCFF001MXG;ENCFF001MYMrefers to the control dataset c05a9d79-713e-4d54-8d8e-6daae94368b3 ENCSR000ADI BTO:0002493 CH12.LX cell immortalized cell line ChIP-seq OBI:0000716 H3K4me3-mouse experiment CH12 H3K4me3 Histone Mods by ChIP-seq performed by Yale CURRENT amet.fusce@est.fermentum RC2HG005602 sherman-weissman wgEncodeEM002004 GSM798327 ;GSM798327;GSE32218 ENCODE:Snyder_Histone_Mouse_protocol df02f3f9-a6d4-40e4-a8dc-c1c7387e4556 
ENCFF001MWZ;ENCFF001MXA;ENCFF001MXB;ENCFF001MXD;ENCFF001MXF;ENCFF001MXHhuman chip-seq 8f8085dd-8e9c-4b75-a187-21dbafe46de1 ENCSR000AHF BTO:0000093 MCF-7 cell immortalized cell line ChIP-seq OBI:0000716 TAF1-human experiment HAIB ChIP TAF1 in MCF-7 CURRENT facilisi.tristique@potenti.vivamus U54HG004576 richard-myers wgEncodeEH003317 GSM1010811 ENCODE:Myers_Lab_ChIP-seq_Protocol_v042211 methy array b29de469-ed4b-4acf-8b4d-f1af02def8e8 ENCSR000ACY CL:1000350 basal cell of epithelium of terminal bronchiole primary cell line MethylArray OBI:0001332 experiment UW SAEC CURRENT facilisi.tristique@potenti.vivamus U54HG004576 richard-myers wgEncodeEH002229 GSM999346 ;GSE40699 ENCODE:Myers_Methyl450_protocol ENCFF000LSPNTR for assay and no replicates 355ffbf8-0bc9-4545-ad3c-1ae71b4e2d85 ENCSR000AJK Nanostring NTR:0000020 experiment Irvine Nanostring GM12878 CURRENT netus.lorem@risus.lobortis U54HG006998 ali-mortazavi shared biosample 0cb85395-da45-47e5-84c7-888f90c983fc ENCSR000AEN EFO:0002067 K562 immortalized cell line RNA-seq OBI:0001271 experiment RNA Evaluation K562 Long Total from Graveley CURRENT platea.a@volutpat.viverra U54HG007005 brenton-graveley ENCFF001RCT;ENCFF001RCU;ENCFF001RDB;ENCFF001RDCno replicates ddd11381-7958-4d58-af02-fc56255c838b ENCSR000AAL EFO:0002067 K562 immortalized cell line RNA-seq OBI:0001271 experiment RNA Evaluation K562 Small Total RNA-seq from Gingeras CURRENT dignissim.euismod@amet.habitant U54HG007004 thomas-gingeras ENCFF001REL;ENCFF001REQreplicates no files 223af596-e5ca-4048-9ea8-9a41c240181d ENCSR999NOF EFO:0002067 K562 immortalized cell line RNA-seq OBI:0001271 experiment RNA Evaluation K562 Small Total RNA-seq from Gingeras CURRENT dignissim.euismod@amet.habitant U54HG007004 thomas-gingeras patch users 509cf1b1-4113-4a1e-a49f-d62ebf3367cf ENCSR000ACT CL:1000350 basal cell of epithelium of terminal bronchiole primary cell line MethylArray OBI:0001332 experiment UW SAEC CURRENT facilisi.tristique@potenti.vivamus U54HG004576 
richard-myers ENCFF001RIC;ENCFF001RID
Expand Down
Loading

0 comments on commit fd7da8a

Please sign in to comment.