Skip to content

Commit 0a43810

Browse files
committed
new files for v302 update
1 parent 6a1f638 commit 0a43810

File tree

269 files changed

+61173
-0
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

269 files changed

+61173
-0
lines changed

src/hg/encode3/eap/README

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
This is the source code for the Encode Analysis Pipeline (EAP). It is all available under the BSD
2+
license as part of the ENCODE Project.
3+
4+
The EAP is a system for running genomics pipelines implemented using mySQL, Parasol, and C.
5+
The C code is
6+
eapDaemon - take anything in edwAnalysisJob table and feed it to Parasol
7+
eapSchedule - set up edwAnalysisJob and edwAnalysisRun tables and make output temp dirs
8+
eapFinish - move stuff from output temp dirs into encode data warehouse
9+
eapMonitor - look at how jobs are progressing through table and parasol.
10+
If need be chill and kill steps gone awry.
11+
eapAddSoftware - add a piece of software that is used by and tracked by the system
12+
eapUpdateSoftware - Tell system about a new version of software
13+
eapAddStep - add a new analysis step (something that processes files and generally
14+
creates other files, though there are also 'statistics only' steps).
15+
eapMetaSync - still in development but meant to turn eap objects into JSON that gets
16+
sent to the metadatabase which responds with metaUuid we store in tables.
17+
There are also a few temporary programs:
18+
eapRunAddJson - Creates JSON for ENCODED at Stanford, so that database can also
19+
track file relationships and steps. This was just a second draft.
20+
It turns out we may well use a different approach.
21+
eapToHub - Create a hub for DNAse hypersensitivity data calculated by EAP. Just for
22+
demo.
23+
24+
The overall process for making a new analysis is:
25+
1) Create an analysis step bash script that should start with the name "eap_run_" and belong
26+
in the github shared with Stanford.
27+
2) Test the step on hgwdev installing anything needed, and keeping track of what is installed
28+
3) Test the step on ku, asking system admins to install anything needed to run it on that machine
29+
or the associated cluster.
30+
4) Run eapAddSoftware to add any new software. Do this on both hgwdev and encode-02
31+
5) Edit eapAddStep.c and make a new item in the step array for your step, then run eapAddStep
32+
on hgwdev and encode-02
33+
6) Edit eapSchedule.c to tell system in what circumstances to run the step.
34+
7) Schedule step on somewhere between 1 and 10 files using eapSchedule on encode-02
35+
8) Examine output directory in eapRun.tempDir and make sure it looks good. It will be a subdir of
36+
/hive/groups/encode/encode3/encodeAnalysisPipeline/tmp that starts with the step name.
37+
9) Edit eapFinish.c to tell system how to store results.
38+
10) Run eapFinish with -noClean flag (so you get a chance to correct bugs before it
39+
deletes the directory).
40+
11) Check database to make sure everything is good.
41+
12) Run eapSchedule with -step=yourStepName to tell system to do step on whole database on
42+
encode-02.
43+
13) Contact Jim before running step on encode-01 (the production site).
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
/* eapAddSoftware - Add a new software object. */
2+
3+
/* Copyright (C) 2014 The Regents of the University of California
4+
* See README in this or parent directory for licensing information. */
5+
#include "common.h"
6+
#include "linefile.h"
7+
#include "hash.h"
8+
#include "options.h"
9+
#include "../../encodeDataWarehouse/inc/encodeDataWarehouse.h"
10+
#include "../../encodeDataWarehouse/inc/edwLib.h"
11+
#include "eapDb.h"
12+
#include "eapLib.h"
13+
14+
void usage()
/* Explain usage and exit.  The message is handed to errAbort.
 * No "options:" section is printed: the option table in this file
 * contains no named options, so advertising one would be misleading. */
{
errAbort(
  "eapAddSoftware - Add a new software object\n"
  "usage:\n"
  " eapAddSoftware name version url email\n"
  );
}
25+
26+
/* Command line validation table. */
27+
static struct optionSpec options[] = {
28+
{NULL, 0},
29+
};
30+
31+
void eapAddSoftware(char *name, char *version, char *url, char *email)
32+
/* eapAddSoftware - Add a new software object. */
33+
{
34+
/* If we already have software by that name warn and return */
35+
struct sqlConnection *conn = eapConnectReadWrite();
36+
char query[1024];
37+
sqlSafef(query, sizeof(query), "select count(*) from eapSoftware where name='%s'", name);
38+
int existingCount = sqlQuickNum(conn, query);
39+
if (existingCount > 0)
40+
{
41+
warn("%s already exists in eapSoftware", name);
42+
return;
43+
}
44+
45+
/* Add us. */
46+
sqlSafef(query, sizeof(query), "insert eapSoftware (name,url,email) values ('%s','%s','%s')",
47+
name, url, email);
48+
sqlUpdate(conn, query);
49+
50+
/* Find out executabe md5 */
51+
char path[PATH_LEN];
52+
eapPathForCommand(name, path);
53+
char md5[33];
54+
edwMd5File(path, md5);
55+
56+
/* Add initial version as well */
57+
sqlSafef(query, sizeof(query),
58+
"insert eapSwVersion (software,version,md5,notes) values ('%s','%s','%s','%s')",
59+
name, version, md5, "Initial version tracked.");
60+
sqlUpdate(conn, query);
61+
}
62+
63+
int main(int argc, char *argv[])
64+
/* Process command line. */
65+
{
66+
optionInit(&argc, argv, options);
67+
if (argc != 5)
68+
usage();
69+
eapAddSoftware(argv[1], argv[2], argv[3], argv[4]);
70+
return 0;
71+
}
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# Build rules for the eapAddSoftware program.
kentSrc = ../../../..
A = eapAddSoftware

# Libraries added to preMyLibs: the EAP library, the encodeDataWarehouse
# library and the parasol library, each from its $(MACHTYPE) directory.
preMyLibs += ../lib/$(MACHTYPE)/eaplib.a \
    ../../encodeDataWarehouse/lib/$(MACHTYPE)/edwlib.a \
    ../../../../parasol/lib/$(MACHTYPE)/paralib.a

include $(kentSrc)/inc/userApp.mk

0 commit comments

Comments
 (0)