@@ -58,7 +58,7 @@ Due to the way `gt` is defined, sum of `gt` gives the count of non-reference all
58
58
We can use `GROUP BY` and [aggregate functions][aggfunc] to do computation on
59
59
genotypes.
60
60
``` sql
61
- -- get non-ref allele count for all variants
61
+ -- get non-ref allele count for each variant
62
62
SELECT vid,SUM (gt) -- SUM of gt grouped by vid
63
63
FROM Genotype
64
64
GROUP BY vid;
@@ -70,7 +70,7 @@ SELECT vid,GROUP_CONCAT(sid)
70
70
```
71
71
We can set conditions on aggregate functions using the `HAVING` clause:
72
72
``` sql
73
- -- get TP53 or CDC2 variants with allele count over 2
73
+ -- get variants with allele count over 2
74
74
SELECT g .vid
75
75
FROM Genotype g
76
76
GROUP BY g .vid
@@ -123,25 +123,27 @@ sqlite3 < test.sql
123
123
```
124
124
SQL statements:
125
125
``` sql
126
+ -- Schema
127
+
126
128
DROP TABLE IF EXISTS Variant;
127
129
CREATE TABLE Variant (
128
130
vid TEXT ,
129
131
gene TEXT
130
132
);
131
-
132
133
DROP TABLE IF EXISTS Sample;
133
134
CREATE TABLE Sample (
134
135
sid TEXT ,
135
136
age REAL
136
137
);
137
-
138
138
DROP TABLE IF EXISTS Genotype;
139
139
CREATE TABLE Genotype (
140
140
vid TEXT ,
141
141
sid TEXT ,
142
142
gt INT
143
143
);
144
144
145
+ -- Insert data
146
+
145
147
INSERT INTO Variant VALUES (" V1" , " TP53" );
146
148
INSERT INTO Variant VALUES (" V2" , " CDK2" );
147
149
@@ -156,6 +158,8 @@ INSERT INTO Genotype VALUES ("V2", "S1", 0);
156
158
INSERT INTO Genotype VALUES (" V2" , " S2" , 0 );
157
159
INSERT INTO Genotype VALUES (" V2" , " S3" , 1 );
158
160
161
+ -- Query examples
162
+
159
163
SELECT vid FROM Variant WHERE gene= " TP53" ;
160
164
161
165
SELECT sid FROM Sample WHERE age< 40 ;
0 commit comments