Skip to content

Commit

Permalink
Merge pull request #1 from lukfor/features/multiple-scores
Browse files Browse the repository at this point in the history
- support multiple scores
- add PGS Catalog support
  • Loading branch information
lukfor authored Jun 18, 2020
2 parents ca0d9f7 + fa1b36e commit ba4d8b7
Show file tree
Hide file tree
Showing 13 changed files with 636 additions and 310 deletions.
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,14 @@ Apply PRS to multiple files by using file patterns:
./pgs-calc --ref PGS000018.txt.gz test.chr*.vcf.gz --out scores.txt
```

#### Multiple scores

Apply multiple score files:

```
./pgs-calc --ref PGS000018.txt.gz,PGS000027.txt.gz test.chr*.vcf.gz --out scores.txt
```


#### Filter by Imputation Qualitity

Expand Down
4 changes: 2 additions & 2 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<groupId>genepi</groupId>
<groupId>lukfor</groupId>
<artifactId>pgs-calc</artifactId>
<version>0.8.2</version>
<version>0.8.3</version>
<packaging>jar</packaging>

<name>riskscore</name>
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/genepi/riskscore/App.java
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ public class App {

public static final String APP = "pgs-calc";

public static final String VERSION = "0.8.2";
public static final String VERSION = "0.8.3";

public static final String URL = "https://github.com/lukfor/pgs-calc";

Expand Down
58 changes: 33 additions & 25 deletions src/main/java/genepi/riskscore/commands/ApplyScoreCommand.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import genepi.riskscore.io.Chunk;
import genepi.riskscore.io.OutputFile;
import genepi.riskscore.model.RiskScoreFormat;
import genepi.riskscore.model.RiskScoreSummary;
import genepi.riskscore.tasks.ApplyScoreTask;
import htsjdk.samtools.util.StopWatch;
import picocli.CommandLine.ArgGroup;
Expand Down Expand Up @@ -80,31 +81,38 @@ public Integer call() throws Exception {
System.out.println();

ApplyScoreTask task = new ApplyScoreTask();
task.setRiskScoreFilename(ref);

String[] refs = parseRef(ref);
task.setRiskScoreFilenames(refs);
if (format != null) {
RiskScoreFormat riskScoreFormat = RiskScoreFormat.load(format);
for (String file : refs) {
task.setRiskScoreFormat(file, riskScoreFormat);
}
} else {
for (String file : refs) {
String autoFormat = file + ".format";
if (new File(autoFormat).exists()) {
RiskScoreFormat riskScoreFormat = RiskScoreFormat.load(autoFormat);
task.setRiskScoreFormat(file, riskScoreFormat);
}
}
}

if (chunk != null) {
task.setChunk(chunk);
}
task.setVcfFilenames(vcfs);
task.setMinR2(minR2);
task.setGenotypeFormat(genotypeFormat);
if (format != null) {
RiskScoreFormat riskScoreFormat = RiskScoreFormat.load(format);
task.setRiskScoreFormat(riskScoreFormat);
} else {
String autoFormat = ref + ".format";
if (new File(autoFormat).exists()) {
RiskScoreFormat riskScoreFormat = RiskScoreFormat.load(autoFormat);
task.setRiskScoreFormat(riskScoreFormat);
}
}
task.setOutputVariantFilename(outputVariantFilename);
task.setIncludeVariantFilename(includeVariantFilename);

StopWatch watch = new StopWatch();
watch.start();
task.run();

OutputFile output = new OutputFile(task.getRiskScores());
OutputFile output = new OutputFile(task.getRiskScores(), task.getSummaries());
output.save(out);
watch.stop();

Expand All @@ -119,19 +127,11 @@ public Integer call() throws Exception {
System.out.println(" - Samples: " + number(task.getCountSamples()));
System.out.println(" - Variants: " + number(task.getCountVariants()));
System.out.println();
System.out.println(" Risk Score:");
System.out.println(" - Variants: " + number(task.getCountVariantsRiskScore()));
System.out.println(" - Variants used: " + number(task.getCountVariantsUsed()) + " ("
+ percentage(task.getCountVariantsUsed(), task.getCountVariantsRiskScore()) + ")");
System.out.println(" - Found in target and filtered by: ");
System.out.println(" - allele mismatch: " + number(task.getCountVariantsAlleleMissmatch()));
System.out.println(" - multi allelic or indels: " + number(task.getCountVariantsMultiAllelic()));
System.out.println(" - low R2 value: " + number(task.getCountVariantsFilteredR2()));
System.out.println(" - variants file: " + number(task.getCountFiltered()));

int notFound = task.getCountVariantsRiskScore()
- (task.getCountVariantsUsed() + task.getCountFiltered() + task.getCountVariantsAlleleMissmatch()
+ task.getCountVariantsMultiAllelic() + task.getCountVariantsFilteredR2());

for (RiskScoreSummary summary : task.getSummaries()) {
System.out.println(summary);
System.out.println();
}

// System.out.println(" - Not found in target: " + number(notFound));
System.out.println();
Expand All @@ -141,6 +141,14 @@ public Integer call() throws Exception {

}

private String[] parseRef(String ref) {
String[] refs = ref.split(",");
for (int i = 0; i < refs.length; i++) {
refs[i] = refs[i].trim();
}
return refs;
}

public static String number(long number) {
DecimalFormat formatter = new DecimalFormat("###,###,###");
return formatter.format(number);
Expand Down
22 changes: 16 additions & 6 deletions src/main/java/genepi/riskscore/io/OutputFile.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import genepi.io.table.writer.CsvTableWriter;
import genepi.io.table.writer.ITableWriter;
import genepi.riskscore.model.RiskScore;
import genepi.riskscore.model.RiskScoreSummary;

public class OutputFile {

Expand All @@ -28,15 +29,24 @@ public OutputFile() {

}

public OutputFile(RiskScore[] finalScores) {
samples = new Vector<String>();
data = new Vector[1];
data[0] = new Vector<Float>();
public OutputFile(RiskScore[] finalScores, RiskScoreSummary[] summaries) {

scores = new Vector<String>();
scores.add(COLUMN_SCORE);
for (RiskScoreSummary summary : summaries) {
scores.add(summary.getName());
}

samples = new Vector<String>();
data = new Vector[scores.size()];
for (int i = 0; i < scores.size(); i++) {
data[i] = new Vector<Float>();
}

for (RiskScore riskScore : finalScores) {
samples.add(riskScore.getSample());
data[0].add(riskScore.getScore());
for (int i = 0; i < scores.size(); i++) {
data[i].add(riskScore.getScore(i));
}
}

}
Expand Down
45 changes: 45 additions & 0 deletions src/main/java/genepi/riskscore/io/PGSCatalog.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
package genepi.riskscore.io;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;
import java.text.MessageFormat;

import genepi.io.FileUtil;

public class PGSCatalog {

public static String USER_HOME = System.getProperty("user.home");

public static String CACHE_DIR = FileUtil.path(USER_HOME, ".pgs-calc", "pgs-catalog");

public static String FILE_URL = "http://ftp.ebi.ac.uk/pub/databases/spot/pgs/scores/{0}/ScoringFiles/{0}.txt.gz";

public static String getFilenameById(String id) throws IOException {

String filename = FileUtil.path(CACHE_DIR, id + ".txt.gz");

if ((new File(filename)).exists()) {
System.out.println("Score '" + id + "' found in local cache " + filename);
return filename;
}

FileUtil.createDirectory(CACHE_DIR);

MessageFormat format = new MessageFormat(FILE_URL);
String url = format.format(new Object[] { id });

System.out.println("Downloading score '" + id + "' from " + url + "...");

InputStream in = new URL(url).openStream();
Files.copy(in, Paths.get(filename), StandardCopyOption.REPLACE_EXISTING);

return filename;

}

}
17 changes: 13 additions & 4 deletions src/main/java/genepi/riskscore/io/RiskScoreFile.java
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,22 @@ public RiskScoreFile(String filename, RiskScoreFormat format) throws Exception {
variants = new HashMap<Integer, ReferenceVariant>();

if (!new File(filename).exists()) {
throw new Exception("File '" + filename + "' not found.");

// check if its a PGS id
if (filename.startsWith("PGS") && filename.length() == 9 && !filename.endsWith(".txt.gz")) {
String id = filename;
this.filename = PGSCatalog.getFilenameById(id);
} else {

throw new Exception("File '" + filename + "' not found.");

}
}

DataInputStream in = openTxtOrGzipStream(filename);
DataInputStream in = openTxtOrGzipStream(this.filename);

ITableReader reader = new CsvTableReader(in, RiskScoreFormat.SEPARATOR);
checkFileFormat(reader, filename);
checkFileFormat(reader, this.filename);
reader.close();

}
Expand Down Expand Up @@ -70,7 +79,7 @@ private void checkFileFormat(ITableReader reader, String filename) throws Except
}

public void buildIndex(String chromosome) throws IOException {
buildIndex(chromosome, new Chunk());
buildIndex(chromosome, new Chunk());
}

public void buildIndex(String chromosome, Chunk chunk) throws IOException {
Expand Down
79 changes: 52 additions & 27 deletions src/main/java/genepi/riskscore/io/vcf/MinimalVariantContext.java
Original file line number Diff line number Diff line change
Expand Up @@ -32,19 +32,21 @@ public class MinimalVariantContext {

private boolean[] genotypes;

private String[] genotypesParsed;
private float[] genotypesParsed;

private String id = null;

private String genotype = null;

private String info = null;

private Map<String, String> infos;

private boolean dirtyGenotypes = true;

public MinimalVariantContext(int samples) {
genotypes = new boolean[samples];
genotypesParsed = new String[samples];
genotypesParsed = new float[samples];
}

public int getHetCount() {
Expand Down Expand Up @@ -133,6 +135,7 @@ public int getNSamples() {
public void setRawLine(String rawLine) {
this.rawLine = rawLine;
this.id = null;
this.dirtyGenotypes = true;
}

public String getRawLine() {
Expand Down Expand Up @@ -199,20 +202,20 @@ public String toString() {
}
return id;
}

public void setInfo(String info) {
this.info = info;
infos = null;
}

public String getInfo(String key) {
//lazy loadding
// lazy loadding
if (infos == null) {
infos = new HashMap<String, String>();
String[] tiles = this.info.split(";");
for (int i = 0; i < tiles.length; i++) {
String[] tiles2 = tiles[i].split("=");
if(tiles2.length == 2) {
if (tiles2.length == 2) {
infos.put(tiles2[0], tiles2[1]);
}
}
Expand All @@ -228,27 +231,49 @@ public double getInfoAsDouble(String key, double defaultValue) {
return defaultValue;
}
}

public String[] getGenotypes(String field) throws IOException {
String tiles[] = rawLine.split("\t", 10);
String[] formats = tiles[8].split(":");
int index = -1;
for (int i = 0; i < formats.length; i++) {
if (formats[i].equals(field)) {
index = i;

public float[] getGenotypeDosages(String field) throws IOException {

if (dirtyGenotypes) {

String tiles[] = rawLine.split("\t", 10);
String[] formats = tiles[8].split(":");
int index = -1;
for (int i = 0; i < formats.length; i++) {
if (formats[i].equals(field)) {
index = i;
}
}
}

if (index == -1) {
throw new IOException("field '" + field + "' not found in FORMAT");
}
String[] values = tiles[9].split("\t");
for (int i = 0; i < genotypesParsed.length; i++) {
String value = values[i];
String[] tiles2 = value.split(":");
genotypesParsed[i] = tiles2[index];

if (index == -1) {
throw new IOException("field '" + field + "' not found in FORMAT");
}
String[] values = tiles[9].split("\t");
for (int i = 0; i < genotypesParsed.length; i++) {
String value = values[i];
String[] tiles2 = value.split(":");
String genotype = tiles2[index];

float dosage = 0;
// genotypes
if (genotype.equals("0|0")) {
dosage = 0;
} else if (genotype.equals("0|1") || genotype.equals("1|0")) {
dosage = 1;
} else if (genotype.equals("1|1")) {
dosage = 2;
} else {
// dosage
dosage = Float.parseFloat(genotype);
}
genotypesParsed[i] = dosage;

}

dirtyGenotypes = false;

}
return genotypesParsed;
}

}
Loading

0 comments on commit ba4d8b7

Please sign in to comment.