Skip to content

Commit

Permalink
sources: Remodel Datasets and Narratives using a Resources with Subresources interface
Browse files Browse the repository at this point in the history

Refactors the common bits shared by the Dataset and Narrative classes
into a parent class, Resource, and the special "type" handling of
Datasets (main, root-sequence, etc) into Subresources.  Narratives also
gain Subresources, but only support a single type for now (md).  Sources
now own URL generation; subresources know their own basename but
delegate to the Source for the full URL.

Resource and Subresource classes are interfaces which provide a partial
implementation of required properties and methods.  The upshot of the
interfaces is that new endpoint code will be able to deal with Datasets
and Narratives interchangeably.  Subresource also provides a place to
attach more info about each "type" than just a URL.
  • Loading branch information
tsibley committed Nov 19, 2021
1 parent 37b32da commit a4bd2b5
Show file tree
Hide file tree
Showing 4 changed files with 114 additions and 74 deletions.
10 changes: 5 additions & 5 deletions src/endpoints/charon/getDataset.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ const queryString = require("query-string");

const utils = require("../../utils");
const {canonicalizePrefix, parsePrefix} = require("../../utils/prefix");
const {NoDatasetPathError} = require("../../exceptions");
const {NoResourcePathError} = require("../../exceptions");
const auspice = require("auspice");
const request = require('request');
const {BadRequest, NotFound, InternalServerError} = require("http-errors");
Expand Down Expand Up @@ -60,7 +60,7 @@ const sendV1Dataset = async (res, metaJsonUrl, treeJsonUrl) => {
* @throws {NotFound} Throws if the dataset didn't exist (or the streaming failed)
*/
const streamMainV2Dataset = async (res, dataset) => {
const main = await dataset.urlFor("main");
const main = await dataset.subresource("main").url();
try {
await new Promise((resolve, reject) => {
let statusCode;
Expand Down Expand Up @@ -126,7 +126,7 @@ const getDataset = async (req, res) => {
* Note that this leaks the existence of private sources, but I think
* broader discussions are leaning towards that anyhow.
*/
if (err instanceof NoDatasetPathError) {
if (err instanceof NoResourcePathError) {
utils.verbose(err.message);
return res.status(204).end();
}
Expand Down Expand Up @@ -156,7 +156,7 @@ const getDataset = async (req, res) => {
}

if (query.type) {
const url = await dataset.urlFor(query.type);
const url = await dataset.subresource(query.type).url();
return requestCertainFileType(res, req, url, query);
}

Expand All @@ -166,7 +166,7 @@ const getDataset = async (req, res) => {
} catch (errV2) {
try {
/* attempt to fetch the meta + tree JSONs, combine, and send */
return await sendV1Dataset(res, await dataset.urlFor("meta"), await dataset.urlFor("tree"));
return await sendV1Dataset(res, await dataset.subresource("meta").url(), await dataset.subresource("tree").url());
} catch (errV1) {
if (dataset.isRequestValidWithoutDataset) {
utils.verbose("Request is valid, but no dataset available. Returning 204.");
Expand Down
2 changes: 1 addition & 1 deletion src/endpoints/charon/getNarrative.js
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ const getNarrative = async (req, res) => {

// Generate the narrative's origin URL for fetching.
const narrative = source.narrative(prefixParts);
const fetchURL = await narrative.url();
const fetchURL = await narrative.subresource("md").url();

try {
utils.log(`Fetching narrative ${fetchURL} and streaming to client for parsing`);
Expand Down
6 changes: 3 additions & 3 deletions src/exceptions.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,13 @@ class NextstrainError extends Error {
/* Thrown when a valid Source object is asked to create a new Resource object
* (a Dataset or a Narrative) without a resource path.
*/
class NoDatasetPathError extends NextstrainError {
constructor(msg = "No dataset path provided", ...rest) {
class NoResourcePathError extends NextstrainError {
  constructor(...args) {
    // The first argument is the message; fall back to the generic text when
    // it is omitted (or undefined) and forward everything else untouched.
    const [msg = "No resource path provided", ...rest] = args;
    super(msg, ...rest);
  }
}


module.exports = {
NoDatasetPathError
NoResourcePathError
};
170 changes: 105 additions & 65 deletions src/sources.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ const yamlFront = require("yaml-front-matter");
const {fetch} = require("./fetch");
const queryString = require("query-string");
const {NotFound} = require('http-errors');
const {NoDatasetPathError} = require("./exceptions");
const {NoResourcePathError} = require("./exceptions");
const utils = require("./utils");

const S3 = new AWS.S3();
Expand All @@ -29,6 +29,10 @@ class Source {
async baseUrl() {
throw new Error("async baseUrl() must be implemented by subclasses");
}
async urlFor(path, method = 'GET') { // eslint-disable-line no-unused-vars
const url = new URL(path, await this.baseUrl());
return url.toString();
}
static isGroup() { /* is the source a "nextstrain group"? */
return false;
}
Expand Down Expand Up @@ -70,35 +74,80 @@ class Source {
}
}

class Dataset {
class Resource {
constructor(source, pathParts) {
this.source = source;
this.pathParts = pathParts;

// Require baseParts, otherwise we have no actual dataset path. This
// inspects baseParts because some of the pathParts (above) may not apply,
// which each Dataset subclass determines for itself.
// Require baseParts, otherwise we have no actual dataset/narrative path.
// This inspects baseParts because some of the pathParts (above) may not
// apply, which each Dataset/Narrative subclass determines for itself.
if (!this.baseParts.length) {
throw new NoDatasetPathError();
throw new NoResourcePathError();
}
}
get baseParts() {
return this.pathParts.slice();
}
baseNameFor(type) {
const baseName = this.baseParts.join("_");
return type === "main"
? `${baseName}.json`
: `${baseName}_${type}.json`;
get baseName() {
return this.baseParts.join("_");
}
async urlFor(type, method = 'GET') { // eslint-disable-line no-unused-vars
const url = new URL(this.baseNameFor(type), await this.source.baseUrl());
return url.toString();
async exists() {
throw new Error("exists() must be implemented by Resource subclasses");
}
subresource(type) { // eslint-disable-line no-unused-vars
throw new Error("subresource() must be implemented by Resource subclasses");
}
}

/**
 * Interface class for one typed part of a parent Resource, e.g. the "main" or
 * "root-sequence" file of a Dataset, or the "md" file of a Narrative.
 *
 * This is a partial implementation: subclasses must provide the static
 * `validTypes` list and the `baseName` getter.  The full URL is never built
 * here; it is delegated to the Source that owns the parent resource.
 */
class Subresource {
  /**
   * @param {Resource} resource - parent resource this subresource belongs to
   * @param {string} type - one of the subclass's `validTypes`
   * @throws {Error} if instantiated directly, if `resource` is not a Resource,
   *   or if `type` is not listed in the subclass's `validTypes`
   */
  constructor(resource, type) {
    if (this.constructor === Subresource) {
      throw new Error("Subresource interface class must be subclassed");
    }
    if (!(resource instanceof Resource)) {
      // Describe the offending value by its constructor *name*.  Reading
      // `.constructor` off null/undefined would raise an unrelated TypeError
      // and mask this error, and interpolating the constructor itself would
      // dump its entire source text into the message.
      const described = resource === null || resource === undefined
        ? String(resource)
        : (resource.constructor && resource.constructor.name) || typeof resource;
      throw new Error(`invalid Subresource parent resource type: ${described}`);
    }
    if (!this.constructor.validTypes.includes(type)) {
      throw new Error(`invalid Subresource type: ${type}`);
    }
    this.resource = resource;
    this.type = type;
  }

  /**
   * Types accepted by the constructor for this kind of subresource.
   * @returns {string[]}
   * @throws {Error} always, unless overridden by the subclass
   */
  static get validTypes() {
    throw new Error("validTypes() must be implemented by Subresource subclasses");
  }

  /**
   * Full URL of this subresource, as generated by the parent resource's
   * Source from this subresource's `baseName`.
   *
   * @param {string} [method='GET'] - HTTP method the URL will be used with;
   *   passed through to the Source's `urlFor()`
   * @returns {Promise<string>}
   */
  async url(method = 'GET') {
    return await this.resource.source.urlFor(this.baseName, method);
  }

  /**
   * Path of this subresource, which the Source turns into a URL.
   * @returns {string}
   * @throws {Error} always, unless overridden by the subclass
   */
  get baseName() {
    throw new Error("baseName() must be implemented by Subresource subclasses");
  }
}

/**
 * Subresource of a Dataset.  Each type maps to a JSON file named after the
 * parent dataset's baseName: the "main" type has no suffix, every other type
 * is appended as `_<type>`.
 */
class DatasetSubresource extends Subresource {
  static validTypes = ["main", "root-sequence", "tip-frequencies", "meta", "tree"];

  get baseName() {
    const suffix = this.type === "main" ? "" : `_${this.type}`;
    const {baseName: stem} = this.resource;
    return `${stem}${suffix}.json`;
  }
}

/**
 * Subresource of a Narrative.  Only the "md" type is valid; it maps to the
 * parent narrative's baseName with a ".md" extension.
 */
class NarrativeSubresource extends Subresource {
  static validTypes = ["md"];

  get baseName() {
    const {baseName: stem} = this.resource;
    return `${stem}.md`;
  }
}

class Dataset extends Resource {
async exists() {
const method = "HEAD";
const _exists = async (type) =>
(await fetch(await this.urlFor(type, method), {method, cache: "no-store"})).status === 200;
(await fetch(await this.subresource(type).url(method), {method, cache: "no-store"})).status === 200;

const all = async (...promises) =>
(await Promise.all(promises)).every(x => x);
Expand Down Expand Up @@ -158,31 +207,24 @@ class Dataset {
get isRequestValidWithoutDataset() {
return false;
}
}

class Narrative {
constructor(source, pathParts) {
this.source = source;
this.pathParts = pathParts;
}
get baseParts() {
return this.pathParts.slice();
}
get baseName() {
const baseName = this.baseParts.join("_");
return `${baseName}.md`;
}
async url(method = 'GET') { // eslint-disable-line no-unused-vars
const url = new URL(this.baseName, await this.source.baseUrl());
return url.toString();
subresource(type) {
return new DatasetSubresource(this, type);
}
}

class Narrative extends Resource {
async exists() {
const method = "HEAD";
const _exists = async () =>
(await fetch(await this.url(method), {method, cache: "no-store"})).status === 200;
(await fetch(await this.subresource("md").url(method), {method, cache: "no-store"})).status === 200;

return (await _exists()) || false;
}

subresource(type) {
return new NarrativeSubresource(this, type);
}
}

class CoreSource extends Source {
Expand All @@ -191,8 +233,13 @@ class CoreSource extends Source {
get repo() { return "nextstrain/narratives"; }
get branch() { return "master"; }

narrative(pathParts) {
return new CoreNarrative(this, pathParts);
async urlFor(path, method = 'GET') { // eslint-disable-line no-unused-vars
const baseUrl = path.endsWith(".md")
? `https://raw.githubusercontent.com/${this.repo}/${await this.branch}/`
: await this.baseUrl();

const url = new URL(path, baseUrl);
return url.toString();
}

// The computation of these globals should move here.
Expand Down Expand Up @@ -241,14 +288,6 @@ class CoreStagingSource extends CoreSource {
get branch() { return "staging"; }
}

class CoreNarrative extends Narrative {
async url() {
const repoBaseUrl = `https://raw.githubusercontent.com/${this.source.repo}/${await this.source.branch}/`;
const url = new URL(this.baseName, repoBaseUrl);
return url.toString();
}
}

class CommunitySource extends Source {
constructor(owner, repoName) {
super();
Expand Down Expand Up @@ -416,8 +455,18 @@ class UrlDefinedSource extends Source {
}

class UrlDefinedDataset extends Dataset {
baseNameFor(type) {
const baseName = this.baseParts.join("/");
get baseName() {
return this.baseParts.join("/");
}
subresource(type) {
return new UrlDefinedDatasetSubresource(this, type);
}
}

class UrlDefinedDatasetSubresource extends DatasetSubresource {
get baseName() {
const type = this.type;
const baseName = this.resource.baseName;

if (type === "main") {
return baseName;
Expand All @@ -433,6 +482,15 @@ class UrlDefinedNarrative extends Narrative {
get baseName() {
return this.baseParts.join("/");
}
subresource(type) {
return new UrlDefinedNarrativeSubresource(this, type);
}
}

/**
 * Narrative subresource for URL-defined sources.  Overrides the inherited
 * baseName so that the parent resource's baseName is used as-is, without the
 * ".md" extension NarrativeSubresource appends.
 */
class UrlDefinedNarrativeSubresource extends NarrativeSubresource {
  get baseName() {
    const {resource} = this;
    return resource.baseName;
  }
}


Expand Down Expand Up @@ -566,31 +624,13 @@ class S3Source extends Source {
}

class PrivateS3Source extends S3Source {
dataset(pathParts) {
return new PrivateS3Dataset(this, pathParts);
}
narrative(pathParts) {
return new PrivateS3Narrative(this, pathParts);
}
static visibleToUser(user) { // eslint-disable-line no-unused-vars
throw new Error("visibleToUser() must be implemented explicitly by subclasses (not inherited from PrivateS3Source)");
}
}

class PrivateS3Dataset extends Dataset {
async urlFor(type, method = 'GET') {
return S3.getSignedUrl(method === "HEAD" ? "headObject" : "getObject", {
Bucket: this.source.bucket,
Key: this.baseNameFor(type)
});
}
}

class PrivateS3Narrative extends Narrative {
async url(method = 'GET') {
async urlFor(path, method = 'GET') {
return S3.getSignedUrl(method === "HEAD" ? "headObject" : "getObject", {
Bucket: this.source.bucket,
Key: this.baseName
Bucket: this.bucket,
Key: path
});
}
}
Expand Down

0 comments on commit a4bd2b5

Please sign in to comment.