Skip to content

Commit

Permalink
Language and Version inference (broadinstitute#3676)
Browse files Browse the repository at this point in the history
Allow language factories to infer appropriateness
  • Loading branch information
cjllanwarne authored May 23, 2018
1 parent 385fe3f commit 48e191c
Show file tree
Hide file tree
Showing 18 changed files with 140 additions and 85 deletions.
Original file line number Diff line number Diff line change
@@ -1,14 +1,12 @@
name: cwl_optionals
testFormat: workflowsuccess
workflowType: CWL
workflowTypeVersion: v1.0

files {
workflow: cwl_optionals/cwl_optionals.cwl
inputs: cwl_optionals/cwl_optionals.json
}

metadata {
"submittedFiles.workflowType": CWL
"submittedFiles.workflowTypeVersion": v1.0
"actualWorkflowLanguage": CWL
"actualWorkflowLanguageVersion": v1.0
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
name: draft3_infer_version
testFormat: workflowsuccess
# Note: no workflowType specified!
# Note: no workflowTypeVersion specified!
tags: ["wdl_1.0"]

files {
workflow: wdl_draft3/draft3_infer_version/draft3_infer_version.wdl
}

metadata {
workflowName: draft3_infer_version
status: Succeeded
"outputs.draft3_infer_version.j": 660
"actualWorkflowLanguage": WDL
"actualWorkflowLanguageVersion": 1.0
}
2 changes: 2 additions & 0 deletions centaur/src/main/resources/standardTestCases/forkjoin.test
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ files {

metadata {
workflowName: forkjoin
"actualWorkflowLanguage": WDL
"actualWorkflowLanguageVersion": draft-2
"calls.forkjoin.wc.outputs.count": 1000
"calls.forkjoin.grep.outputs.count": 21
"calls.forkjoin.join.outputs.proportion": 47
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@

# Some heading commentary...
version 1.0

workflow draft3_infer_version {
input {
Int i = 12
}

output {
Int j = i * 55
}
}

This file was deleted.

This file was deleted.

6 changes: 3 additions & 3 deletions core/src/main/resources/reference.conf
Original file line number Diff line number Diff line change
Expand Up @@ -207,9 +207,6 @@ workflow-options {
# Workflow-failure-mode determines what happens to other calls when a call fails. Can be either ContinueWhilePossible or NoNewCalls.
# Can also be overridden in workflow options. Defaults to NoNewCalls. Uncomment to change:
#workflow-failure-mode: "ContinueWhilePossible"

# When a workflow type is not provided on workflow submission, this specifies the default type.
default.workflow-type: "WDL"
}

# Optional call-caching configuration.
Expand Down Expand Up @@ -283,8 +280,10 @@ engine {
}

languages {
default: WDL
WDL {
versions {
default: "draft-2"
"draft-2" {
language-factory = "languages.wdl.draft2.WdlDraft2LanguageFactory"
config {
Expand All @@ -304,6 +303,7 @@ languages {
}
CWL {
versions {
default: "v1.0"
"v1.0" {
language-factory = "languages.cwl.CwlV1_0LanguageFactory"
config {
Expand Down
3 changes: 3 additions & 0 deletions core/src/main/scala/cromwell/core/WorkflowMetadataKeys.scala
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ object WorkflowMetadataKeys {
val ParentWorkflowId = "parentWorkflowId"
val RootWorkflowId = "rootWorkflowId"

val LanguageName = "actualWorkflowLanguage"
val LanguageVersionName = "actualWorkflowLanguageVersion"

val SubmissionSection = "submittedFiles"
val SubmissionSection_Workflow = "workflow"
val SubmissionSection_Root = "root"
Expand Down
5 changes: 0 additions & 5 deletions core/src/main/scala/cromwell/core/WorkflowOptions.scala
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ package cromwell.core
import com.typesafe.config.ConfigFactory
import common.util.TryUtil
import spray.json._
import net.ceedubs.ficus.Ficus._

import scala.collection.JavaConverters._
import scala.util.{Failure, Success, Try}
Expand Down Expand Up @@ -132,10 +131,6 @@ object WorkflowOptions {
}

val empty = WorkflowOptions.fromMap(Map.empty).get

lazy val defaultWorkflowType: Option[String] = Option(WorkflowOptionsConf.getString("default.workflow-type"))
lazy val defaultWorkflowTypeVersion: Option[String] =
WorkflowOptionsConf.as[Option[String]]("default.workflow-type-version")
}

case class WorkflowOptions(jsObject: JsObject) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,20 @@ import cromwell.engine.language.CromwellLanguages._
import cromwell.languages.LanguageFactory

// Construct a singleton instance of this class using 'initLanguages' below.
final case class CromwellLanguages private(languageConfig: List[LanguageConfigurationEntry]) {
final case class CromwellLanguages private(languageConfig: LanguagesConfiguration) {

val languages: Map[CromwellLanguageName, LanguageVersions] = makeLanguages
val default: LanguageVersions = languages.find(lang => languageConfig.default.contains(lang._1)).getOrElse(languages.head)._2

private def makeLanguages = (languageConfig map { lc =>
private def makeLanguages: Map[CromwellLanguageName, LanguageVersions] = (languageConfig.languages map { lc =>
val versions = lc.versions map { case (languageVersion, languageConfigEntryFields) =>
languageVersion -> makeLanguageFactory(languageConfigEntryFields.className, languageConfigEntryFields.config)
}
val default: LanguageFactory = versions.find(v => lc.default.contains(v._1)).getOrElse(versions.head)._2

lc.name.toUpperCase -> LanguageVersions(versions)
lc.name.toUpperCase -> LanguageVersions(versions, default)
}).toMap


private def makeLanguageFactory(className: String, config: Map[String, Any]) = {
Class.forName(className)
.getConstructor(classOf[Map[String, Any]])
Expand All @@ -28,7 +29,7 @@ final case class CromwellLanguages private(languageConfig: List[LanguageConfigur
/**
* Holds all the registered versions of a language.
*/
final case class LanguageVersions private(allVersions: Map[CromwellLanguageVersion, LanguageFactory])
final case class LanguageVersions private(allVersions: Map[CromwellLanguageVersion, LanguageFactory], default: LanguageFactory)

object CromwellLanguages {
type CromwellLanguageName = String
Expand All @@ -37,7 +38,7 @@ object CromwellLanguages {
private var _instance: CromwellLanguages = _
lazy val instance: CromwellLanguages = _instance

def initLanguages(backendEntries: List[LanguageConfigurationEntry]): Unit = {
def initLanguages(backendEntries: LanguagesConfiguration): Unit = {
_instance = CromwellLanguages(backendEntries)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,28 +7,36 @@ import cromwell.engine.language.CromwellLanguages.{CromwellLanguageName, Cromwel

import scala.collection.JavaConverters._

final case class LanguageConfigurationEntry(name: CromwellLanguageName, versions: Map[CromwellLanguageVersion, LanguageConfigurationEntryFields])
final case class LanguageConfigurationEntryFields(className: String, config: Map[String, Any])
final case class LanguagesConfiguration(languages: List[LanguageVersionConfigurationEntry], default: Option[String])
final case class LanguageVersionConfigurationEntry(name: CromwellLanguageName, versions: Map[CromwellLanguageVersion, LanguageVersionConfigurationEntryFields], default: Option[String])
final case class LanguageVersionConfigurationEntryFields(className: String, config: Map[String, Any])

object LanguageConfiguration {
private val LanguagesConfig = ConfigFactory.load.getConfig("languages")
private val LanguageNames: Set[String] = LanguagesConfig.entrySet().asScala.map(findFirstKey).toSet
private val DefaultLanguageName: Option[String] = if (LanguagesConfig.hasPath("default")) Option(LanguagesConfig.getString("default")) else None

val AllLanguageEntries: List[LanguageConfigurationEntry] = LanguageNames.toList map { languageName =>
private val LanguageNames: Set[String] = LanguagesConfig.entrySet().asScala.map(findFirstKey).filterNot(_ == "default").toSet

val languageConfig = LanguagesConfig.getConfig(languageName)
val versionSet = languageConfig.getConfig("versions")
val languageVersionNames: Set[String] = versionSet.entrySet().asScala.map(findFirstKey).toSet
val AllLanguageEntries: LanguagesConfiguration = {
val languages = LanguageNames.toList map { languageName =>

val versions = (languageVersionNames.toList map { languageVersionName =>
val configEntry = versionSet.getConfig(s""""$languageVersionName"""")
val className: String = configEntry.getString("language-factory")
val factoryConfig: Map[String, Any] = if (configEntry.hasPath("config")) { configEntry.getObject("config").unwrapped().asScala.toMap } else Map.empty[String, Any]
val fields = LanguageConfigurationEntryFields(className, factoryConfig)
languageVersionName -> fields
}).toMap
val languageConfig = LanguagesConfig.getConfig(languageName)
val defaultVersionName: Option[String] = if (LanguagesConfig.hasPath("default")) { Option(LanguagesConfig.getString("default")) } else None
val versionSet = languageConfig.getConfig("versions")
val languageVersionNames: Set[String] = versionSet.entrySet().asScala.map(findFirstKey).filterNot(_ == "default").toSet

LanguageConfigurationEntry(languageName, versions)
val versions = (languageVersionNames.toList map { languageVersionName =>
val configEntry = versionSet.getConfig(s""""$languageVersionName"""")
val className: String = configEntry.getString("language-factory")
val factoryConfig: Map[String, Any] = if (configEntry.hasPath("config")) { configEntry.getObject("config").unwrapped().asScala.toMap } else Map.empty[String, Any]
val fields = LanguageVersionConfigurationEntryFields(className, factoryConfig)
languageVersionName -> fields
}).toMap

LanguageVersionConfigurationEntry(languageName, versions, defaultVersionName)
}

LanguagesConfiguration(languages, DefaultLanguageName)
}

// Gets the first key in a hocon key entry (which might contain several keys, perhaps surrounded by quotes)
Expand All @@ -42,12 +50,15 @@ object LanguageConfiguration {
// Called on the entry on line 2, returns '1.0'
// Called on the entry on line 3, returns 'x.y'
private def findFirstKey(entry: Entry[String, _]): String = {
val NoQuoteFirstKey = """([^.]*)\.(.*)""".r
val NoQuoteFirstKey = """([^/.]*)\.(.*)""".r
val SingleQuotedFirstKey = """"(.*)"\.(.*)""".r
val NoQuoteNoDot = """([^\.\"]*)""".r

entry.getKey match {
case SingleQuotedFirstKey(firstKey, _) => firstKey
case NoQuoteFirstKey(firstKey, _) => firstKey
case NoQuoteNoDot(key) => key
case other => throw new IllegalArgumentException(s"Unexpected key format: $other")
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ import cats.syntax.validated._
import com.typesafe.config.Config
import com.typesafe.scalalogging.LazyLogging
import common.exception.{AggregatedMessageException, MessageAggregation}
import common.validation.Checked._
import common.validation.ErrorOr._
import common.validation.Parse._
import cromwell.backend.BackendWorkflowDescriptor
Expand Down Expand Up @@ -218,19 +217,30 @@ class MaterializeWorkflowDescriptorActor(serviceRegistryActor: ActorRef,
workflowOptions: WorkflowOptions,
pathBuilders: List[PathBuilder],
engineIoFunctions: EngineIoFunctions): Parse[EngineWorkflowDescriptor] = {
val namespaceValidation: Parse[ValidatedWomNamespace] = sourceFiles.workflowType match {
def chooseFactory(factories: List[LanguageFactory]): Option[LanguageFactory] = factories.find(_.looksParsable(sourceFiles.workflowSource))

val factory: ErrorOr[LanguageFactory] = sourceFiles.workflowType match {
case Some(languageName) if CromwellLanguages.instance.languages.contains(languageName.toUpperCase) =>
val language = CromwellLanguages.instance.languages(languageName.toUpperCase)
val factory: ErrorOr[LanguageFactory] = sourceFiles.workflowTypeVersion match {
sourceFiles.workflowTypeVersion match {
case Some(v) if language.allVersions.contains(v) => language.allVersions(v).valid
case Some(other) => s"Unknown version '$other' for workflow language '$languageName'".invalidNel
case _ => language.allVersions.head._2.valid
case _ => chooseFactory(language.allVersions.values.toList).getOrElse(language.default).valid
}
errorOrParse(factory).flatMap(_.validateNamespace(sourceFiles, workflowOptions, importLocalFilesystem, workflowIdForLogging, engineIoFunctions))
case Some(other) => fromEither[IO](s"Unknown workflow type: $other".invalidNelCheck[ValidatedWomNamespace])
case None => fromEither[IO]("Need a workflow type here !".invalidNelCheck[ValidatedWomNamespace])
case Some(other) => s"Unknown workflow type: $other".invalidNel[LanguageFactory]
case None =>
val allFactories = CromwellLanguages.instance.languages.values.flatMap(_.allVersions.values)
chooseFactory(allFactories.toList).getOrElse(CromwellLanguages.instance.default.default).validNel
}

factory foreach { validFactory =>
workflowLogger.info(s"Parsing workflow as ${validFactory.languageName} ${validFactory.languageVersionName}")
pushLanguageToMetadata(validFactory.languageName, validFactory.languageVersionName)
}

val namespaceValidation: Parse[ValidatedWomNamespace] =
errorOrParse(factory).flatMap(_.validateNamespace(sourceFiles, workflowOptions, importLocalFilesystem, workflowIdForLogging, engineIoFunctions))

val labels = convertJsonToLabels(sourceFiles.labelsJson)

for {
Expand All @@ -249,6 +259,14 @@ class MaterializeWorkflowDescriptorActor(serviceRegistryActor: ActorRef,
serviceRegistryActor ! PutMetadataAction(importsMetadata.toVector ++ wfInputsMetadataEvents :+ wfNameMetadataEvent)
}

private def pushLanguageToMetadata(languageName: String, languageVersion: String): Unit = {
val events = List (
MetadataEvent(MetadataKey(workflowIdForLogging, None, WorkflowMetadataKeys.LanguageName), MetadataValue(languageName)),
MetadataEvent(MetadataKey(workflowIdForLogging, None, WorkflowMetadataKeys.LanguageVersionName), MetadataValue(languageVersion))
)
serviceRegistryActor ! PutMetadataAction(events)
}

private def wfInputsMetadata(workflowInputs: Map[OutputPort, WomValue]): Iterable[MetadataEvent] = {
import cromwell.core.CromwellGraphNode.CromwellEnhancedOutputPort

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -176,20 +176,6 @@ object PartialWorkflowSources {
def validateOptions(options: Option[WorkflowOptionsJson]): ErrorOr[WorkflowOptions] =
WorkflowOptions.fromJsonString(options.getOrElse("{}")).toErrorOr leftMap { _ map { i => s"Invalid workflow options provided: $i" } }

def validateWorkflowType(partialSource: PartialWorkflowSources): ErrorOr[Option[WorkflowType]] = {
partialSource.workflowType match {
case Some(_) => partialSource.workflowType.validNel
case None => WorkflowOptions.defaultWorkflowType.validNel
}
}

def validateWorkflowTypeVersion(partialSource: PartialWorkflowSources): ErrorOr[Option[WorkflowTypeVersion]] = {
partialSource.workflowTypeVersion match {
case Some(src) => Option(src).validNel
case None => WorkflowOptions.defaultWorkflowTypeVersion.validNel
}
}

def validateLabels(labels: WorkflowJson) : ErrorOr[WorkflowJson] = {

def validateKeyValuePair(key: String, value: String): ErrorOr[Unit] = (Label.validateLabelKey(key), Label.validateLabelValue(value)).tupled.void
Expand All @@ -212,14 +198,14 @@ object PartialWorkflowSources {
partialSources match {
case Valid(partialSource) =>
(validateInputs(partialSource),
validateOptions(partialSource.workflowOptions), validateWorkflowType(partialSource),
validateWorkflowTypeVersion(partialSource), validateLabels(partialSource.customLabels.getOrElse("{}"))) mapN {
case (wfInputs, wfOptions, workflowType, workflowTypeVersion, workflowLabels) =>
validateOptions(partialSource.workflowOptions),
validateLabels(partialSource.customLabels.getOrElse("{}"))) mapN {
case (wfInputs, wfOptions, workflowLabels) =>
wfInputs.map(inputsJson => WorkflowSourceFilesCollection(
workflowSource = partialSource.workflowSource,
workflowRoot = partialSource.workflowRoot,
workflowType = workflowType,
workflowTypeVersion = workflowTypeVersion,
workflowType = partialSource.workflowType,
workflowTypeVersion = partialSource.workflowTypeVersion,
inputsJson = inputsJson,
workflowOptionsJson = wfOptions.asPrettyJson,
labelsJson = workflowLabels,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ import wom.expression.IoFunctionSet

class CwlV1_0LanguageFactory(override val config: Map[String, Any]) extends LanguageFactory {

override val languageName: String = "CWL"
override val languageVersionName: String = "v1.0"

override def validateNamespace(source: WorkflowSourceFilesCollection,
workflowOptions: WorkflowOptions,
importLocalFilesystem: Boolean,
Expand Down Expand Up @@ -62,4 +65,9 @@ class CwlV1_0LanguageFactory(override val config: Map[String, Any]) extends Lang

override def createExecutable(womBundle: WomBundle, inputs: WorkflowJson, ioFunctions: IoFunctionSet): Checked[ValidatedWomNamespace] =
standardConfig.enabledCheck flatMap { _ => "No createExecutable method implemented in CWL v1".invalidNelCheck }

override def looksParsable(content: String): Boolean = content.lines.exists { l =>
val trimmed = l.trim.stripSuffix(",")
trimmed == """"cwlVersion": "v1.0"""" || trimmed == "cwlVersion: v1.0"
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ import wom.expression.IoFunctionSet

trait LanguageFactory {

def languageName: String
def languageVersionName: String

// Passed in by the constructor:
def config: Map[String, Any]

Expand All @@ -30,5 +33,10 @@ trait LanguageFactory {
importLocalFilesystem: Boolean,
workflowIdForLogging: WorkflowId,
ioFunctions: IoFunctionSet): Parse[ValidatedWomNamespace]
}

/**
* In case no version is specified: does this language factory feel like it might be suitable for this file?
* @param content The workflow description
*/
def looksParsable(content: String): Boolean
}
Loading

0 comments on commit 48e191c

Please sign in to comment.