From 386e92f8f8beaab4e19ce97389cbd980c2493885 Mon Sep 17 00:00:00 2001 From: Arnold Lin Date: Fri, 16 Oct 2020 13:44:37 -0400 Subject: [PATCH] Remove userInfo from Conda environment lists (#714) * drop user info when ingesting conda list explicit uris * add test * special taste of scala * more test * fix test * more docs * 1 more docs --- .../spark/api/conda/CondaEnvironment.scala | 2 ++ .../api/conda/CondaEnvironmentManager.scala | 22 ++++++++++++++++++- .../conda/CondaEnvironmentManagerTest.scala | 13 +++++++++++ 3 files changed, 36 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/api/conda/CondaEnvironment.scala b/core/src/main/scala/org/apache/spark/api/conda/CondaEnvironment.scala index eed7ef583be01..ef8b90518a0c5 100644 --- a/core/src/main/scala/org/apache/spark/api/conda/CondaEnvironment.scala +++ b/core/src/main/scala/org/apache/spark/api/conda/CondaEnvironment.scala @@ -121,6 +121,8 @@ final class CondaEnvironment( *
  • In {@code Solve} mode, list resolved packages into a specfile * and use that on executors.
  • * + * Always using {@code File} mode for executors reduces conda init time by avoiding + * re-solving conda deps. */ def buildSetupInstructions: CondaSetupInstructions = { bootstrapMode match { diff --git a/core/src/main/scala/org/apache/spark/api/conda/CondaEnvironmentManager.scala b/core/src/main/scala/org/apache/spark/api/conda/CondaEnvironmentManager.scala index a4bd10b781338..b91d725b75143 100644 --- a/core/src/main/scala/org/apache/spark/api/conda/CondaEnvironmentManager.scala +++ b/core/src/main/scala/org/apache/spark/api/conda/CondaEnvironmentManager.scala @@ -75,12 +75,24 @@ final class CondaEnvironmentManager(condaBinaryPath: String, 0.until(verbosity).map(_ => "-v").toList } + /** + * List of exact uris of the packages in the solved environment, dropping any credential + * information (user info). + * + * This method is used by executors to obtain specfiles for reproducing conda envs. File mode + * creation expects pkg uris without user info, so we need to drop it before returning. + * @param envDir path of the conda environment whose packages are listed + * @return List of uris, each with its user info removed + */ def listPackagesExplicit(envDir: String): List[String] = { logInfo("Retrieving a conda environment's list of installed packages") val command = Process(List(condaBinaryPath, "list", "-p", envDir, "--explicit"), None) val out = runOrFail(command, "retrieving the conda installation's list of installed packages") - out.split("\n").filterNot(line => line.startsWith("#") || line.startsWith("@")).toList + out.split("\n") + .filterNot(line => line.startsWith("#") || line.startsWith("@")) + .map(CondaEnvironmentManager.dropUserInfo) + .toList } def createWithMode( @@ -327,6 +339,14 @@ object CondaEnvironmentManager extends Logging { httpUrlToken.matcher(line).replaceAll("$1") } + /** + * Drops the userInfo component of a URI via UriBuilder. + * UriBuilder tolerates spaces in the user info but can still throw on unparseable input. 
+ */ + private[conda] def dropUserInfo(uri: String): String = { + UriBuilder.fromUri(uri).userInfo(null).build().toString + } + def fromConf(sparkConf: SparkConf): CondaEnvironmentManager = { val condaBinaryPath = sparkConf.get(CONDA_BINARY_PATH).getOrElse( sys.error(s"Expected config ${CONDA_BINARY_PATH.key} to be set")) diff --git a/core/src/test/scala/org/apache/spark/api/conda/CondaEnvironmentManagerTest.scala b/core/src/test/scala/org/apache/spark/api/conda/CondaEnvironmentManagerTest.scala index db1cf3197866a..9b9dada80075a 100644 --- a/core/src/test/scala/org/apache/spark/api/conda/CondaEnvironmentManagerTest.scala +++ b/core/src/test/scala/org/apache/spark/api/conda/CondaEnvironmentManagerTest.scala @@ -71,4 +71,17 @@ class CondaEnvironmentManagerTest extends org.apache.spark.SparkFunSuite with Te "via spark.conda.bootstrapPackageUrlsUserInfo.") .equals(thrown.getMessage)) } + + test("CondaEnvironmentManager.dropUserInfo") { + val packageUrl = "https://x-5.bar/linux-64/package-0.0.1-py_0.tar.bz2" + assert(CondaEnvironmentManager.dropUserInfo(packageUrl) == packageUrl) + assert(CondaEnvironmentManager.dropUserInfo( + "https://a:b@x-5.bar/linux-64/package-0.0.1-py_0.tar.bz2") == packageUrl) + assert(CondaEnvironmentManager.dropUserInfo( + "https://:b@x-5.bar/linux-64/package-0.0.1-py_0.tar.bz2") == packageUrl) + assert(CondaEnvironmentManager.dropUserInfo( + "https://a:@x-5.bar/linux-64/package-0.0.1-py_0.tar.bz2") == packageUrl) + assert(CondaEnvironmentManager.dropUserInfo( + "https://:Bearer bf.ghi@x-5.bar/linux-64/package-0.0.1-py_0.tar.bz2") == packageUrl) + } }