-
-
Notifications
You must be signed in to change notification settings - Fork 345
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Detect assemblies with too many entries to fail shell script prepending #3140
Merged
Merged
Changes from 7 commits
Commits
Show all changes
9 commits
Select commit
Hold shift + click to select a range
e98c4da
Analyze assembly error - Reproduction of #528 and #2650
lefou ef73889
Improved test
lefou 6693a8a
Fixed typo
lefou 3f8209a
Detect assemblies with too many entries to fail shell script prepending
lefou 725b3b2
Refined test
lefou a5c7455
Converted assembly test into unit test
lefou 2b78fab
Refined error message
lefou 90563d7
Fix changed dependency path in example test
lefou 75bff80
Added some log output when upstreamAssembly is used
lefou File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,147 @@ | ||
package mill.scalalib | ||
|
||
import mill._ | ||
import mill.api.Result | ||
import mill.eval.Evaluator | ||
import mill.util.{Jvm, TestEvaluator, TestUtil} | ||
import utest._ | ||
import utest.framework.TestPath | ||
|
||
import java.io.PrintStream | ||
|
||
// Ensure the assembly is runnable, even if we have assembled lots of dependencies into it | ||
// Reproduction of issues: | ||
// - https://github.com/com-lihaoyi/mill/issues/528 | ||
// - https://github.com/com-lihaoyi/mill/issues/2650 | ||
|
||
object AssemblyTests extends TestSuite { | ||
|
||
object TestCase extends TestUtil.BaseModule { | ||
trait Setup extends ScalaModule { | ||
def scalaVersion = "2.13.11" | ||
def sources = T.sources(T.workspace / "src") | ||
def ivyDeps = super.ivyDeps() ++ Agg( | ||
ivy"com.lihaoyi::scalatags:0.8.2", | ||
ivy"com.lihaoyi::mainargs:0.4.0", | ||
ivy"org.apache.avro:avro:1.11.1" | ||
) | ||
} | ||
trait ExtraDeps extends ScalaModule { | ||
def ivyDeps = super.ivyDeps() ++ Agg( | ||
ivy"dev.zio::zio:2.0.15", | ||
ivy"org.typelevel::cats-core:2.9.0", | ||
ivy"org.apache.spark::spark-core:3.4.0", | ||
ivy"dev.zio::zio-metrics-connectors:2.0.8", | ||
ivy"dev.zio::zio-http:3.0.0-RC2" | ||
) | ||
} | ||
|
||
object noExe extends Module { | ||
object small extends Setup { | ||
override def prependShellScript: T[String] = "" | ||
} | ||
object large extends Setup with ExtraDeps { | ||
override def prependShellScript: T[String] = "" | ||
} | ||
} | ||
|
||
object exe extends Module { | ||
object small extends Setup | ||
object large extends Setup with ExtraDeps | ||
} | ||
|
||
} | ||
|
||
val sources = Map( | ||
os.rel / "src" / "Main.scala" -> | ||
"""package ultra | ||
| | ||
|import scalatags.Text.all._ | ||
|import mainargs.{main, ParserForMethods} | ||
| | ||
|object Main { | ||
| def generateHtml(text: String) = { | ||
| h1(text).toString | ||
| } | ||
| | ||
| @main | ||
| def main(text: String) = { | ||
| println(generateHtml(text)) | ||
| } | ||
| | ||
| def main(args: Array[String]): Unit = ParserForMethods(this).runOrExit(args) | ||
|}""".stripMargin | ||
) | ||
|
||
def workspaceTest[T]( | ||
m: TestUtil.BaseModule, | ||
env: Map[String, String] = Evaluator.defaultEnv, | ||
debug: Boolean = false, | ||
errStream: PrintStream = System.err | ||
)(t: TestEvaluator => T)(implicit tp: TestPath): T = { | ||
val eval = new TestEvaluator(m, env = env, debugEnabled = debug, errStream = errStream) | ||
os.remove.all(m.millSourcePath) | ||
sources.foreach { case (file, content) => | ||
os.write(m.millSourcePath / file, content, createFolders = true) | ||
} | ||
os.remove.all(eval.outPath) | ||
os.makeDir.all(m.millSourcePath / os.up) | ||
t(eval) | ||
} | ||
|
||
def runAssembly(file: os.Path, wd: os.Path, checkExe: Boolean = false): Unit = { | ||
println(s"File size: ${os.stat(file).size}") | ||
Jvm.runSubprocess( | ||
commandArgs = Seq(Jvm.javaExe, "-jar", file.toString(), "--text", "tutu"), | ||
envArgs = Map.empty[String, String], | ||
workingDir = wd | ||
) | ||
if (checkExe) { | ||
Jvm.runSubprocess( | ||
commandArgs = Seq(file.toString(), "--text", "tutu"), | ||
envArgs = Map.empty[String, String], | ||
workingDir = wd | ||
) | ||
} | ||
} | ||
|
||
def tests: Tests = Tests { | ||
test("Assembly") { | ||
test("noExe") { | ||
test("small") { | ||
workspaceTest(TestCase) { eval => | ||
val Right((res, _)) = eval(TestCase.noExe.small.assembly) | ||
runAssembly(res.path, TestCase.millSourcePath) | ||
} | ||
} | ||
test("large") { | ||
workspaceTest(TestCase) { eval => | ||
val Right((res, _)) = eval(TestCase.noExe.large.assembly) | ||
runAssembly(res.path, TestCase.millSourcePath) | ||
} | ||
} | ||
} | ||
test("exe") { | ||
test("small") { | ||
workspaceTest(TestCase) { eval => | ||
val Right((res, _)) = eval(TestCase.exe.small.assembly) | ||
runAssembly(res.path, TestCase.millSourcePath, checkExe = true) | ||
} | ||
} | ||
test("large-should-fail") { | ||
workspaceTest(TestCase) { eval => | ||
val Left(Result.Failure(msg, Some(res))) = eval(TestCase.exe.large.assembly) | ||
val expectedMsg = | ||
"""The created assembly jar contains more than 65535 ZIP entries. | ||
|JARs of that size are known to not work correctly with a prepended shell script. | ||
|Either reduce the entries count of the assembly or disable the prepended shell script with: | ||
| | ||
| def prependShellScript = "" | ||
|""".stripMargin | ||
assert(msg == expectedMsg) | ||
} | ||
} | ||
} | ||
} | ||
} | ||
} |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I wonder if it's really worth keeping track of the number of entries incrementally? v.s. creating the assembly first, scanning it, and then failing if it has too many entries. If the performance of scanning the assembly is acceptable (milliseconds to tens of milliseconds?) then that would save us a bunch of book-keeping passing around
addedEntries
values, and a bunch of churn in replacingupstreamAssembly
withupstreamAssembly2
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It probably depends. When we scan the jar right after it's written, it should be in the file system cache of any reasonable OS and scanning should be fast. On the other side, just remembering the added count is quasi for free, esp. when we assume, that the upstream-assembly is the larger portion of the assembly and keeps stable.
I think I want to experiment what the result looks like and how it performs.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The issues with scanning afterwards:
noExe.large
took almost 3 seconds (75516 entries), although we just wrote itZipInputStream
andJarInputStream
aren't able to find anyZipEntry
in a prependend jar making scanning after packaging non-trivial