Description
Based on usage of Tapir in a commercial project. Whenever one of the input/output types in the `Endpoint`
type contains an intersection type, compilation times are ~4x longer compared with the same type using a tuple instead of the intersection type.
In the original project, compiling a single endpoint `val` could take up to 1s - mostly due to additional implicit searches for Codec instances and more complex method signatures. This is a blocker for finishing the migration from Scala 2.13 to Scala 3, due to 4x longer compilation times (~90s in Scala 2.13, 5 min in Scala 3).
Compiler version
Tested only on fork #19897, which contains prototype support for -Yprofile-trace. The issue can be observed in the original codebase in all Scala 3.3 and 3.4 versions.
Minimized code
To be able to reproduce the compilation times observed in the original project, the code cannot be fully minimized. Tapir uses implicits to infer the result types for concatenation of input/output parameters, which requires us to include this mechanism in a simplified form. The whole reproduction takes ~200 lines of code.
Reordering the `in`/`out` method calls can impact compilation times.
// Repro.scala
import scala.concurrent.Future
// Reproduction driver. Declares stub domain types, a base endpoint type, and five
// endpoint definitions (T0, T1, T2, T1Bis, T2Bis) that apply the same chain of calls
// to different base endpoints, so that their typechecking times can be compared.
// All values are left unimplemented (???).
object Repro {
// Empty marker traits, used only as type arguments below.
trait ApiError
trait AuthInput
trait AuthOutput
trait AuthenticatedRequestContext
trait ObjectId
trait Output
// Base endpoint type: both INPUT and OUTPUT are Unit, the effect type is Future.
type AuthenticatedEndpoint =
PartialServerEndpointWithSecurityOutput[
AuthInput,
AuthenticatedRequestContext,
Unit,
ApiError,
AuthOutput,
Unit,
Any,
Future
]
// Holder of the base endpoint value that every test object below starts from.
class BaseEndpoints {
val authenticatedEndpoint: AuthenticatedEndpoint = ???
}
// Endpoint definitions under measurement; mixes in Tapir for `path` and the
// String => EndpointInput[Unit] implicit conversion.
class Endpoints(val baseEndpoints: BaseEndpoints) extends Tapir {
// Note: the result type is EndpointIO[String] regardless of T.
def jsonBody[T]: EndpointIO[String] = ???
trait Foo
trait Tag[T]
// Base endpoint extended with one path input typed as the intersection `ObjectId & Foo`.
// Type annotation based on compiler outputs using -Xprint:typer
val baseEndpointWithIntersection
: Endpoints.this.baseEndpoints.authenticatedEndpoint.EndpointType[
AuthInput,
ObjectId & Foo,
ApiError,
Unit,
Any
] = baseEndpoints.authenticatedEndpoint.in(path[ObjectId & Foo]("arg"))
// Same as above, but the path input is typed as the tuple `(ObjectId, Foo)`.
// Type annotation based on compiler outputs using -Xprint:typer
val baseEndpointNoIntersection: Endpoints.this.baseEndpoints.authenticatedEndpoint.EndpointType[
AuthInput,
(ObjectId, Foo),
ApiError,
Unit,
Any
] = baseEndpoints.authenticatedEndpoint.in(path[(ObjectId, Foo)]("arg"))
def queryString: EndpointInput[String] = ???
def queryMaybeString: EndpointInput[Option[String]] = ???
// Takes ~130ms - initial object to enforce classloading, so the later measurements are not skewed by it
// Starts from the plain base endpoint (neither intersection nor tuple input).
object T0 {
val groupCalendarNoLogic =
baseEndpoints.authenticatedEndpoint
.in("calendar")
.in(queryString.and(queryMaybeString))
.get
.out(jsonBody[Output])
}
// Takes ~260 ms
// Starts from the base endpoint whose input is the intersection type.
object T1 {
val groupCalendarNoLogic =
baseEndpointWithIntersection
.in("calendar")
.in(queryString.and(queryMaybeString))
.get
.out(jsonBody[Output])
}
// Takes 34ms
// Starts from the base endpoint whose input is the tuple type.
object T2 {
val groupCalendarNoLogic =
baseEndpointNoIntersection
.in("calendar")
.in(queryString.and(queryMaybeString))
.get
.out(jsonBody[Output])
}
// Same as T1, take ~140ms, faster due to caching in compiler + JVM warmup
object T1Bis {
val groupCalendarNoLogic =
baseEndpointWithIntersection
.in("calendar")
.in(queryString.and(queryMaybeString))
.get
.out(jsonBody[Output])
}
// Same as T2, takes ~18ms
object T2Bis {
val groupCalendarNoLogic =
baseEndpointNoIntersection
.in("calendar")
.in(queryString.and(queryMaybeString))
.get
.out(jsonBody[Output])
}
}
}
// api.scala
// Simplified stand-in for the Tapir DSL entry points; all members are unimplemented stubs.
trait Tapir {
// Implicit conversion that lets a String literal be passed where an EndpointInput[Unit] is expected.
implicit def stringToPath(s: String): EndpointInput[Unit] = ???
// Declares a path input named `name` whose value type is T.
def path[T](name: String): EndpointInput[T] = ???
def stringBody: EndpointIO[String] = ???
}
// An endpoint input carrying a value of type T.
trait EndpointInput[T] {
// Combines this input with `other`. The combined type TU is not chosen by the caller:
// it is determined by whichever implicit ParamConcat.Aux[T, U, TU] instance is resolved.
// Abstract here (no body), unlike the `and` overloads on EndpointOutput and EndpointIO.
def and[U, TU](other: EndpointInput[U])(implicit concat: ParamConcat.Aux[T, U, TU]): EndpointInput[TU]
}
// An endpoint output carrying a value of type T.
sealed trait EndpointOutput[T] {
// Combines this output with `other`; the combined type IJ is fixed by the resolved
// implicit ParamConcat.Aux[T, J, IJ] instance.
def and[J, IJ](other: EndpointOutput[J])(implicit concat: ParamConcat.Aux[T, J, IJ]): EndpointOutput[IJ] = ???
}
// A value usable both as an input and as an output. It inherits the `and` overloads of
// EndpointInput and EndpointOutput and adds a third one below.
sealed trait EndpointIO[T] extends EndpointInput[T] with EndpointOutput[T] {
// Overload for combining two EndpointIO values, keeping the result an EndpointIO.
def and[J, IJ](other: EndpointIO[J])(implicit concat: ParamConcat.Aux[T, J, IJ]): EndpointIO[IJ] = ???
}
// Endpoint type with eight type parameters that mixes in both the input and the output
// builder operations. R is contravariant; F is a unary type constructor.
trait PartialServerEndpointWithSecurityOutput[ SECURITY_INPUT, PRINCIPAL, INPUT, ERROR_OUTPUT, SECURITY_OUTPUT, OUTPUT, -R, F[_]]
extends EndpointInputsOps[SECURITY_INPUT, INPUT, ERROR_OUTPUT, OUTPUT, R]
with EndpointOutputsOps[SECURITY_INPUT, INPUT, ERROR_OUTPUT, OUTPUT, R]
{ outer =>
// Fixes the abstract EndpointType of both parents to this same trait: PRINCIPAL,
// SECURITY_OUTPUT and F are kept from the enclosing instance, the other five vary.
override type EndpointType[_A, _I, _E, _O, -_R] =
PartialServerEndpointWithSecurityOutput[_A, PRINCIPAL, _I, _E, SECURITY_OUTPUT, _O, _R, F]
}
// Builder operations that affect the input side of an endpoint.
trait EndpointInputsOps[A, I, E, O, -R] {
// Result type of the builder methods; left abstract for the implementing endpoint to define.
type EndpointType[_A, _I, _E, _O, -_R]
// Returns an endpoint with unchanged type parameters.
def get: EndpointType[A, I, E, O, R] = ???
// Adds input `i`. The new input type IJ is determined by the resolved implicit
// ParamConcat.Aux[I, J, IJ], i.e. by concatenating the current input type I with J.
def in[J, IJ](i: EndpointInput[J])(implicit concat: ParamConcat.Aux[I, J, IJ]): EndpointType[A, IJ, E, O, R] = ???
}
// Builder operations that affect the output side of an endpoint.
trait EndpointOutputsOps[A, I, E, O, -R] {
// Result type of the builder methods; left abstract for the implementing endpoint to define.
type EndpointType[_A, _I, _E, _O, -_R]
// Adds output `i`. The new output type OP is determined by the resolved implicit
// ParamConcat.Aux[O, P, OP], i.e. by concatenating the current output type O with P.
def out[P, OP](i: EndpointOutput[P])(implicit ts: ParamConcat.Aux[O, P, OP]): EndpointType[A, I, E, OP, R] = ???
}
// typelevel.scala
import TupleOps.{AppendOne, FoldLeft}
// Type-level function: for parameter types T and U, `Out` is the type of their concatenation.
trait ParamConcat[T, U] { type Out }
// Implicit instances are spread over an inheritance chain
// (ParamConcat -> LowPriorityTupleConcat3 -> ...Concat1 -> ...Concat0); instances declared
// in this object take precedence over the inherited ones during implicit resolution.
object ParamConcat extends LowPriorityTupleConcat3 {
// Unit on the left contributes nothing: the result is U. Requires a TupleArity[U] instance.
implicit def concatUnitLeft[U](implicit ua: TupleArity[U]): Aux[Unit, U, U] = ???
}
// Second level of the ParamConcat implicit chain: ranks below the ParamConcat object's own
// instances and above those inherited from LowPriorityTupleConcat1.
trait LowPriorityTupleConcat3 extends LowPriorityTupleConcat1 {
// Unit on the right contributes nothing: the result is T. Requires a TupleArity[T] instance.
implicit def concatUnitRight[T](implicit ta: TupleArity[T]): Aux[T, Unit, T] = ???
}
// Third level of the ParamConcat implicit chain; ranks above LowPriorityTupleConcat0.
trait LowPriorityTupleConcat1 extends LowPriorityTupleConcat0 {
// Concatenates T with U by wrapping T in Tuple1 and joining it with U as-is.
// TU is the Out of the resolved TupleOps.Join instance; also requires TupleArity[U].
implicit def concatSingleAndTuple[T, U, TU](implicit tc: TupleOps.JoinAux[Tuple1[T], U, TU], ua: TupleArity[U]): Aux[T, U, TU] = ???
}
// Root (lowest-priority level) of the ParamConcat implicit chain.
trait LowPriorityTupleConcat0 {
// Exposes ParamConcat's `Out` member as a third type parameter; inherited by every
// level of the chain, including the ParamConcat companion object.
type Aux[T, U, TU] = ParamConcat[T, U] { type Out = TU }
// Concatenates T with U by wrapping both in Tuple1 and joining them;
// TU is the Out of the resolved TupleOps.Join instance.
implicit def concatSingleAndSingle[T, U, TU](implicit tc: TupleOps.JoinAux[Tuple1[T], Tuple1[U], TU]): Aux[T, U, TU] = ???
}
// Type class exposing an `arity` for a type T.
trait TupleArity[T] { def arity: Int}
// The pair instance lives in the companion itself, so it takes precedence over the
// catch-all instance inherited from LowPriorityTupleArity.
object TupleArity extends LowPriorityTupleArity {
implicit def tupleArity2[A1, A2]: TupleArity[(A1, A2)] = ???
}
// Low-priority fallback for TupleArity.
trait LowPriorityTupleArity {
// Catch-all instance: applicable to any type A, with no constraints.
implicit def tupleArity1[A]: TupleArity[A] = ???
}
// Type classes for tuple manipulation (append one element, left fold, join), together
// with the implicit derivation of Join in terms of FoldLeft and AppendOne.
object TupleOps {
// Appends a single value of type S to a prefix P; `Out` is the resulting type.
trait AppendOne[P, S] {
type Out
def apply(prefix: P, last: S): Out
}
object AppendOne extends TupleAppendOneInstances
// Left fold over tuple T starting from a value of type In, with the operation identified
// by the type Op; `Out` is the resulting type.
trait FoldLeft[In, T, Op] {
type Out
def apply(zero: In, tuple: T): Out
}
object FoldLeft extends TupleFoldInstances
// Joins a prefix P with a suffix S; `Out` is the resulting type.
trait Join[P, S] {
type Out
def apply(prefix: P, suffix: S): Out
}
// Exposes Join's `Out` member as a third type parameter.
type JoinAux[P, S, O] = Join[P, S] { type Out = O }
object Join extends LowLevelJoinImplicits {
// Joining with Unit on the left yields the suffix type unchanged.
implicit def join0P[T]: JoinAux[Unit, T, T] = ???
// `Fold.type` serves as the Op marker for the fold that implements Join.
object Fold {
// One fold step: given AppendOne[T, A], provides a Case whose Out is the appended type.
implicit def step[T, A](implicit append: AppendOne[T, A]): Case[T, A, Fold.type] { type Out = append.Out } = ???
}
}
// Inherited by the Join companion, so `join` ranks below `join0P` in implicit resolution.
sealed abstract class LowLevelJoinImplicits {
// General case: a Join of P and S is derived from a FoldLeft over S that starts from P
// and uses Join.Fold as the operation; the Join's Out is the fold's Out.
implicit def join[P, S](implicit fold: FoldLeft[P, S, Join.Fold.type]): JoinAux[P, S, fold.Out] = ???
}
}
// Type-level case of operation Op applied to arguments of types A and B; `Out` is the result type.
sealed trait Case[A, B, Op] { type Out }
// Implicit FoldLeft instances for tuples of arity 1 and 2.
abstract class TupleFoldInstances {
// Exposes FoldLeft's `Out` member as a fourth type parameter.
type Aux[In, T, Op, Out0] = FoldLeft[In, T, Op] { type Out = Out0 }
// Fold over Tuple1[A]: a single application of Op to (In, A); the result type is f.Out.
implicit def t1[In, A, Op](implicit f: Case[In, A, Op]): Aux[In, Tuple1[A], Op, f.Out] = ???
// Fold over Tuple2[T1, T2]: first fold over Tuple1[T1] to get the intermediate type X,
// then apply Op to (X, T2); the result type is f.Out.
implicit def t2[In, T1, X, T2, Op](implicit fold: Aux[In, Tuple1[T1], Op, X], f: Case[X, T2, Op]): Aux[In, Tuple2[T1, T2], Op, f.Out] = ???
}
// Implicit AppendOne instances for prefixes of arity 1 and 2.
abstract class TupleAppendOneInstances {
// Exposes AppendOne's `Out` member as a third type parameter.
type Aux[P, S, Out0] = AppendOne[P, S] { type Out = Out0 }
// Tuple1[T1] plus L gives Tuple2[T1, L].
implicit def append1[T1, L]: Aux[Tuple1[T1], L, Tuple2[T1, L]] = ???
// Tuple2[T1, T2] plus L gives Tuple3[T1, T2, L].
implicit def append2[T1, T2, L]: Aux[Tuple2[T1,T2], L, Tuple3[T1, T2, L]] = ???
}
Output
Compilation times based on -Yprofile-trace in Scala 3:
- T0 - no intersection type or tuple type, doing classloading: 130 ms
- T1 / T1Bis - using intersection types: 257 ms / 148 ms
- T2 / T2Bis - using tuple types: 34 ms / 18 ms
Compilation times based on -Yprofile-trace in Scala 2.13:
- T0 - no intersection type or tuple type, doing classloading: 134 ms
- T1 / T1Bis - using intersection types: 48 ms / 40 ms
- T2 / T2Bis - using tuple types: 45 ms / 39 ms
Scala 3.5-nightly -Yprofile-trace (modified to show Apply trees)
The blank spots after `apply x` are probably time spent in the type comparer. An async-profiler flamegraph is available below.
Scala 2.13 -Yprofile-trace
The archive below contains the original outputs of -Yprofile-trace and the flamegraph generated by async-profiler:
profiler-outputs.zip
Type presented in the presentation compiler
Expectation
Intersection types should affect compilation times with no more than 20% overhead.