diff --git a/.github/workflows/ci.yml b/.github/workflows/CI.yml
similarity index 86%
rename from .github/workflows/ci.yml
rename to .github/workflows/CI.yml
index a1b6964..9a6f7b4 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/CI.yml
@@ -8,7 +8,7 @@ on:
       # This is where pull requests from "bors try" are built.
       - trying
       # Build master branch.
-      - master
+      - main
 
 jobs:
   test:
@@ -18,7 +18,7 @@ jobs:
     strategy:
       matrix:
         version:
-          - '1.3'
+          - '1.4'
           - '1'
           - 'nightly'
         os:
@@ -40,14 +40,6 @@ jobs:
             arch: x86
     steps:
       - uses: actions/checkout@v2
-      - name: Install python
-        uses: actions/setup-python@v2
-        with:
-          python-version: '3.x'
-          architecture: ${{ matrix.arch }}
-      # Limitation of pip: https://pythonot.github.io/index.html#pip-installation
-      - run: python -m pip install cython numpy
-      - run: python -m pip install pot
       - uses: julia-actions/setup-julia@v1
         with:
           version: ${{ matrix.version }}
@@ -63,8 +55,6 @@ jobs:
             ${{ runner.os }}-test-
             ${{ runner.os }}-
       - uses: julia-actions/julia-buildpkg@latest
-        env:
-          PYTHON: python
       - uses: julia-actions/julia-runtest@latest
       - uses: julia-actions/julia-processcoverage@v1
         if: matrix.coverage
diff --git a/Project.toml b/Project.toml
index c6f2c66..97400d5 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "CalibrationErrorsDistributions"
 uuid = "20087e1a-bb94-462b-b900-33d17a750383"
 authors = ["David Widmann <david.widmann@it.uu.se>"]
-version = "0.1.7"
+version = "0.2.0"
 
 [deps]
 CalibrationErrors = "33913031-fe46-5864-950f-100836f47845"
@@ -12,13 +12,15 @@ LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 OptimalTransport = "7e02d93a-ae51-4f58-b602-d97af76e3b33"
 PDMats = "90014a1f-27ba-587c-ab20-58faa44d9150"
 Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
+Tulip = "6dd1b50a-3aae-11e9-10b5-ef983d2400fa"
 
 [compat]
 CalibrationErrors = "0.5"
 Distances = "0.10.1"
 Distributions = "0.23, 0.24"
 KernelFunctions = "0.8, 0.9"
-OptimalTransport = "0.1.8"
+OptimalTransport = "0.2"
 PDMats = "0.10, 0.11"
 Reexport = "0.2, 1.0"
-julia = "1.3"
+Tulip = "0.7"
+julia = "1.4"
diff --git a/README.md b/README.md
index b6ddc08..721f3bb 100644
--- a/README.md
+++ b/README.md
@@ -3,10 +3,10 @@
 Estimation of calibration errors for models that output probability distributions from
 [Distributions.jl](https://github.com/JuliaStats/Distributions.jl).
 
-[![Build Status](https://github.com/devmotion/CalibrationErrorsDistributions.jl/workflows/CI/badge.svg?branch=master)](https://github.com/devmotion/CalibrationErrorsDistributions.jl/actions?query=workflow%3ACI+branch%3Amaster)
+[![Build Status](https://github.com/devmotion/CalibrationErrorsDistributions.jl/workflows/CI/badge.svg?branch=main)](https://github.com/devmotion/CalibrationErrorsDistributions.jl/actions?query=workflow%3ACI+branch%3Amain)
 [![DOI](https://zenodo.org/badge/274106426.svg)](https://zenodo.org/badge/latestdoi/274106426)
-[![Coverage](https://codecov.io/gh/devmotion/CalibrationErrorsDistributions.jl/branch/master/graph/badge.svg)](https://codecov.io/gh/devmotion/CalibrationErrorsDistributions.jl)
-[![Coverage](https://coveralls.io/repos/github/devmotion/CalibrationErrorsDistributions.jl/badge.svg?branch=master)](https://coveralls.io/github/devmotion/CalibrationErrorsDistributions.jl?branch=master)
+[![Coverage](https://codecov.io/gh/devmotion/CalibrationErrorsDistributions.jl/branch/main/graph/badge.svg)](https://codecov.io/gh/devmotion/CalibrationErrorsDistributions.jl)
+[![Coverage](https://coveralls.io/repos/github/devmotion/CalibrationErrorsDistributions.jl/badge.svg?branch=main)](https://coveralls.io/github/devmotion/CalibrationErrorsDistributions.jl?branch=main)
 [![Code Style: Blue](https://img.shields.io/badge/code%20style-blue-4495d1.svg)](https://github.com/invenia/BlueStyle)
 [![Bors enabled](https://bors.tech/images/badge_small.svg)](https://app.bors.tech/repositories/24611)
 
@@ -26,9 +26,9 @@ statistical hypothesis tests of calibration.
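-If you use CalibrationsErrorsDistributions.jl as part of your research, teaching, or other activities,
+If you use CalibrationErrorsDistributions.jl as part of your research, teaching, or other activities,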
 please consider citing the following publication:
 
-Widmann, D., Lindsten, F., & Zachariah, D. (2019).
-[Calibration tests beyond classification](https://openreview.net/forum?id=-bxf89v3Nx).
-To be presented at *ICLR 2021*.
+Widmann, D., Lindsten, F., & Zachariah, D. (2019). [Calibration tests in multi-class
+classification: A unifying framework](https://proceedings.neurips.cc/paper/2019/hash/1c336b8080f82bcc2cd2499b4c57261d-Abstract.html). In
+*Advances in Neural Information Processing Systems 32 (NeurIPS 2019)* (pp. 12257–12267).
 
 Widmann, D., Lindsten, F., & Zachariah, D. (2021).
 [Calibration tests beyond classification](https://openreview.net/forum?id=-bxf89v3Nx).
diff --git a/src/CalibrationErrorsDistributions.jl b/src/CalibrationErrorsDistributions.jl
index 43eec3e..a862ddc 100644
--- a/src/CalibrationErrorsDistributions.jl
+++ b/src/CalibrationErrorsDistributions.jl
@@ -9,6 +9,7 @@ using Reexport
 using Distances: Distances
 using OptimalTransport: OptimalTransport
 using PDMats: PDMats
+using Tulip: Tulip
 
 using LinearAlgebra: LinearAlgebra
 
diff --git a/src/distances/wasserstein.jl b/src/distances/wasserstein.jl
index ce1f0c6..506b861 100644
--- a/src/distances/wasserstein.jl
+++ b/src/distances/wasserstein.jl
@@ -35,11 +35,6 @@ function (::SqWasserstein)(a::Laplace, b::Laplace)
     return abs2(μa - μb) + 2 * abs2(βa - βb)
 end
 
-# syntactic sugar
-function sqwasserstein(a::Distribution, b::Distribution)
-    return (SqWasserstein())(a, b)
-end
-
 # Wasserstein 2 distance
 struct Wasserstein <: DistributionsMetric end
 
@@ -51,16 +46,38 @@ function Distances.result_type(
 end
 
 function (::Wasserstein)(a::Distribution, b::Distribution)
-    return sqrt(sqwasserstein(a, b))
-end
-
-function wasserstein(a::Distribution, b::Distribution)
-    return (Wasserstein())(a, b)
+    return sqrt(SqWasserstein()(a, b))
 end
 
 # Mixture Wasserstein distances
-struct SqMixtureWasserstein <: DistributionsSemiMetric end
-struct MixtureWasserstein <: DistributionsMetric end
+"""
+    SqMixtureWasserstein([lpsolver])
+
+Squared mixture Wasserstein distance between two mixture models.
+
+Evaluating the distance calls `OptimalTransport.emd2` with the mixture weights of both
+models, the matrix of pairwise `SqWasserstein` distances between their components, and
+`lpsolver`. If no `lpsolver` is provided, a new `Tulip.Optimizer()` is used.
+"""
+struct SqMixtureWasserstein{S} <: DistributionsSemiMetric
+    lpsolver::S
+end
+
+"""
+    MixtureWasserstein([lpsolver])
+
+Mixture Wasserstein distance between two mixture models.
+
+It is the square root of `SqMixtureWasserstein(lpsolver)`. If no `lpsolver` is provided,
+a new `Tulip.Optimizer()` is used.
+"""
+struct MixtureWasserstein{S} <: DistributionsMetric
+    lpsolver::S
+end
+
+# default LP solver: every call constructs a new `Tulip.Optimizer`
+SqMixtureWasserstein() = SqMixtureWasserstein(Tulip.Optimizer())
+MixtureWasserstein() = MixtureWasserstein(Tulip.Optimizer())
 
 # result type (e.g., for pairwise computations)
 function Distances.result_type(
@@ -74,24 +91,13 @@ function Distances.result_type(
     return float(promote_type(T1, T2))
 end
 
-function (::SqMixtureWasserstein)(a::AbstractMixtureModel, b::AbstractMixtureModel)
-    probsa = probs(a)
-    componentsa = components(a)
-    probsb = probs(b)
-    componentsb = components(b)
-
-    C = Distances.pairwise(SqWasserstein(), componentsa, componentsb)
-    return OptimalTransport.emd2(probsa, probsb, C)
-end
-
-function sqmixturewasserstein(a::AbstractMixtureModel, b::AbstractMixtureModel)
-    return (SqMixtureWasserstein())(a, b)
-end
-
-function (::MixtureWasserstein)(a::AbstractMixtureModel, b::AbstractMixtureModel)
-    return sqrt(sqmixturewasserstein(a, b))
+function (s::SqMixtureWasserstein)(a::AbstractMixtureModel, b::AbstractMixtureModel)
+    # cost matrix: squared Wasserstein distances between all pairs of components
+    C = Distances.pairwise(SqWasserstein(), components(a), components(b))
+    return OptimalTransport.emd2(probs(a), probs(b), C, s.lpsolver)
 end
 
-function mixturewasserstein(a::AbstractMixtureModel, b::AbstractMixtureModel)
-    return (MixtureWasserstein())(a, b)
+function (m::MixtureWasserstein)(a::AbstractMixtureModel, b::AbstractMixtureModel)
+    # reuse the LP solver of `m` for the squared distance
+    return sqrt(SqMixtureWasserstein(m.lpsolver)(a, b))
 end
diff --git a/test/Project.toml b/test/Project.toml
index d41d235..d7d9980 100644
--- a/test/Project.toml
+++ b/test/Project.toml
@@ -4,12 +4,12 @@ JuliaFormatter = "98e50ef6-434e-11e9-1051-2b60c6c9e899"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 PDMats = "90014a1f-27ba-587c-ab20-58faa44d9150"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
-SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+Tulip = "6dd1b50a-3aae-11e9-10b5-ef983d2400fa"
 
 [compat]
 Distances = "0.10.1"
 JuliaFormatter = "0.13"
 PDMats = "0.10, 0.11"
-SafeTestsets = "0.0"
+Tulip = "0.7"
 julia = "1.3"
diff --git a/test/distances/bures.jl b/test/distances/bures.jl
index 6eb0b88..5d82a34 100644
--- a/test/distances/bures.jl
+++ b/test/distances/bures.jl
@@ -1,9 +1,3 @@
-using CalibrationErrorsDistributions
-using PDMats
-
-using LinearAlgebra
-using Test
-
 @testset "bures.jl" begin
     function _sqbures(A, B)
         sqrt_A = sqrt(A)
diff --git a/test/distances/wasserstein.jl b/test/distances/wasserstein.jl
index 7c2f74f..6f3e691 100644
--- a/test/distances/wasserstein.jl
+++ b/test/distances/wasserstein.jl
@@ -1,16 +1,3 @@
-using CalibrationErrorsDistributions
-using Distances
-
-using Test
-
-using CalibrationErrorsDistributions:
-    sqwasserstein,
-    sqmixturewasserstein,
-    wasserstein,
-    mixturewasserstein,
-    Wasserstein,
-    SqWasserstein
-
 @testset "wasserstein.jl" begin
     @testset "SqWasserstein" begin
         μ1, μ2 = randn(2)
@@ -18,25 +5,26 @@ using CalibrationErrorsDistributions:
         normal1 = Normal(μ1, σ1)
         normal2 = Normal(μ2, σ2)
 
-        @test iszero(sqwasserstein(normal1, normal1))
-        @test iszero(sqwasserstein(normal2, normal2))
-        @test sqwasserstein(normal1, normal2) == (μ1 - μ2)^2 + (σ1 - σ2)^2
+        @test iszero(SqWasserstein()(normal1, normal1))
+        @test iszero(SqWasserstein()(normal2, normal2))
+        @test SqWasserstein()(normal1, normal2) == (μ1 - μ2)^2 + (σ1 - σ2)^2
 
         for (d1, d2) in Iterators.product((normal1, normal2), (normal1, normal2))
             mvnormal1 = MvNormal([mean(d1)], [std(d1)])
             mvnormal2 = MvNormal([mean(d2)], [std(d2)])
-            @test sqwasserstein(mvnormal1, mvnormal2) == sqwasserstein(d1, d2)
+            @test SqWasserstein()(mvnormal1, mvnormal2) == SqWasserstein()(d1, d2)
 
             mvnormal_fill1 = MvNormal(fill(mean(d1), 10), fill(std(d1), 10))
             mvnormal_fill2 = MvNormal(fill(mean(d2), 10), fill(std(d2), 10))
-            @test sqwasserstein(mvnormal_fill1, mvnormal_fill2) ≈ 10 * sqwasserstein(d1, d2)
+            @test SqWasserstein()(mvnormal_fill1, mvnormal_fill2) ≈
+                  10 * SqWasserstein()(d1, d2)
         end
 
         laplace1 = Laplace(μ1, σ1)
         laplace2 = Laplace(μ2, σ2)
-        @test iszero(sqwasserstein(laplace1, laplace1))
-        @test iszero(sqwasserstein(laplace2, laplace2))
-        @test sqwasserstein(laplace1, laplace2) == (μ1 - μ2)^2 + 2 * (σ1 - σ2)^2
+        @test iszero(SqWasserstein()(laplace1, laplace1))
+        @test iszero(SqWasserstein()(laplace2, laplace2))
+        @test SqWasserstein()(laplace1, laplace2) == (μ1 - μ2)^2 + 2 * (σ1 - σ2)^2
 
         # pairwise computations
         for (m, n) in ((1, 10), (10, 1), (10, 10))
@@ -44,7 +32,7 @@ using CalibrationErrorsDistributions:
             dists2 = [Normal(randn(), rand()) for _ in 1:n]
 
             # compute distance matrix
-            distmat = [sqwasserstein(x, y) for x in dists1, y in dists2]
+            distmat = [SqWasserstein()(x, y) for x in dists1, y in dists2]
 
             # out-of-place
             @test pairwise(SqWasserstein(), dists1, dists2) ≈ distmat
@@ -62,29 +50,29 @@ using CalibrationErrorsDistributions:
         normal1 = Normal(μ1, σ1)
         normal2 = Normal(μ2, σ2)
 
-        @test iszero(wasserstein(normal1, normal1))
-        @test iszero(wasserstein(normal2, normal2))
-        @test wasserstein(normal1, normal2) == sqrt(sqwasserstein(normal1, normal2))
+        @test iszero(Wasserstein()(normal1, normal1))
+        @test iszero(Wasserstein()(normal2, normal2))
+        @test Wasserstein()(normal1, normal2) == sqrt(SqWasserstein()(normal1, normal2))
 
         for (d1, d2) in Iterators.product((normal1, normal2), (normal1, normal2))
             mvnormal1 = MvNormal([mean(d1)], [std(d1)])
             mvnormal2 = MvNormal([mean(d2)], [std(d2)])
-            @test wasserstein(mvnormal1, mvnormal2) == wasserstein(d1, d2)
-            @test wasserstein(mvnormal1, mvnormal2) == sqrt(sqwasserstein(d1, d2))
+            @test Wasserstein()(mvnormal1, mvnormal2) == Wasserstein()(d1, d2)
+            @test Wasserstein()(mvnormal1, mvnormal2) == sqrt(SqWasserstein()(d1, d2))
 
             mvnormal_fill1 = MvNormal(fill(mean(d1), 10), fill(std(d1), 10))
             mvnormal_fill2 = MvNormal(fill(mean(d2), 10), fill(std(d2), 10))
-            @test wasserstein(mvnormal_fill1, mvnormal_fill2) ≈
-                  sqrt(10) * wasserstein(d1, d2)
-            @test wasserstein(mvnormal_fill1, mvnormal_fill2) ==
-                  sqrt(sqwasserstein(mvnormal_fill1, mvnormal_fill2))
+            @test Wasserstein()(mvnormal_fill1, mvnormal_fill2) ≈
+                  sqrt(10) * Wasserstein()(d1, d2)
+            @test Wasserstein()(mvnormal_fill1, mvnormal_fill2) ==
+                  sqrt(SqWasserstein()(mvnormal_fill1, mvnormal_fill2))
         end
 
         laplace1 = Laplace(μ1, σ1)
         laplace2 = Laplace(μ2, σ2)
-        @test iszero(wasserstein(laplace1, laplace1))
-        @test iszero(wasserstein(laplace2, laplace2))
-        @test wasserstein(laplace1, laplace2) == sqrt(sqwasserstein(laplace1, laplace2))
+        @test iszero(Wasserstein()(laplace1, laplace1))
+        @test iszero(Wasserstein()(laplace2, laplace2))
+        @test Wasserstein()(laplace1, laplace2) == sqrt(SqWasserstein()(laplace1, laplace2))
 
         # pairwise computations
         for (m, n) in ((1, 10), (10, 1), (10, 10))
@@ -92,7 +80,7 @@ using CalibrationErrorsDistributions:
             dists2 = [Normal(randn(), rand()) for _ in 1:n]
 
             # compute distance matrix
-            distmat = [wasserstein(x, y) for x in dists1, y in dists2]
+            distmat = [Wasserstein()(x, y) for x in dists1, y in dists2]
 
             # out-of-place
             @test pairwise(Wasserstein(), dists1, dists2) ≈ distmat
@@ -108,23 +96,24 @@ using CalibrationErrorsDistributions:
         for T in (Normal, Laplace)
             mixture1 = MixtureModel(T, [(randn(), rand())], [1.0])
             mixture2 = MixtureModel(T, [(randn(), rand())], [1.0])
-            @test sqmixturewasserstein(mixture1, mixture2) ≈
-                  sqwasserstein(first(components(mixture1)), first(components(mixture2)))
+            @test SqMixtureWasserstein()(mixture1, mixture2) ≈
+                  SqWasserstein()(first(components(mixture1)), first(components(mixture2)))
 
             mixture1 = MixtureModel(T, [(randn(), rand()), (randn(), rand())], [1.0, 0.0])
             mixture2 = MixtureModel(T, [(randn(), rand()), (randn(), rand())], [0.0, 1.0])
-            @test sqmixturewasserstein(mixture1, mixture2) ≈
-                  sqwasserstein(first(components(mixture1)), last(components(mixture2)))
+            @test SqMixtureWasserstein()(mixture1, mixture2) ≈
+                  SqWasserstein()(first(components(mixture1)), last(components(mixture2)))
 
             mixture1 = MixtureModel(T, fill((randn(), rand()), 10))
             mixture2 = MixtureModel(T, fill((randn(), rand()), 10))
-            @test sqmixturewasserstein(mixture1, mixture2) ≈
-                  sqwasserstein(first(components(mixture1)), first(components(mixture2)))
+            @test SqMixtureWasserstein()(mixture1, mixture2) ≈
+                  SqWasserstein()(first(components(mixture1)), first(components(mixture2))) rtol =
+                10 * sqrt(eps())
 
             mixture1 = MixtureModel(T, fill((randn(), rand()), 10))
             mixture2 = MixtureModel(T, [(randn(), rand())])
-            @test sqmixturewasserstein(mixture1, mixture2) ≈
-                  sqwasserstein(first(components(mixture1)), first(components(mixture2)))
+            @test SqMixtureWasserstein()(mixture1, mixture2) ≈
+                  SqWasserstein()(first(components(mixture1)), first(components(mixture2)))
         end
     end
 
@@ -132,31 +121,32 @@ using CalibrationErrorsDistributions:
         for T in (Normal, Laplace)
             mixture1 = MixtureModel(T, [(randn(), rand())], [1.0])
             mixture2 = MixtureModel(T, [(randn(), rand())], [1.0])
-            @test mixturewasserstein(mixture1, mixture2) ≈
-                  wasserstein(first(components(mixture1)), first(components(mixture2)))
-            @test mixturewasserstein(mixture1, mixture2) ≈
-                  sqrt(sqmixturewasserstein(mixture1, mixture2))
+            @test MixtureWasserstein()(mixture1, mixture2) ≈
+                  Wasserstein()(first(components(mixture1)), first(components(mixture2)))
+            @test MixtureWasserstein()(mixture1, mixture2) ≈
+                  sqrt(SqMixtureWasserstein()(mixture1, mixture2))
 
             mixture1 = MixtureModel(T, [(randn(), rand()), (randn(), rand())], [1.0, 0.0])
             mixture2 = MixtureModel(T, [(randn(), rand()), (randn(), rand())], [0.0, 1.0])
-            @test mixturewasserstein(mixture1, mixture2) ≈
-                  wasserstein(first(components(mixture1)), last(components(mixture2)))
-            @test mixturewasserstein(mixture1, mixture2) ≈
-                  sqrt(sqmixturewasserstein(mixture1, mixture2))
+            @test MixtureWasserstein()(mixture1, mixture2) ≈
+                  Wasserstein()(first(components(mixture1)), last(components(mixture2)))
+            @test MixtureWasserstein()(mixture1, mixture2) ≈
+                  sqrt(SqMixtureWasserstein()(mixture1, mixture2))
 
             mixture1 = MixtureModel(T, fill((randn(), rand()), 10))
             mixture2 = MixtureModel(T, fill((randn(), rand()), 10))
-            @test mixturewasserstein(mixture1, mixture2) ≈
-                  wasserstein(first(components(mixture1)), first(components(mixture2)))
-            @test mixturewasserstein(mixture1, mixture2) ≈
-                  sqrt(sqmixturewasserstein(mixture1, mixture2))
+            @test MixtureWasserstein()(mixture1, mixture2) ≈
+                  Wasserstein()(first(components(mixture1)), first(components(mixture2))) rtol =
+                10 * sqrt(eps())
+            @test MixtureWasserstein()(mixture1, mixture2) ≈
+                  sqrt(SqMixtureWasserstein()(mixture1, mixture2))
 
             mixture1 = MixtureModel(T, fill((randn(), rand()), 10))
             mixture2 = MixtureModel(T, [(randn(), rand())])
-            @test mixturewasserstein(mixture1, mixture2) ≈
-                  wasserstein(first(components(mixture1)), first(components(mixture2)))
-            @test mixturewasserstein(mixture1, mixture2) ≈
-                  sqrt(sqmixturewasserstein(mixture1, mixture2))
+            @test MixtureWasserstein()(mixture1, mixture2) ≈
+                  Wasserstein()(first(components(mixture1)), first(components(mixture2)))
+            @test MixtureWasserstein()(mixture1, mixture2) ≈
+                  sqrt(SqMixtureWasserstein()(mixture1, mixture2))
         end
     end
 end
diff --git a/test/kernels.jl b/test/kernels.jl
index e93278d..81dd3a7 100644
--- a/test/kernels.jl
+++ b/test/kernels.jl
@@ -1,9 +1,3 @@
-using CalibrationErrorsDistributions
-using CalibrationErrorsDistributions:
-    wasserstein, Wasserstein, mixturewasserstein, MixtureWasserstein
-
-using Test
-
 @testset "kernels.jl" begin
     @testset "WassersteinExponentialKernel" begin
         kernel = WassersteinExponentialKernel()
@@ -13,29 +7,26 @@ using Test
 
         # simple evaluation
         x, y = Normal(randn(), rand()), Normal(randn(), rand())
-        @test kernel(x, y) == exp(-wasserstein(x, y))
+        @test kernel(x, y) == exp(-Wasserstein()(x, y))
 
         # transformations
-        @test transform(kernel, 0.1)(x, y) == exp(-0.1 * wasserstein(x, y))
-        @test transform(kernel, ScaleTransform(0.1))(x, y) == exp(-0.1 * wasserstein(x, y))
+        @test (kernel ∘ ScaleTransform(0.1))(x, y) == exp(-0.1 * Wasserstein()(x, y))
     end
 
     @testset "MixtureWassersteinExponentialKernel" begin
         kernel = MixtureWassersteinExponentialKernel()
 
         # traits
-        @test KernelFunctions.metric(kernel) === MixtureWasserstein()
+        @test KernelFunctions.metric(kernel) isa MixtureWasserstein{<:Tulip.Optimizer}
 
         # simple evaluation
         x = MixtureModel(Normal, [(randn(), rand())])
         y = MixtureModel(Normal, [(randn(), rand())])
-        @test kernel(x, y) == exp(-mixturewasserstein(x, y))
+        @test kernel(x, y) == exp(-MixtureWasserstein()(x, y))
         @test kernel(x, y) ==
               WassersteinExponentialKernel()(first(components(x)), first(components(y)))
 
         # transformations
-        @test transform(kernel, 0.1)(x, y) == exp(-0.1 * mixturewasserstein(x, y))
-        @test transform(kernel, ScaleTransform(0.1))(x, y) ==
-              exp(-0.1 * mixturewasserstein(x, y))
+        @test (kernel ∘ ScaleTransform(0.1))(x, y) == exp(-0.1 * MixtureWasserstein()(x, y))
     end
 end
diff --git a/test/mvnormal.jl b/test/mvnormal.jl
index 4d00e07..97a80de 100644
--- a/test/mvnormal.jl
+++ b/test/mvnormal.jl
@@ -1,9 +1,3 @@
-using CalibrationErrorsDistributions
-using PDMats
-
-using LinearAlgebra
-using Test
-
 @testset "mvnormal.jl" begin
     @testset "consistency with Normal" begin
         nsamples = 1_000
@@ -33,29 +27,23 @@ using Test
 
         for kernel in (
             WassersteinExponentialKernel() ⊗ SqExponentialKernel(),
-            WassersteinExponentialKernel() ⊗ transform(SqExponentialKernel(), rand()),
-            WassersteinExponentialKernel() ⊗ transform(SqExponentialKernel(), [rand()]),
+            WassersteinExponentialKernel() ⊗
+            (SqExponentialKernel() ∘ ScaleTransform(rand())),
+            WassersteinExponentialKernel() ⊗
+            (SqExponentialKernel() ∘ ARDTransform([rand()])),
         )
             for estimator in
                 (BiasedSKCE(kernel), UnbiasedSKCE(kernel), BlockUnbiasedSKCE(kernel, 5))
-                skce_mvnormal = calibrationerror(
-                    estimator, predictions_mvnormal, targets_mvnormal
-                )
-                skce_normal = calibrationerror(
-                    estimator, predictions_normal, targets_normal
-                )
+                skce_mvnormal = estimator(predictions_mvnormal, targets_mvnormal)
+                skce_normal = estimator(predictions_normal, targets_normal)
                 @test skce_mvnormal ≈ skce_normal
             end
 
-            ucme_mvnormal = calibrationerror(
-                UCME(kernel, testpredictions_mvnormal, testtargets_mvnormal),
-                predictions_mvnormal,
-                targets_mvnormal,
+            ucme_mvnormal = UCME(kernel, testpredictions_mvnormal, testtargets_mvnormal)(
+                predictions_mvnormal, targets_mvnormal
             )
-            ucme_normal = calibrationerror(
-                UCME(kernel, testpredictions_normal, testtargets_normal),
-                predictions_normal,
-                targets_normal,
+            ucme_normal = UCME(kernel, testpredictions_normal, testtargets_normal)(
+                predictions_normal, targets_normal
             )
             @test ucme_mvnormal ≈ ucme_normal
         end
@@ -76,13 +64,14 @@ using Test
 
             for γ in (1.0, rand())
                 kernel1 =
-                    WassersteinExponentialKernel() ⊗ transform(SqExponentialKernel(), γ)
+                    WassersteinExponentialKernel() ⊗
+                    (SqExponentialKernel() ∘ ScaleTransform(γ))
                 kernel2 =
                     WassersteinExponentialKernel() ⊗
-                    transform(SqExponentialKernel(), fill(γ, dim))
+                    (SqExponentialKernel() ∘ ARDTransform(fill(γ, dim)))
                 kernel3 =
                     WassersteinExponentialKernel() ⊗
-                    transform(SqExponentialKernel(), LinearTransform(diagm(fill(γ, dim))))
+                    (SqExponentialKernel() ∘ LinearTransform(diagm(fill(γ, dim))))
 
                 # check evaluation of the first two observations
                 p1 = predictions[1]
@@ -105,18 +94,16 @@ using Test
 
                 # check estimates
                 for estimator in (UnbiasedSKCE, x -> UCME(x, testpredictions, testtargets))
-                    estimate1 = calibrationerror(estimator(kernel1), predictions, targets)
-                    estimate2 = calibrationerror(estimator(kernel2), predictions, targets)
-                    estimate3 = calibrationerror(estimator(kernel3), predictions, targets)
+                    estimate1 = estimator(kernel1)(predictions, targets)
+                    estimate2 = estimator(kernel2)(predictions, targets)
+                    estimate3 = estimator(kernel3)(predictions, targets)
                     @test estimate2 ≈ estimate1
                     @test estimate3 ≈ estimate1
                     if isone(γ)
-                        @test calibrationerror(
-                            estimator(
-                                WassersteinExponentialKernel() ⊗ SqExponentialKernel()
-                            ),
-                            predictions,
-                            targets,
+                        @test estimator(
+                            WassersteinExponentialKernel() ⊗ SqExponentialKernel()
+                        )(
+                            predictions, targets
                         ) ≈ estimate1
                     end
                 end
diff --git a/test/normal.jl b/test/normal.jl
index 8b2ac2c..800d9eb 100644
--- a/test/normal.jl
+++ b/test/normal.jl
@@ -1,7 +1,3 @@
-using CalibrationErrorsDistributions
-
-using Test
-
 @testset "normal.jl" begin
     @testset "SKCE: basic example" begin
         skce = UnbiasedSKCE(WassersteinExponentialKernel() ⊗ SqExponentialKernel())
@@ -9,12 +5,11 @@ using Test
         # only two predictions, i.e., one term in the estimator
         normal1 = Normal(0, 1)
         normal2 = Normal(1, 2)
-        @test @inferred(calibrationerror(skce, ([normal1, normal1], [0, 0]))) ≈
-              1 - sqrt(2) + 1 / sqrt(3)
-        @test @inferred(calibrationerror(skce, ([normal1, normal2], [1, 0]))) ≈
+        @test @inferred(skce([normal1, normal1], [0, 0])) ≈ 1 - sqrt(2) + 1 / sqrt(3)
+        @test @inferred(skce([normal1, normal2], [1, 0])) ≈
               exp(-sqrt(2)) *
               (exp(-1 / 2) - 1 / sqrt(2) - 1 / sqrt(5) + exp(-1 / 12) / sqrt(6))
-        @test @inferred(calibrationerror(skce, ([normal1, normal2], [0, 1]))) ≈
+        @test @inferred(skce([normal1, normal2], [0, 1])) ≈
               exp(-sqrt(2)) * (
             exp(-1 / 2) - exp(-1 / 4) / sqrt(2) - exp(-1 / 10) / sqrt(5) +
             exp(-1 / 12) / sqrt(6)
@@ -23,18 +18,18 @@ using Test
 
     @testset "SKCE: basic example (transformed)" begin
         skce = UnbiasedSKCE(
-            WassersteinExponentialKernel() ⊗ transform(SqExponentialKernel(), 0.5)
+            WassersteinExponentialKernel() ⊗ (SqExponentialKernel() ∘ ScaleTransform(0.5))
         )
 
         # only two predictions, i.e., one term in the estimator
         normal1 = Normal(0, 1)
         normal2 = Normal(1, 2)
-        @test @inferred(calibrationerror(skce, ([normal1, normal1], [0, 0]))) ≈
+        @test @inferred(skce([normal1, normal1], [0, 0])) ≈
               1 - 2 / sqrt(1.25) + 1 / sqrt(1.5)
-        @test @inferred(calibrationerror(skce, ([normal1, normal2], [1, 0]))) ≈
+        @test @inferred(skce([normal1, normal2], [1, 0])) ≈
               exp(-sqrt(2)) *
               (exp(-1 / 8) - 1 / sqrt(1.25) - 1 / sqrt(2) + exp(-1 / 18) / sqrt(2.25))
-        @test @inferred(calibrationerror(skce, ([normal1, normal2], [0, 1]))) ≈
+        @test @inferred(skce([normal1, normal2], [0, 1])) ≈
               exp(-sqrt(2)) * (
             exp(-1 / 8) - exp(-1 / 10) / sqrt(1.25) - exp(-1 / 16) / sqrt(2) +
             exp(-1 / 18) / sqrt(2.25)
@@ -44,11 +39,10 @@ using Test
     @testset "SKCE: basic properties" begin
         skce = UnbiasedSKCE(WassersteinExponentialKernel() ⊗ SqExponentialKernel())
 
-        estimates = Vector{Float64}(undef, 10_000)
-        for i in 1:length(estimates)
+        estimates = map(1:10_000) do _
             predictions = map(Normal, randn(20), rand(20))
             targets = map(rand, predictions)
-            estimates[i] = calibrationerror(skce, predictions, targets)
+            return skce(predictions, targets)
         end
 
         @test any(x -> x > zero(x), estimates)
@@ -65,7 +59,7 @@ using Test
         # two predictions
         normal1 = Normal(0, 1)
         normal2 = Normal(1, 2)
-        @test @inferred(calibrationerror(ucme, ([normal1, normal2], [0, 0.5]))) ≈
+        @test @inferred(ucme([normal1, normal2], [0, 0.5])) ≈
               (
             exp(-1 / sqrt(2)) * (exp(-1 / 2) - exp(-1 / 4) / sqrt(2)) +
             exp(-sqrt(5 / 2)) * (exp(-1 / 8) - 1 / sqrt(5))
@@ -77,7 +71,7 @@ using Test
             [Normal(0.5, 0.5), Normal(-1, 1.5)],
             [1, -0.5],
         )
-        @test @inferred(calibrationerror(ucme, ([normal1, normal2], [0, 0.5]))) ≈
+        @test @inferred(ucme([normal1, normal2], [0, 0.5])) ≈
               (
             (
                 exp(-1 / sqrt(2)) * (exp(-1 / 2) - exp(-1 / 4) / sqrt(2)) +
@@ -103,8 +97,10 @@ using Test
         testtargets = randn(ntestsamples)
 
         for γ in (1.0, rand())
-            kernel1 = WassersteinExponentialKernel() ⊗ transform(SqExponentialKernel(), γ)
-            kernel2 = WassersteinExponentialKernel() ⊗ transform(SqExponentialKernel(), [γ])
+            kernel1 =
+                WassersteinExponentialKernel() ⊗ (SqExponentialKernel() ∘ ScaleTransform(γ))
+            kernel2 =
+                WassersteinExponentialKernel() ⊗ (SqExponentialKernel() ∘ ARDTransform([γ]))
 
             # check evaluation of the first two observations
             p1 = predictions[1]
@@ -125,14 +121,12 @@ using Test
 
             # check estimates
             for estimator in (UnbiasedSKCE, x -> UCME(x, testpredictions, testtargets))
-                estimate1 = calibrationerror(estimator(kernel1), predictions, targets)
-                estimate2 = calibrationerror(estimator(kernel2), predictions, targets)
+                estimate1 = estimator(kernel1)(predictions, targets)
+                estimate2 = estimator(kernel2)(predictions, targets)
                 @test estimate2 ≈ estimate1
                 if isone(γ)
-                    @test calibrationerror(
-                        estimator(WassersteinExponentialKernel() ⊗ SqExponentialKernel()),
-                        predictions,
-                        targets,
+                    @test estimator(WassersteinExponentialKernel() ⊗ SqExponentialKernel())(
+                        predictions, targets
                     ) ≈ estimate1
                 end
             end
diff --git a/test/runtests.jl b/test/runtests.jl
index 0517a69..dd76739 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1,20 +1,34 @@
-using SafeTestsets
-
+using CalibrationErrorsDistributions
+using Distances
+using LinearAlgebra
+using PDMats
 using Random
+using Test
+
+using CalibrationErrorsDistributions:
+    Wasserstein, SqWasserstein, MixtureWasserstein, SqMixtureWasserstein
+using Tulip: Tulip
+
 Random.seed!(1234)
 
-@safetestset "Bures metric" begin
-    include("distances/bures.jl")
-end
-@safetestset "Wasserstein distance" begin
-    include("distances/wasserstein.jl")
-end
-@safetestset "Kernels" begin
-    include("kernels.jl")
-end
-@safetestset "Normal" begin
-    include("normal.jl")
-end
-@safetestset "MvNormal" begin
-    include("mvnormal.jl")
+@testset "CalibrationErrorsDistributions" begin
+    @testset "distances" begin
+        @testset "Bures metric" begin
+            include("distances/bures.jl")
+        end
+        @testset "Wasserstein distance" begin
+            include("distances/wasserstein.jl")
+        end
+    end
+
+    @testset "Kernels" begin
+        include("kernels.jl")
+    end
+
+    @testset "Normal" begin
+        include("normal.jl")
+    end
+    @testset "MvNormal" begin
+        include("mvnormal.jl")
+    end
 end