Read model type from R (attempt number 2) [PLT-1174] (#5857)

* Read the CPU costing function types from R instead of hard-coding them * Read the CPU costing function types from R instead of hard-coding them * Tidying up * Tidying up * Remove some unnecessary error cases * Fix handling of nested costing functions * Delete trailing whitespace in R code * Comment * More experiments * Tidying up * Put the models back to make cost-model-test work * Remove use of duplicate field selectors * Move memory models into their own file * Update cost model tests * Update cost model tests * Restore cost model test * LinearCost -> LinearInX * Update conformance budget results * Tidy up cost modelling types * Rationalise costing function types a bit * Infer some constants from the benchmarking data * Infer some constants from the benchmarking data * Infer some constants from the benchmarking data * Restore original numbers in builtinCostModel.json * Restore original numbers in builtinCostModel.json * Comments on memory costing functions * Add changelog entry * Tidy up * Revert costing change pending configurability
IntersectMBO · Apr 11, 2024 · 47e3d15 · 47e3d15
1 parent d95daf3
commit 47e3d15
Show file tree

Hide file tree

Showing 22 changed files with 917 additions and 1,376 deletions.
diff --git a/...-core/changelog.d/20240326_184114_kenneth.mackenzie_read_model_type_from_R_2.md b/...-core/changelog.d/20240326_184114_kenneth.mackenzie_read_model_type_from_R_2.md
@@ -0,0 +1,5 @@
+### Changed
+
+- Improvements to costing infrastructure.
+
+
diff --git a/plutus-core/cost-model/budgeting-bench/Benchmarks/Strings.hs b/plutus-core/cost-model/budgeting-bench/Benchmarks/Strings.hs
@@ -150,25 +150,16 @@ benchTwoTextStrings name =
 
 -- Benchmark times for a function applied to equal arguments.  This is used for
 -- benchmarking EqualsString on the diagonal.
--- Copy the bytestring here, because otherwise it'll be exactly the same, and
+-- Copy the bytestring here, because otherwise it'll be exactly the same and
 -- the equality will short-circuit.
 benchSameTwoTextStrings :: DefaultFun -> Benchmark
 benchSameTwoTextStrings name =
     createTwoTermBuiltinBenchElementwise name [] inputs (fmap T.copy inputs)
     where inputs = makeSizedTextStrings seedA oneArgumentSizes
 
--- Benchmark times for a function applied to equal arguments.  This is used for
--- finding the average time of comparing strings of different length (which is quick).
-benchDifferentTextStringsElementwise :: DefaultFun -> Benchmark
-benchDifferentTextStringsElementwise name =
-    createTwoTermBuiltinBenchElementwise name [] inputs1 inputs2
-    where inputs1 = makeSizedTextStrings seedA oneArgumentSizes
-          inputs2 = makeSizedTextStrings seedB oneArgumentSizes
-
 makeBenchmarks :: StdGen -> [Benchmark]
 makeBenchmarks _gen = [ benchOneTextString EncodeUtf8
                       , benchOneUtf8ByteString DecodeUtf8
                       , benchTwoTextStrings AppendString
-                      , benchDifferentTextStringsElementwise EqualsString
                       , benchSameTwoTextStrings EqualsString
                       ]
diff --git a/plutus-core/cost-model/create-cost-model/BuiltinMemoryModels.hs b/plutus-core/cost-model/create-cost-model/BuiltinMemoryModels.hs
@@ -0,0 +1,153 @@
+-- editorconfig-checker-disable-file
+{-# LANGUAGE OverloadedStrings #-}
+
+-- | The memory models for the default set of builtins.  These are copied into
+-- builtinCostModel.json by generate-cost-model.
+
+module BuiltinMemoryModels (builtinMemoryModels, Id(..))
+where
+
+import PlutusCore.Crypto.BLS12_381.G1 qualified as G1
+import PlutusCore.Crypto.BLS12_381.G2 qualified as G2
+import PlutusCore.Crypto.BLS12_381.Pairing qualified as Pairing
+import PlutusCore.Crypto.Hash qualified as Hash
+import PlutusCore.Evaluation.Machine.BuiltinCostModel
+import PlutusCore.Evaluation.Machine.CostStream
+import PlutusCore.Evaluation.Machine.ExMemory
+import PlutusCore.Evaluation.Machine.ExMemoryUsage
+
+import Data.ByteString (ByteString)
+import Data.Coerce (coerce)
+
+-- Some utilities for calculating memory sizes.
+
+boolMemModel :: ModelTwoArguments
+boolMemModel = ModelTwoArgumentsConstantCost 1
+
+memoryUsageAsCostingInteger :: ExMemoryUsage a => a -> CostingInteger
+memoryUsageAsCostingInteger = coerce . sumCostStream . flattenCostRose . memoryUsage
+
+hashMemModel :: (ByteString -> ByteString) -> ModelOneArgument
+hashMemModel f = ModelOneArgumentConstantCost $ memoryUsageAsCostingInteger $ f ""
+
+toMemSize :: Int -> CostingInteger
+toMemSize n = fromIntegral $ n `div` 8
+
+-- Group order is 255 bits -> 32 bytes (4 words).
+-- Field size is 381 bits  -> 48 bytes (6 words)
+-- (with three spare bits used for encoding purposes).
+
+-- Sizes below from sizePoint, compressedSizePoint, and sizePT in
+-- Crypto.EllipticCurve.BLS12_381.Internal
+
+-- In-memory G1 points take up 144 bytes (18 words).
+-- These are projective points, so we have *three* 48-byte coordinates.
+g1MemSize :: CostingInteger
+g1MemSize = toMemSize G1.memSizeBytes
+
+-- Compressed G1 points take up 48 bytes (6 words)
+g1CompressedSize :: CostingInteger
+g1CompressedSize = toMemSize G1.compressedSizeBytes
+
+-- In-memory G2 points take up 288 bytes (36 words)
+g2MemSize :: CostingInteger
+g2MemSize = toMemSize G2.memSizeBytes
+
+-- Compressed G2 points take up 96 bytes (12 words)
+g2CompressedSize :: CostingInteger
+g2CompressedSize = toMemSize G2.compressedSizeBytes
+
+-- In-memory Miller loop results take up 576 bytes (72 words)
+mlResultMemSize :: CostingInteger
+mlResultMemSize = toMemSize Pairing.mlResultMemSizeBytes
+
+-- The memory models for the default builtins
+
+newtype Id a = Id { getId :: a }
+
+builtinMemoryModels :: BuiltinCostModelBase Id
+builtinMemoryModels = BuiltinCostModelBase
+  { paramAddInteger                      = Id $ ModelTwoArgumentsMaxSize $ OneVariableLinearFunction 1 1
+  , paramSubtractInteger                 = Id $ ModelTwoArgumentsMaxSize $ OneVariableLinearFunction 1 1
+  , paramMultiplyInteger                 = Id $ ModelTwoArgumentsAddedSizes $ OneVariableLinearFunction 0 1
+  , paramDivideInteger                   = Id $ ModelTwoArgumentsSubtractedSizes $ ModelSubtractedSizes 0 1 1
+  , paramQuotientInteger                 = Id $ ModelTwoArgumentsSubtractedSizes $ ModelSubtractedSizes 0 1 1
+  , paramRemainderInteger                = Id $ ModelTwoArgumentsSubtractedSizes $ ModelSubtractedSizes 0 1 1
+  , paramModInteger                      = Id $ ModelTwoArgumentsSubtractedSizes $ ModelSubtractedSizes 0 1 1
+  , paramEqualsInteger                   = Id $ boolMemModel
+  , paramLessThanInteger                 = Id $ boolMemModel
+  , paramLessThanEqualsInteger           = Id $ boolMemModel
+  , paramAppendByteString                = Id $ ModelTwoArgumentsAddedSizes $ OneVariableLinearFunction 0 1
+  , paramConsByteString                  = Id $ ModelTwoArgumentsAddedSizes $ OneVariableLinearFunction 0 1
+    -- sliceByteString doesn't actually allocate a new bytestring: it creates an
+    -- object containing a pointer into the original, together with a length.
+  , paramSliceByteString                 = Id $ ModelThreeArgumentsLinearInZ $ OneVariableLinearFunction 4 0
+  , paramLengthOfByteString              = Id $ ModelOneArgumentConstantCost 10
+  , paramIndexByteString                 = Id $ ModelTwoArgumentsConstantCost 4
+  , paramEqualsByteString                = Id $ boolMemModel
+  , paramLessThanByteString              = Id $ boolMemModel
+  , paramLessThanEqualsByteString        = Id $ boolMemModel
+  , paramSha2_256                        = Id $ hashMemModel Hash.sha2_256
+  , paramSha3_256                        = Id $ hashMemModel Hash.sha3_256
+  , paramBlake2b_256                     = Id $ hashMemModel Hash.blake2b_256
+  , paramVerifyEd25519Signature          = Id $ ModelThreeArgumentsConstantCost 10
+  , paramVerifyEcdsaSecp256k1Signature   = Id $ ModelThreeArgumentsConstantCost 10
+  , paramVerifySchnorrSecp256k1Signature = Id $ ModelThreeArgumentsConstantCost 10
+  , paramAppendString                    = Id $ ModelTwoArgumentsAddedSizes $ OneVariableLinearFunction 4 1
+  , paramEqualsString                    = Id $ boolMemModel
+  -- In the worst case two UTF-16 bytes encode to three UTF-8 bytes, so two
+  -- output words per input word should cover that.
+  , paramEncodeUtf8                      = Id $ ModelOneArgumentLinearInX $ OneVariableLinearFunction 4 2
+  -- In the worst case one UTF-8 byte decodes to two UTF-16 bytes
+  , paramDecodeUtf8                      = Id $ ModelOneArgumentLinearInX $ OneVariableLinearFunction 4 2
+  , paramIfThenElse                      = Id $ ModelThreeArgumentsConstantCost  1
+  , paramChooseUnit                      = Id $ ModelTwoArgumentsConstantCost    4
+  , paramTrace                           = Id $ ModelTwoArgumentsConstantCost   32
+  , paramFstPair                         = Id $ ModelOneArgumentConstantCost    32
+  , paramSndPair                         = Id $ ModelOneArgumentConstantCost    32
+  , paramChooseList                      = Id $ ModelThreeArgumentsConstantCost 32
+  , paramMkCons                          = Id $ ModelTwoArgumentsConstantCost   32
+  , paramHeadList                        = Id $ ModelOneArgumentConstantCost    32
+  , paramTailList                        = Id $ ModelOneArgumentConstantCost    32
+  , paramNullList                        = Id $ ModelOneArgumentConstantCost    32
+  , paramChooseData                      = Id $ ModelSixArgumentsConstantCost   32
+  , paramConstrData                      = Id $ ModelTwoArgumentsConstantCost   32
+  , paramMapData                         = Id $ ModelOneArgumentConstantCost    32
+  , paramListData                        = Id $ ModelOneArgumentConstantCost    32
+  , paramIData                           = Id $ ModelOneArgumentConstantCost    32
+  , paramBData                           = Id $ ModelOneArgumentConstantCost    32
+  , paramUnConstrData                    = Id $ ModelOneArgumentConstantCost    32
+  , paramUnMapData                       = Id $ ModelOneArgumentConstantCost    32
+  , paramUnListData                      = Id $ ModelOneArgumentConstantCost    32
+  , paramUnIData                         = Id $ ModelOneArgumentConstantCost    32
+  , paramUnBData                         = Id $ ModelOneArgumentConstantCost    32
+  , paramEqualsData                      = Id $ ModelTwoArgumentsConstantCost    1
+  , paramMkPairData                      = Id $ ModelTwoArgumentsConstantCost   32
+  , paramMkNilData                       = Id $ ModelOneArgumentConstantCost    32
+  , paramMkNilPairData                   = Id $ ModelOneArgumentConstantCost    32
+  , paramSerialiseData                   = Id $ ModelOneArgumentLinearInX $ OneVariableLinearFunction 0 2
+  , paramBls12_381_G1_add                = Id $ ModelTwoArgumentsConstantCost g1MemSize
+  , paramBls12_381_G1_neg                = Id $ ModelOneArgumentConstantCost  g1MemSize
+  , paramBls12_381_G1_scalarMul          = Id $ ModelTwoArgumentsConstantCost g1MemSize
+  , paramBls12_381_G1_equal              = Id $ boolMemModel
+  , paramBls12_381_G1_compress           = Id $ ModelOneArgumentConstantCost  g1CompressedSize
+  , paramBls12_381_G1_uncompress         = Id $ ModelOneArgumentConstantCost  g1MemSize
+  , paramBls12_381_G1_hashToGroup        = Id $ ModelTwoArgumentsConstantCost g1MemSize
+  , paramBls12_381_G2_add                = Id $ ModelTwoArgumentsConstantCost g2MemSize
+  , paramBls12_381_G2_neg                = Id $ ModelOneArgumentConstantCost  g2MemSize
+  , paramBls12_381_G2_scalarMul          = Id $ ModelTwoArgumentsConstantCost g2MemSize
+  , paramBls12_381_G2_equal              = Id $ boolMemModel
+  , paramBls12_381_G2_compress           = Id $ ModelOneArgumentConstantCost  g2CompressedSize
+  , paramBls12_381_G2_uncompress         = Id $ ModelOneArgumentConstantCost  g2MemSize
+  , paramBls12_381_G2_hashToGroup        = Id $ ModelTwoArgumentsConstantCost g2MemSize
+  , paramBls12_381_millerLoop            = Id $ ModelTwoArgumentsConstantCost mlResultMemSize
+  , paramBls12_381_mulMlResult           = Id $ ModelTwoArgumentsConstantCost mlResultMemSize
+  , paramBls12_381_finalVerify           = Id $ boolMemModel
+  , paramBlake2b_224                     = Id $ hashMemModel Hash.blake2b_224
+  , paramKeccak_256                      = Id $ hashMemModel Hash.keccak_256
+  -- integerToByteString e w n allocates a bytestring of length w if w is
+  -- nonzero and a bytestring just big enough to contain n otherwise, so we need
+  -- a special memory costing function to handle that.
+  , paramIntegerToByteString             = Id $ ModelThreeArgumentsLiteralInYOrLinearInZ $ OneVariableLinearFunction 0 1
+  , paramByteStringToInteger             = Id $ ModelTwoArgumentsLinearInY $ OneVariableLinearFunction 0 1
+  }