Skip to content

Remove string allocations in VersionConverter in System.Text.Json #55179

Closed
@N0D4N

Description

@N0D4N

Description

The current implementation of System.Text.Json.Serialization.Converters.VersionConverter allocates a string both when reading and when writing. However, we can remove these allocations so that VersionConverter does not allocate at all.
When writing, we can take advantage of Version.TryFormat, which accepts a Span<char>; since we know the format and the maximum string length of a Version instance, we can stackalloc a char buffer for that span.
When reading, we can split the raw bytes on '.', parse each segment as an Int32, and pass those ints to the Version constructor.
To prove this, I created a draft custom, non-allocating implementation of VersionConverter that works faster and reduces allocated memory. The benchmark code, the benchmark results, and the tests confirming that the current and custom implementations of VersionConverter produce the same output are all available in this repo.

Configuration

See below in Benchmark results section

Benchmark comparing current and not-allocating implementation of VersionConverter

Click to see code
using System;
using System.Buffers;
using System.Buffers.Text;
using System.Diagnostics.CodeAnalysis;
using System.Runtime.CompilerServices;
using System.Text.Json;
using System.Text.Json.Serialization;
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Engines;
using BenchmarkDotNet.Running;

namespace FasterVersionConverter
{
	/// <summary>
	/// Console host for the benchmark suite.
	/// </summary>
	public class Program
	{
		/// <summary>
		/// Entry point: hands the <see cref="Benchmark"/> class to BenchmarkDotNet's runner.
		/// </summary>
		/// <param name="args">Command-line arguments; not read by this method.</param>
		static void Main(string[] args)
		{
			BenchmarkRunner.Run<Benchmark>();
		}
	}

	/// <summary>
	/// Compares serialization and deserialization of a <see cref="VersionWrapper"/> using the
	/// default serializer options against options that register <see cref="ProposedJsonVersionConverter"/>.
	/// </summary>
	[MemoryDiagnoser]
	public class Benchmark
	{
		/// <summary>Version text for the current benchmark run; one value per <see cref="TestCases"/> constant.</summary>
		[Params(
			TestCases.UnparsedTestCase1,
			TestCases.UnparsedTestCase2,
			TestCases.UnparsedTestCase3,
			TestCases.UnparsedTestCase4,
			TestCases.UnparsedTestCase5,
			TestCases.UnparsedTestCase6)]
		public string unparsedVersion;

		// Sink that receives every benchmark result.
		private static readonly Consumer s_sink = new Consumer();

		// Serializer options whose only customization is the proposed converter.
		private static readonly JsonSerializerOptions s_proposedOptions = BuildProposedOptions();

		private Version _parsedVersion;

		private VersionWrapper _payload;

		private string _payloadJson;

		/// <summary>Creates serializer options with <see cref="ProposedJsonVersionConverter"/> registered.</summary>
		private static JsonSerializerOptions BuildProposedOptions()
		{
			var options = new JsonSerializerOptions();
			options.Converters.Add(new ProposedJsonVersionConverter());
			return options;
		}

		/// <summary>
		/// Parses <see cref="unparsedVersion"/>, wraps it, and pre-serializes the wrapper (with default options)
		/// so the deserialize benchmarks have their input ready.
		/// </summary>
		[GlobalSetup]
		public void Setup()
		{
			Version parsed = Version.Parse(unparsedVersion);
			var wrapper = new VersionWrapper()
			{
				Version = parsed
			};

			_parsedVersion = parsed;
			_payload = wrapper;
			_payloadJson = JsonSerializer.Serialize(wrapper);
		}

		/// <summary>Serializes the wrapper with default options.</summary>
		[Benchmark]
		[BenchmarkCategory("Serialize", "Current")]
		public void CurrentConverterSerialize()
		{
			s_sink.Consume(JsonSerializer.Serialize(_payload));
		}

		/// <summary>Serializes the wrapper with the proposed converter registered.</summary>
		[Benchmark]
		[BenchmarkCategory("Serialize", "Proposed")]
		public void ProposedConverterSerialize()
		{
			s_sink.Consume(JsonSerializer.Serialize(_payload, s_proposedOptions));
		}

		/// <summary>Deserializes the pre-built JSON with default options.</summary>
		[Benchmark]
		[BenchmarkCategory("Deserialize", "Current")]
		public void CurrentConverterDeserialize()
		{
			s_sink.Consume(JsonSerializer.Deserialize<VersionWrapper>(_payloadJson));
		}

		/// <summary>Deserializes the pre-built JSON with the proposed converter registered.</summary>
		[Benchmark]
		[BenchmarkCategory("Deserialize", "Proposed")]
		public void ProposedConverterDeserialize()
		{
			s_sink.Consume(JsonSerializer.Deserialize<VersionWrapper>(_payloadJson, s_proposedOptions));
		}
	}

	/// <summary>
	/// <see cref="JsonConverter{T}"/> for <see cref="Version"/> that parses the UTF-8 payload directly
	/// and formats into a stack buffer, instead of going through an intermediate <see cref="string"/>.
	/// </summary>
	/// <remarks>
	/// Accepted input is a JSON string holding two to four dot-separated, non-negative Int32 components
	/// (for example <c>"1.2"</c> or <c>"1.2.3.4"</c>). Any other token or shape raises <see cref="JsonException"/>.
	/// Strings containing a backslash (JSON escape sequences) are rejected rather than unescaped.
	/// </remarks>
	public sealed class ProposedJsonVersionConverter : JsonConverter<Version>
	{
		private const int VersionComponentsCount = 4; // Major, Minor, Build, Revision

		private const int MaxStringRepresentationOfPositiveInt32 = 10; // int.MaxValue.ToString().Length

		// 4 components of up to 10 digits each plus the 3 '.' separators between them: 43 chars.
		private const int MaxStringLengthOfVersion =
			(MaxStringRepresentationOfPositiveInt32 * VersionComponentsCount) + (VersionComponentsCount - 1);

		/// <summary>
		/// Reads a <see cref="Version"/> from the current JSON string token.
		/// </summary>
		/// <param name="reader">Reader positioned on the value to convert.</param>
		/// <param name="typeToConvert">Unused; the converter only handles <see cref="Version"/>.</param>
		/// <param name="options">Unused.</param>
		/// <returns>A <see cref="Version"/> built from the two, three or four parsed components.</returns>
		/// <exception cref="JsonException">
		/// The token is not a string, contains a backslash, has fewer than two or more than four components,
		/// or has a component that is not entirely a non-negative Int32.
		/// </exception>
		public override Version Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options)
		{
			// Without this check a JSON number such as 1.5 would be parsed as Version 1.5.
			if (reader.TokenType != JsonTokenType.String)
			{
				ThrowHelper.ThrowJsonException();
			}

			// Note: a multi-segment value is still copied into a new array here.
			ReadOnlySpan<byte> rawVersion = reader.HasValueSequence ? reader.ValueSequence.ToArray() : reader.ValueSpan;

			// Escaped input is not supported: the raw bytes are parsed without unescaping.
			if (rawVersion.IndexOf((byte) '\\') != -1)
			{
				ThrowHelper.ThrowJsonException();
			}

			Span<int> versionComponents = stackalloc int[VersionComponentsCount]; // 4 * sizeof(int) == 16 bytes
			int componentsRead = 0;

			while (true)
			{
				// A fifth component (including an empty one after a trailing '.') is malformed
				// and must not be silently dropped.
				if (componentsRead == VersionComponentsCount)
				{
					ThrowHelper.ThrowJsonException();
				}

				int indexOfDot = GetIndexOfDot(rawVersion);
				bool lastComponent = indexOfDot == -1;
				ReadOnlySpan<byte> componentBytes = lastComponent ? rawVersion : rawVersion.Slice(0, indexOfDot);

				if (!TryGetVersionComponent(componentBytes, out int value))
				{
					ThrowHelper.ThrowJsonException();
				}

				versionComponents[componentsRead] = value;
				componentsRead++;

				if (lastComponent)
				{
					break;
				}

				rawVersion = rawVersion.Slice(indexOfDot + 1);
			}

			switch (componentsRead)
			{
				case 2:
					return new Version(versionComponents[0], versionComponents[1]);
				case 3:
					return new Version(versionComponents[0], versionComponents[1], versionComponents[2]);
				case 4:
					return new Version(versionComponents[0], versionComponents[1], versionComponents[2], versionComponents[3]);
			}

			// Only reachable with a single component (no '.' in the input).
			ThrowHelper.ThrowJsonException();
			return null; // unreachable; the compiler still requires a return after the throw helper
		}

		/// <summary>
		/// Parses <paramref name="source"/> as one version component.
		/// </summary>
		/// <returns>
		/// <c>true</c> only when the whole span was consumed by the parser and the result is non-negative.
		/// </returns>
		[MethodImpl(MethodImplOptions.AggressiveInlining)]
		private static bool TryGetVersionComponent(ReadOnlySpan<byte> source, out int value) =>
			Utf8Parser.TryParse(source, out value, out int bytesConsumed)
			&& bytesConsumed == source.Length // reject trailing garbage such as "2abc"
			&& value >= 0; // Version components cannot be negative

		/// <summary>Returns the index of the first '.' byte in <paramref name="source"/>, or -1 when there is none.</summary>
		public static int GetIndexOfDot(ReadOnlySpan<byte> source) => source.IndexOf((byte) '.');

		/// <summary>
		/// Writes <paramref name="value"/> as a JSON string, formatting it into a stack buffer first.
		/// </summary>
		/// <param name="writer">Writer that receives the string value.</param>
		/// <param name="value">Version to write.</param>
		/// <param name="options">Unused.</param>
		/// <exception cref="JsonException">Formatting into the buffer failed.</exception>
		public override void Write(Utf8JsonWriter writer, Version value, JsonSerializerOptions options)
		{
			// 43 chars == 86 bytes of stack (sizeof(char) == 2); the buffer is sized for the
			// longest possible version rather than computed from the components actually present.
			Span<char> span = stackalloc char[MaxStringLengthOfVersion];
			if (!value.TryFormat(span, out int charsWritten))
			{
				// Not expected given the buffer size, but never emit a truncated or empty value.
				ThrowHelper.ThrowJsonException();
			}

			writer.WriteStringValue(span.Slice(0, charsWritten));
		}

		/// <summary>Keeps the throw statement out of the parsing methods.</summary>
		public static class ThrowHelper
		{
			/// <summary>Always throws a <see cref="JsonException"/>.</summary>
			[DoesNotReturn]
			public static void ThrowJsonException() => throw new JsonException();
		}
	}

	/// <summary>
	/// Object with a single <see cref="System.Version"/> property; the benchmark serializes and
	/// deserializes instances of this type rather than a bare <see cref="System.Version"/>.
	/// </summary>
	public class VersionWrapper
	{
		/// <summary>The wrapped version; settable only during object initialization.</summary>
		public Version Version { get; init; }
	}

	/// <summary>
	/// Version strings used as benchmark parameters: the shortest and the longest textual form
	/// for two, three and four components.
	/// </summary>
	public static class TestCases
	{
		// Shortest forms.
		public const string UnparsedTestCase1 = "1.0";
		public const string UnparsedTestCase2 = "1.0.0";
		public const string UnparsedTestCase3 = "1.0.0.0";

		// Longest forms: every component is int.MaxValue.
		public const string UnparsedTestCase4 = "2147483647.2147483647";
		public const string UnparsedTestCase5 = "2147483647.2147483647.2147483647";
		public const string UnparsedTestCase6 = "2147483647.2147483647.2147483647.2147483647";
	}
}

Benchmark results

Click to see benchmark results

Note

OS=neon 20.04 means KDE Neon which is based on Ubuntu 20.04 LTS

BenchmarkDotNet=v0.13.0, OS=neon 20.04
Intel Core i3-7130U CPU 2.70GHz (Kaby Lake), 1 CPU, 4 logical and 2 physical cores
.NET SDK=6.0.100-preview.5.21302.13
  [Host]     : .NET 6.0.0 (6.0.21.30105), X64 RyuJIT
  DefaultJob : .NET 6.0.0 (6.0.21.30105), X64 RyuJIT

Method unparsedVersion Mean Error StdDev Gen 0 Gen 1 Gen 2 Allocated
CurrentConverterSerialize 1.0 441.3 ns 0.22 ns 0.20 ns 0.1526 - - 240 B
ProposedConverterSerialize 1.0 430.0 ns 0.44 ns 0.41 ns 0.1326 - - 208 B
CurrentConverterDeserialize 1.0 487.3 ns 0.13 ns 0.11 ns 0.0553 - - 88 B
ProposedConverterDeserialize 1.0 434.7 ns 0.27 ns 0.24 ns 0.0353 - - 56 B
CurrentConverterSerialize 1.0.0 450.4 ns 0.36 ns 0.32 ns 0.1578 - - 248 B
ProposedConverterSerialize 1.0.0 458.7 ns 0.21 ns 0.18 ns 0.1373 - - 216 B
CurrentConverterDeserialize 1.0.0 520.6 ns 1.54 ns 1.20 ns 0.0553 - - 88 B
ProposedConverterDeserialize 1.0.0 465.3 ns 0.17 ns 0.14 ns 0.0353 - - 56 B
CurrentConverterSerialize 1.0.0.0 467.4 ns 0.51 ns 0.48 ns 0.1631 - - 256 B
ProposedConverterSerialize 1.0.0.0 460.8 ns 0.30 ns 0.27 ns 0.1373 - - 216 B
CurrentConverterDeserialize 1.0.0.0 557.7 ns 0.33 ns 0.31 ns 0.0610 - - 96 B
ProposedConverterDeserialize 1.0.0.0 470.9 ns 0.09 ns 0.08 ns 0.0353 - - 56 B
CurrentConverterSerialize 21474(...)83647 [21] 499.3 ns 0.58 ns 0.54 ns 0.1984 - - 312 B
ProposedConverterSerialize 21474(...)83647 [21] 491.3 ns 0.49 ns 0.41 ns 0.1574 - - 248 B
CurrentConverterDeserialize 21474(...)83647 [21] 566.1 ns 0.55 ns 0.49 ns 0.0763 - - 120 B
ProposedConverterDeserialize 21474(...)83647 [21] 522.3 ns 0.21 ns 0.19 ns 0.0353 - - 56 B
CurrentConverterSerialize 21474(...)83647 [32] 532.0 ns 0.91 ns 0.81 ns 0.2289 - - 360 B
ProposedConverterSerialize 21474(...)83647 [32] 507.1 ns 0.40 ns 0.36 ns 0.1726 - - 272 B
CurrentConverterDeserialize 21474(...)83647 [32] 627.8 ns 0.30 ns 0.28 ns 0.0916 - - 144 B
ProposedConverterDeserialize 21474(...)83647 [32] 534.8 ns 0.29 ns 0.28 ns 0.0353 - - 56 B
CurrentConverterSerialize 21474(...)83647 [43] 583.9 ns 0.40 ns 0.38 ns 0.2546 - - 400 B
ProposedConverterSerialize 21474(...)83647 [43] 558.4 ns 0.46 ns 0.41 ns 0.1831 - - 288 B
CurrentConverterDeserialize 21474(...)83647 [43] 692.8 ns 0.49 ns 0.44 ns 0.1068 - - 168 B
ProposedConverterDeserialize 21474(...)83647 [43] 584.8 ns 0.18 ns 0.14 ns 0.0353 - - 56 B

Analysis

Since the memory allocated on deserialization is constant, I assume it is simply the cost of calling JsonSerializer.Deserialize and allocating the Version and VersionWrapper objects.
However, the memory allocated on serialization isn't constant, even though it is slightly reduced; this is caused by the need to allocate the string that holds the serialized result.

Note

If this is approved, I would like to create a PR to implement it.

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions