Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions YamlDotNet.Test/Core/ParserTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -388,6 +388,28 @@ public void VerifyTokenWithMultiDocTag()
StreamEnd);
}

/// <summary>
/// Verifies that a tag containing URL-encoded spaces (<c>%20</c>) and plus signs
/// (<c>+</c>) is decoded with every one of them turned into a single space.
/// </summary>
[Fact]
public void VerifyTokenWithUrlEncodedTagContainingPlusSpaces()
{
    // Each "%20" and each '+' in the tag decodes to exactly one space, so the
    // decoded tag has THREE spaces after "!(", ONE between "hello" and "you",
    // and TWO before ")". The spaces in the expected literal are significant.
    AssertSequenceOfEventsFrom(Yaml.ParserForText("!(%20%20%20hello+you%20+) value"),
        StreamStart,
        DocumentStart(Implicit),
        PlainScalar("value").T("!(   hello you  )"),
        DocumentEnd(Implicit),
        StreamEnd);
}

/// <summary>
/// Verifies that URL-encoded multi-byte UTF-8 sequences inside a tag are decoded,
/// including a four-byte sequence for a code point outside the Basic Multilingual Plane.
/// </summary>
[Fact]
public void VerifyTokenWithUrlEncoded32BitsUnicodeTags()
{
    // %F4%8F%BF%BF is the UTF-8 encoding of U+10FFFF and %E2%99%A5 that of U+2665;
    // the '+' and the trailing %20 each decode to one space.
    var parser = Yaml.ParserForText("!hel%F4%8F%BF%BFlo%E2%99%A5+A%20 value");
    var taggedScalar = PlainScalar("value").T("!hel􏿿lo♥ A ");

    AssertSequenceOfEventsFrom(
        parser,
        StreamStart,
        DocumentStart(Implicit),
        taggedScalar,
        DocumentEnd(Implicit),
        StreamEnd);
}

[Theory]
[InlineData("|\n b-carriage-return,b-line-feed\r\n lll", "b-carriage-return,b-line-feed\nlll")]
[InlineData("|\n b-carriage-return\r lll", "b-carriage-return\nlll")]
Expand Down
6 changes: 6 additions & 0 deletions YamlDotNet.Test/RepresentationModel/YamlStreamTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,12 @@ public void FailBackreference()
RoundtripTest("fail-backreference.yaml");
}

/// <summary>
/// Runs the round-trip check against the fixture that holds a code point above
/// U+FFFF, once as a plain scalar and once as a double-quoted <c>\U</c> escape.
/// </summary>
[Fact]
public void Roundtrip32BitsUnicodeEscape()
{
    const string fixtureName = "unicode-32bits-escape.yaml";

    RoundtripTest(fixtureName);
}

[Fact]
public void AllAliasesMustBeResolved()
{
Expand Down
1 change: 1 addition & 0 deletions YamlDotNet.Test/YamlDotNet.Test.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@
<EmbeddedResource Include="files\guid.yaml" />
<EmbeddedResource Include="files\ordered-properties.yaml" />
<EmbeddedResource Include="files\multi-doc-tag.yaml" />
<EmbeddedResource Include="files\unicode-32bits-escape.yaml" />
<None Include="packages.config" />
</ItemGroup>
<ItemGroup>
Expand Down
2 changes: 2 additions & 0 deletions YamlDotNet.Test/files/unicode-32bits-escape.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
- hel􏿿lo♥
- "hel\U0010fffflo\u2665"
25 changes: 24 additions & 1 deletion YamlDotNet/Core/Emitter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1160,12 +1160,25 @@ private void WriteDoubleQuotedScalar(string value, bool allowBreaks)
break;

default:
var code = (short)character;
var code = (ushort)character;
if (code <= 0xFF)
{
Write('x');
Write(code.ToString("X02", CultureInfo.InvariantCulture));
}
else if (IsHighSurrogate(character))
{
if (index + 1 < value.Length && IsLowSurrogate(value[index + 1]))
{
Write('U');
Write(char.ConvertToUtf32(character, value[index + 1]).ToString("X08", CultureInfo.InvariantCulture));
index++;
}
else
{
throw new SyntaxErrorException("While writing a quoted scalar, found an orphaned high surrogate.");
}
}
else
{
Write('u');
Expand Down Expand Up @@ -1342,6 +1355,16 @@ private static bool IsPrintable(char character)
(character >= '\xE000' && character <= '\xFFFD');
}

/// <summary>
/// Determines whether the specified UTF-16 code unit is a high (leading)
/// surrogate, i.e. lies in the range U+D800 through U+DBFF.
/// </summary>
/// <param name="c">The UTF-16 code unit to test.</param>
/// <returns><c>true</c> if <paramref name="c"/> is a high surrogate; otherwise <c>false</c>.</returns>
private static bool IsHighSurrogate(char c)
{
    // The framework check covers exactly the U+D800..U+DBFF range.
    return char.IsHighSurrogate(c);
}

/// <summary>
/// Determines whether the specified UTF-16 code unit is a low (trailing)
/// surrogate, i.e. lies in the range U+DC00 through U+DFFF.
/// </summary>
/// <param name="c">The UTF-16 code unit to test.</param>
/// <returns><c>true</c> if <paramref name="c"/> is a low surrogate; otherwise <c>false</c>.</returns>
private static bool IsLowSurrogate(char c)
{
    // The framework check covers exactly the U+DC00..U+DFFF range.
    return char.IsLowSurrogate(c);
}

#endregion

/// <summary>
Expand Down
26 changes: 17 additions & 9 deletions YamlDotNet/Core/Scanner.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1680,7 +1680,7 @@ private Token ScanFlowScalar(bool isSingleQuoted)

if (codeLength > 0)
{
uint character = 0;
int character = 0;

// Scan the character value.

Expand All @@ -1690,7 +1690,7 @@ private Token ScanFlowScalar(bool isSingleQuoted)
{
throw new SyntaxErrorException(start, cursor.Mark(), "While parsing a quoted scalar, did not find expected hexdecimal number.");
}
character = (uint)((character << 4) + analyzer.AsHex(k));
character = ((character << 4) + analyzer.AsHex(k));
}

// Check the value and write the character.
Expand All @@ -1700,7 +1700,7 @@ private Token ScanFlowScalar(bool isSingleQuoted)
throw new SyntaxErrorException(start, cursor.Mark(), "While parsing a quoted scalar, find invalid Unicode character escape code.");
}

value.Append((char)character);
value.Append(char.ConvertFromUtf32(character));

// Advance the pointer.

Expand Down Expand Up @@ -2145,6 +2145,11 @@ private string ScanTagUri(string head, Mark start)
{
tag.Append(ScanUriEscapes(start));
}
else if (analyzer.Check('+'))
{
tag.Append(' ');
Skip();
}
else
{
tag.Append(ReadCurrentCharacter());
Expand All @@ -2165,11 +2170,12 @@ private string ScanTagUri(string head, Mark start)
/// Decode a URI-escape sequence corresponding to a single UTF-8 character.
/// </summary>

private char ScanUriEscapes(Mark start)
private string ScanUriEscapes(Mark start)
{
// Decode the required number of characters.

var charBytes = new List<byte>();
byte[] charBytes = null;
int nextInsertionIndex = 0;
int width = 0;
do
{
Expand Down Expand Up @@ -2197,6 +2203,8 @@ private char ScanUriEscapes(Mark start)
{
throw new SyntaxErrorException(start, cursor.Mark(), "While parsing a tag, find an incorrect leading UTF-8 octet.");
}

charBytes = new byte[width];
}
else
{
Expand All @@ -2210,22 +2218,22 @@ private char ScanUriEscapes(Mark start)

// Copy the octet and move the pointers.

charBytes.Add((byte)octet);
charBytes[nextInsertionIndex++] = (byte)octet;

Skip();
Skip();
Skip();
}
while (--width > 0);

var characters = Encoding.UTF8.GetChars(charBytes.ToArray());
var result = Encoding.UTF8.GetString(charBytes, 0, nextInsertionIndex);

if (characters.Length != 1)
if (result.Length == 0 || result.Length > 2)
{
throw new SyntaxErrorException(start, cursor.Mark(), "While parsing a tag, find an incorrect UTF-8 sequence.");
}

return characters[0];
return result;
}

/// <summary>
Expand Down