Skip to content

Commit 4c85238

Browse files
author
ArthurHub
committed
handle Unicode html in clipboard
1 parent 7066007 commit 4c85238

File tree

2 files changed

+82
-37
lines changed

2 files changed

+82
-37
lines changed

Source/Demo/TestSamples/22.RTL.htm

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,18 @@
1-
<html>
2-
<body>
3-
<div dir="rtl">
4-
<div>שלום עולם, יש ברבורים בעגם הזה</div>
5-
<br />
6-
<div>שלום עולם, יש ברבורים בעגם הזה</div>
7-
<div>שלום עולם, יש ברבורים בעגם הזה</div>
8-
</div>
9-
<hr/>
10-
<div dir="rtl">
11-
<div>שלום עולם,<span>hello world</span> יש ברבורים בעגם הזה</div>
12-
<br />
13-
<div>שלום עולם, יש ברבורים בעגם הזה</div>
14-
<div>שלום עולם, יש ברבורים בעגם הזה</div>
15-
</div>
16-
<hr/>
17-
</body>
18-
</html>
1+
<html>
2+
<body>
3+
<div dir="rtl">
4+
<div>שלום עולם, יש <b>ברבורים </b>בעגם הזה</div>
5+
<br />
6+
<div>שלום עולם, יש ברבורים בעגם הזה</div>
7+
<div>שלום עולם, יש ברבורים בעגם הזה</div>
8+
</div>
9+
<hr />
10+
<div dir="rtl">
11+
<div>שלום עולם,<span>hello world</span> יש ברבורים בעגם הזה</div>
12+
<br />
13+
<div>שלום עולם, יש ברבורים בעגם הזה</div>
14+
<div>שלום עולם, יש ברבורים בעגם הזה</div>
15+
</div>
16+
<hr />
17+
</body>
18+
</html>

Source/HtmlRenderer/Utils/ClipboardHelper.cs

Lines changed: 64 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -24,14 +24,13 @@ internal static class ClipboardHelper
2424
/// The string layout (<![CDATA[<<<]]>) also ensures that it can't appear in the body of the html because the <![CDATA[<]]> <br/>
2525
/// character must be escaped. <br/>
2626
/// </summary>
27-
private const string Header = @"Version:1.0
27+
private const string Header = @"Version:0.9
2828
StartHTML:<<<<<<<<1
2929
EndHTML:<<<<<<<<2
3030
StartFragment:<<<<<<<<3
3131
EndFragment:<<<<<<<<4
3232
StartSelection:<<<<<<<<3
33-
EndSelection:<<<<<<<<3
34-
SourceURL:about:blank";
33+
EndSelection:<<<<<<<<4";
3534

3635
/// <summary>
3736
/// html comment that marks the beginning of the html fragment
@@ -43,13 +42,24 @@ internal static class ClipboardHelper
4342
/// </summary>
4443
public const string EndFragment = @"<!--EndFragment-->";
4544

45+
/// <summary>
46+
/// Used to calculate a character's byte count in UTF-8
47+
/// </summary>
48+
private static readonly char[] _byteCount = new char[1];
49+
4650
#endregion
4751

4852

4953
/// <summary>
50-
/// Create <see cref="DataObject"/> with given html and plain-text ready to be used for clipboard or drag and drop.
54+
/// Create <see cref="DataObject"/> with given html and plain-text ready to be used for clipboard or drag and drop.<br/>
55+
/// Handles missing <![CDATA[<html>]]> tags, specified start/end fragments and Unicode characters.
5156
/// </summary>
5257
/// <remarks>
58+
/// <para>
59+
/// Windows Clipboard works with UTF-8 Unicode encoding while .NET strings use UTF-16, so for the clipboard to correctly
60+
/// decode a Unicode string added to it from .NET, the string needs to be re-encoded using UTF-8 encoding.
61+
/// </para>
62+
/// <para>
5363
/// Builds the CF_HTML header correctly for all possible HTMLs<br/>
5464
/// If given html contains start/end fragments then it will use them in the header:
5565
/// <code><![CDATA[<html><body><!--StartFragment-->hello <b>world</b><!--EndFragment--></body></html>]]></code>
@@ -72,15 +82,23 @@ internal static class ClipboardHelper
7282
/// ]]>
7383
/// </code>
7484
/// See format specification here: http://msdn.microsoft.com/library/default.asp?url=/workshop/networking/clipboard/htmlclipboard.asp
85+
/// </para>
7586
/// </remarks>
7687
/// <param name="html">a html fragment</param>
7788
/// <param name="plainText">the plain text</param>
7889
public static DataObject CreateDataObject(string html, string plainText)
7990
{
91+
html = html ?? String.Empty;
92+
var htmlFragment = GetHtmlDataString(html);
93+
94+
// re-encode the string so it will work correctly
95+
if (html.Length != Encoding.UTF8.GetByteCount(html))
96+
htmlFragment = Encoding.Default.GetString(Encoding.UTF8.GetBytes(htmlFragment));
97+
8098
var dataObject = new DataObject();
81-
var htmlFragment = !string.IsNullOrEmpty(html) ? GetHtmlDataString(html) : html;
8299
dataObject.SetData(DataFormats.Html, htmlFragment);
83100
dataObject.SetData(DataFormats.Text, plainText);
101+
dataObject.SetData(DataFormats.UnicodeText, plainText);
84102
return dataObject;
85103
}
86104

@@ -96,7 +114,7 @@ public static DataObject CreateDataObject(string html, string plainText)
96114
public static void CopyToClipboard(string html, string plainText)
97115
{
98116
var dataObject = CreateDataObject(html, plainText);
99-
Clipboard.SetDataObject(dataObject);
117+
Clipboard.SetDataObject(dataObject, true);
100118
}
101119

102120
/// <summary>
@@ -114,29 +132,31 @@ private static string GetHtmlDataString(string html)
114132
int fragmentStart, fragmentEnd;
115133
int fragmentStartIdx = html.IndexOf(StartFragment, StringComparison.OrdinalIgnoreCase);
116134
int fragmentEndIdx = html.LastIndexOf(EndFragment, StringComparison.OrdinalIgnoreCase);
135+
136+
// if html tag is missing add it surrounding the given html (critical)
137+
int htmlOpenIdx = html.IndexOf("<html", StringComparison.OrdinalIgnoreCase);
138+
int htmlOpenEndIdx = htmlOpenIdx > -1 ? html.IndexOf('>', htmlOpenIdx) + 1 : -1;
139+
int htmlCloseIdx = html.LastIndexOf("</html", StringComparison.OrdinalIgnoreCase);
140+
117141
if (fragmentStartIdx < 0 && fragmentEndIdx < 0)
118142
{
119-
int htmlOpenIdx = html.IndexOf("<html", StringComparison.OrdinalIgnoreCase);
120143
int bodyOpenIdx = html.IndexOf("<body", StringComparison.OrdinalIgnoreCase);
121-
int htmlOpenEndIdx = htmlOpenIdx > -1 ? html.IndexOf('>', htmlOpenIdx) + 1 : -1;
122144
int bodyOpenEndIdx = bodyOpenIdx > -1 ? html.IndexOf('>', bodyOpenIdx) + 1 : -1;
123145

124146
if (htmlOpenEndIdx < 0 && bodyOpenEndIdx < 0)
125147
{
126148
// the given html doesn't contain html or body tags so we need to add them and place start/end fragments around the given html only
127149
sb.Append("<html><body>");
128150
sb.Append(StartFragment);
129-
fragmentStart = sb.Length;
151+
fragmentStart = GetByteCount(sb);
130152
sb.Append(html);
131-
fragmentEnd = sb.Length;
153+
fragmentEnd = GetByteCount(sb);
132154
sb.Append(EndFragment);
133155
sb.Append("</body></html>");
134156
}
135157
else
136158
{
137-
// if html tag is missing add it surrounding the given html (critical)
138159
// insert start/end fragments in the proper place (related to html/body tags if exists) so the paste will work correctly
139-
int htmlCloseIdx = html.LastIndexOf("</html", StringComparison.OrdinalIgnoreCase);
140160
int bodyCloseIdx = html.LastIndexOf("</body", StringComparison.OrdinalIgnoreCase);
141161

142162
if (htmlOpenEndIdx < 0)
@@ -148,13 +168,13 @@ private static string GetHtmlDataString(string html)
148168
sb.Append(html, htmlOpenEndIdx > -1 ? htmlOpenEndIdx : 0, bodyOpenEndIdx - (htmlOpenEndIdx > -1 ? htmlOpenEndIdx : 0));
149169

150170
sb.Append(StartFragment);
151-
fragmentStart = sb.Length;
171+
fragmentStart = GetByteCount(sb);
152172

153173
var innerHtmlStart = bodyOpenEndIdx > -1 ? bodyOpenEndIdx : (htmlOpenEndIdx > -1 ? htmlOpenEndIdx : 0);
154174
var innerHtmlEnd = bodyCloseIdx > -1 ? bodyCloseIdx : (htmlCloseIdx > -1 ? htmlCloseIdx : html.Length);
155175
sb.Append(html, innerHtmlStart, innerHtmlEnd - innerHtmlStart);
156176

157-
fragmentEnd = sb.Length;
177+
fragmentEnd = GetByteCount(sb);
158178
sb.Append(EndFragment);
159179

160180
if (innerHtmlEnd < html.Length)
@@ -166,18 +186,43 @@ private static string GetHtmlDataString(string html)
166186
}
167187
else
168188
{
169-
fragmentStart = sb.Length + fragmentStartIdx + StartFragment.Length;
170-
fragmentEnd = sb.Length + fragmentEndIdx;
189+
// handle html with existing start/end fragments: only the correct byte offsets need to be calculated (surround with html tag if missing)
190+
if (htmlOpenEndIdx < 0)
191+
sb.Append("<html>");
192+
int start = GetByteCount(sb);
171193
sb.Append(html);
194+
fragmentStart = start + GetByteCount(sb, start, start + fragmentStartIdx) + StartFragment.Length;
195+
fragmentEnd = start + GetByteCount(sb, start, start + fragmentEndIdx);
196+
if (htmlCloseIdx < 0)
197+
sb.Append("</html>");
172198
}
173199

174200
// Back-patch offsets (scan only the header part for performance)
175-
sb.Replace("<<<<<<<<4", fragmentEnd.ToString("D9"), 0, Header.Length);
176-
sb.Replace("<<<<<<<<3", fragmentStart.ToString("D9"), 0, Header.Length);
177-
sb.Replace("<<<<<<<<2", sb.Length.ToString("D9"), 0, Header.Length);
178201
sb.Replace("<<<<<<<<1", Header.Length.ToString("D9"), 0, Header.Length);
202+
sb.Replace("<<<<<<<<2", GetByteCount(sb).ToString("D9"), 0, Header.Length);
203+
sb.Replace("<<<<<<<<3", fragmentStart.ToString("D9"), 0, Header.Length);
204+
sb.Replace("<<<<<<<<4", fragmentEnd.ToString("D9"), 0, Header.Length);
179205

180206
return sb.ToString();
181207
}
208+
209+
/// <summary>
210+
/// Calculates the number of bytes produced by encoding the string in the string builder in UTF-8 rather than the .NET default string encoding.
211+
/// </summary>
212+
/// <param name="sb">the string builder whose content is counted</param>
213+
/// <param name="start">optional: the start index to calculate from (default - start of string)</param>
214+
/// <param name="end">optional: the end index to calculate to (default - end of string)</param>
215+
/// <returns>the number of bytes required to encode the string in UTF-8</returns>
216+
private static int GetByteCount(StringBuilder sb, int start = 0, int end = -1)
217+
{
218+
int count = 0;
219+
end = end > -1 ? end : sb.Length;
220+
for (int i = start; i < end; i++)
221+
{
222+
_byteCount[0] = sb[i];
223+
count += Encoding.UTF8.GetByteCount(_byteCount);
224+
}
225+
return count;
226+
}
182227
}
183228
}

0 commit comments

Comments
 (0)