@@ -24,14 +24,13 @@ internal static class ClipboardHelper
2424 /// The string layout (<![CDATA[<<<]]>) also ensures that it can't appear in the body of the html because the <![CDATA[<]]> <br/>
2525 /// character must be escaped. <br/>
2626 /// </summary>
27- private const string Header = @"Version:1.0
27+ private const string Header = @"Version:0.9
2828StartHTML:<<<<<<<<1
2929EndHTML:<<<<<<<<2
3030StartFragment:<<<<<<<<3
3131EndFragment:<<<<<<<<4
3232StartSelection:<<<<<<<<3
33- EndSelection:<<<<<<<<3
34- SourceURL:about:blank" ;
33+ EndSelection:<<<<<<<<4" ;
3534
3635 /// <summary>
3736 /// html comment to point the beginning of html fragment
@@ -43,13 +42,24 @@ internal static class ClipboardHelper
4342 /// </summary>
4443 public const string EndFragment = @"<!--EndFragment-->" ;
4544
45+ /// <summary>
46+ /// Used to calculate characters byte count in UTF-8
47+ /// </summary>
48+ private static readonly char [ ] _byteCount = new char [ 1 ] ;
49+
4650 #endregion
4751
4852
4953 /// <summary>
50- /// Create <see cref="DataObject"/> with given html and plain-text ready to be used for clipboard or drag and drop.
54+ /// Create <see cref="DataObject"/> with given html and plain-text ready to be used for clipboard or drag and drop.<br/>
55+ /// Handle missing <![CDATA[<html>]]> tags, specified start\end segments and Unicode characters.
5156 /// </summary>
5257 /// <remarks>
58+ /// <para>
59+ /// Windows Clipboard works with UTF-8 Unicode encoding while .NET strings use UTF-16, so for the clipboard to correctly
60+ /// decode a Unicode string added to it from .NET the string needs to be re-encoded using UTF-8 encoding.
61+ /// </para>
62+ /// <para>
5363 /// Builds the CF_HTML header correctly for all possible HTMLs<br/>
5464 /// If given html contains start/end fragments then it will use them in the header:
5565 /// <code><![CDATA[<html><body><!--StartFragment-->hello <b>world</b><!--EndFragment--></body></html>]]></code>
@@ -72,15 +82,23 @@ internal static class ClipboardHelper
7282 /// ]]>
7383 /// </code>
7484 /// See format specification here: http://msdn.microsoft.com/library/default.asp?url=/workshop/networking/clipboard/htmlclipboard.asp
85+ /// </para>
7586 /// </remarks>
7687 /// <param name="html">a html fragment</param>
7788 /// <param name="plainText">the plain text</param>
7889 public static DataObject CreateDataObject ( string html , string plainText )
7990 {
// a null html is replaced with an empty string so the calls below always receive a non-null value
91+ html = html ?? String . Empty ;
// build the clipboard html string (header with offsets + the html) for the given html
92+ var htmlFragment = GetHtmlDataString ( html ) ;
93+
94+ // re-encode the string so it will work correctly
// the char count and the UTF-8 byte count of html differ only when html contains non-ASCII characters;
// in that case the UTF-8 bytes of the fragment are decoded back into a string using the system default encoding
95+ if ( html . Length != Encoding . UTF8 . GetByteCount ( html ) )
96+ htmlFragment = Encoding . Default . GetString ( Encoding . UTF8 . GetBytes ( htmlFragment ) ) ;
97+
8098 var dataObject = new DataObject ( ) ;
81- var htmlFragment = ! string . IsNullOrEmpty ( html ) ? GetHtmlDataString ( html ) : html ;
8299 dataObject . SetData ( DataFormats . Html , htmlFragment ) ;
// the same plain text is registered under both the Text and the UnicodeText formats
83100 dataObject . SetData ( DataFormats . Text , plainText ) ;
101+ dataObject . SetData ( DataFormats . UnicodeText , plainText ) ;
84102 return dataObject ;
85103 }
86104
@@ -96,7 +114,7 @@ public static DataObject CreateDataObject(string html, string plainText)
96114 public static void CopyToClipboard ( string html , string plainText )
97115 {
// build the data object holding both the html and the plain-text representations
98116 var dataObject = CreateDataObject ( html , plainText ) ;
99- Clipboard . SetDataObject ( dataObject ) ;
// the second argument (copy = true) requests that the data remains on the clipboard after the application exits
117+ Clipboard . SetDataObject ( dataObject , true ) ;
100118 }
101119
102120 /// <summary>
@@ -114,29 +132,31 @@ private static string GetHtmlDataString(string html)
114132 int fragmentStart , fragmentEnd ;
115133 int fragmentStartIdx = html . IndexOf ( StartFragment , StringComparison . OrdinalIgnoreCase ) ;
116134 int fragmentEndIdx = html . LastIndexOf ( EndFragment , StringComparison . OrdinalIgnoreCase ) ;
135+
136+ // if html tag is missing add it surrounding the given html (critical)
137+ int htmlOpenIdx = html . IndexOf ( "<html" , StringComparison . OrdinalIgnoreCase ) ;
138+ int htmlOpenEndIdx = htmlOpenIdx > - 1 ? html . IndexOf ( '>' , htmlOpenIdx ) + 1 : - 1 ;
139+ int htmlCloseIdx = html . LastIndexOf ( "</html" , StringComparison . OrdinalIgnoreCase ) ;
140+
117141 if ( fragmentStartIdx < 0 && fragmentEndIdx < 0 )
118142 {
119- int htmlOpenIdx = html . IndexOf ( "<html" , StringComparison . OrdinalIgnoreCase ) ;
120143 int bodyOpenIdx = html . IndexOf ( "<body" , StringComparison . OrdinalIgnoreCase ) ;
121- int htmlOpenEndIdx = htmlOpenIdx > - 1 ? html . IndexOf ( '>' , htmlOpenIdx ) + 1 : - 1 ;
122144 int bodyOpenEndIdx = bodyOpenIdx > - 1 ? html . IndexOf ( '>' , bodyOpenIdx ) + 1 : - 1 ;
123145
124146 if ( htmlOpenEndIdx < 0 && bodyOpenEndIdx < 0 )
125147 {
126148 // the given html doesn't contain html or body tags so we need to add them and place start/end fragments around the given html only
127149 sb . Append ( "<html><body>" ) ;
128150 sb . Append ( StartFragment ) ;
129- fragmentStart = sb . Length ;
151+ fragmentStart = GetByteCount ( sb ) ;
130152 sb . Append ( html ) ;
131- fragmentEnd = sb . Length ;
153+ fragmentEnd = GetByteCount ( sb ) ;
132154 sb . Append ( EndFragment ) ;
133155 sb . Append ( "</body></html>" ) ;
134156 }
135157 else
136158 {
137- // if html tag is missing add it surrounding the given html (critical)
138159 // insert start/end fragments in the proper place (related to html/body tags if exists) so the paste will work correctly
139- int htmlCloseIdx = html . LastIndexOf ( "</html" , StringComparison . OrdinalIgnoreCase ) ;
140160 int bodyCloseIdx = html . LastIndexOf ( "</body" , StringComparison . OrdinalIgnoreCase ) ;
141161
142162 if ( htmlOpenEndIdx < 0 )
@@ -148,13 +168,13 @@ private static string GetHtmlDataString(string html)
148168 sb . Append ( html , htmlOpenEndIdx > - 1 ? htmlOpenEndIdx : 0 , bodyOpenEndIdx - ( htmlOpenEndIdx > - 1 ? htmlOpenEndIdx : 0 ) ) ;
149169
150170 sb . Append ( StartFragment ) ;
151- fragmentStart = sb . Length ;
171+ fragmentStart = GetByteCount ( sb ) ;
152172
153173 var innerHtmlStart = bodyOpenEndIdx > - 1 ? bodyOpenEndIdx : ( htmlOpenEndIdx > - 1 ? htmlOpenEndIdx : 0 ) ;
154174 var innerHtmlEnd = bodyCloseIdx > - 1 ? bodyCloseIdx : ( htmlCloseIdx > - 1 ? htmlCloseIdx : html . Length ) ;
155175 sb . Append ( html , innerHtmlStart , innerHtmlEnd - innerHtmlStart ) ;
156176
157- fragmentEnd = sb . Length ;
177+ fragmentEnd = GetByteCount ( sb ) ;
158178 sb . Append ( EndFragment ) ;
159179
160180 if ( innerHtmlEnd < html . Length )
@@ -166,18 +186,43 @@ private static string GetHtmlDataString(string html)
166186 }
167187 else
168188 {
169- fragmentStart = sb . Length + fragmentStartIdx + StartFragment . Length ;
170- fragmentEnd = sb . Length + fragmentEndIdx ;
189+ // handle html with existing start\end fragments just need to calculate the correct bytes offset (surround with html tag if missing)
190+ if ( htmlOpenEndIdx < 0 )
191+ sb . Append ( "<html>" ) ;
192+ int start = GetByteCount ( sb ) ;
171193 sb . Append ( html ) ;
194+ fragmentStart = start + GetByteCount ( sb , start , start + fragmentStartIdx ) + StartFragment . Length ;
195+ fragmentEnd = start + GetByteCount ( sb , start , start + fragmentEndIdx ) ;
196+ if ( htmlCloseIdx < 0 )
197+ sb . Append ( "</html>" ) ;
172198 }
173199
174200 // Back-patch offsets (scan only the header part for performance)
175- sb . Replace ( "<<<<<<<<4" , fragmentEnd . ToString ( "D9" ) , 0 , Header . Length ) ;
176- sb . Replace ( "<<<<<<<<3" , fragmentStart . ToString ( "D9" ) , 0 , Header . Length ) ;
177- sb . Replace ( "<<<<<<<<2" , sb . Length . ToString ( "D9" ) , 0 , Header . Length ) ;
178201 sb . Replace ( "<<<<<<<<1" , Header . Length . ToString ( "D9" ) , 0 , Header . Length ) ;
202+ sb . Replace ( "<<<<<<<<2" , GetByteCount ( sb ) . ToString ( "D9" ) , 0 , Header . Length ) ;
203+ sb . Replace ( "<<<<<<<<3" , fragmentStart . ToString ( "D9" ) , 0 , Header . Length ) ;
204+ sb . Replace ( "<<<<<<<<4" , fragmentEnd . ToString ( "D9" ) , 0 , Header . Length ) ;
179205
180206 return sb . ToString ( ) ;
181207 }
208+
/// <summary>
/// Calculates the number of bytes required to encode a range of the string builder content in UTF-8
/// (as opposed to the number of UTF-16 chars that .NET strings are measured in).
/// </summary>
/// <remarks>
/// The range is encoded as a whole so a surrogate pair (e.g. emoji) is counted as a single 4-byte sequence; encoding each
/// char on its own would treat both halves as invalid lone surrogates (3 bytes each) and over-report the size.<br/>
/// No shared scratch buffer is used, the result depends only on the given arguments.
/// </remarks>
/// <param name="sb">the string builder to count its string</param>
/// <param name="start">optional: the start index to calculate from (default - start of string)</param>
/// <param name="end">optional: the exclusive end index to calculate to (default, or any negative value - end of string)</param>
/// <returns>the number of bytes required to encode the range in UTF-8, 0 if the range is empty or inverted</returns>
/// <exception cref="ArgumentOutOfRangeException">if a non-empty range is not fully inside the string builder</exception>
private static int GetByteCount(StringBuilder sb, int start = 0, int end = -1)
{
    // any negative end means "up to the end of the builder"
    if (end < 0)
        end = sb.Length;

    // an empty or inverted range contains no chars hence no bytes
    if (end <= start)
        return 0;

    // let the encoder see the whole range at once so multi-char sequences (surrogate pairs) are measured correctly
    return Encoding.UTF8.GetByteCount(sb.ToString(start, end - start));
}
182227 }
183228}
0 commit comments