66using System . Collections . Concurrent ;
77using System . Collections ;
88using System . IO . Pipes ;
9+ using System ;
10+ using System . ComponentModel . Design . Serialization ;
911
1012namespace Signum . Utilities ;
1113
@@ -181,8 +183,7 @@ public static IEnumerable<T> ReadStream<T>(Stream stream, Encoding? encoding = n
181183
182184 var members = CsvMemberCache < T > . Members ;
183185 var parsers = members . Select ( m => GetParser ( defCulture , m , defOptions . ParserFactory ) ) . ToList ( ) ;
184-
185- Regex regex = GetRegex ( defCulture , defOptions . RegexTimeout , defOptions . ListSeparator ) ;
186+ Regex valueRegex = GetRegex ( isLine : false , defCulture , defOptions . RegexTimeout , defOptions . ListSeparator ) ;
186187
187188 if ( defOptions . AsumeSingleLine )
188189 {
@@ -199,66 +200,68 @@ public static IEnumerable<T> ReadStream<T>(Stream stream, Encoding? encoding = n
199200 if ( csvLine == null )
200201 yield break ;
201202
202- Match ? m = null ;
203- T ? t = null ;
204- try
203+ if ( csvLine . Length > 0 )
205204 {
206- m = regex . Match ( csvLine ) ;
207- if ( m . Length > 0 )
205+ T ? t = null ;
206+ try
208207 {
209- t = ReadObject < T > ( m , members , parsers ) ;
208+ var m = valueRegex . EnumerateMatches ( csvLine ) ;
209+
210+ t = ReadObject < T > ( m , csvLine . AsSpan ( ) , members , parsers ) ;
210211 }
211- }
212- catch ( Exception e )
213- {
214- e . Data [ "row" ] = line ;
212+ catch ( Exception e )
213+ {
214+ e . Data [ "row" ] = line ;
215215
216- if ( defOptions . SkipError == null || ! defOptions . SkipError ( e , m ) )
217- throw new ParseCsvException ( e ) ;
218- }
216+ if ( defOptions . SkipError == null || ! defOptions . SkipError ( e , csvLine ) )
217+ throw new ParseCsvException ( e ) ;
218+ }
219219
220- if ( t != null )
221- yield return t ;
220+ if ( t != null )
221+ yield return t ;
222222
223+ }
223224 line ++ ;
224225 }
225226 }
226227 }
227228 else
228229 {
230+ Regex lineRegex = GetRegex ( isLine : true , defCulture , defOptions . RegexTimeout , defOptions . ListSeparator ) ;
231+
229232 using ( StreamReader sr = new StreamReader ( stream , encoding ) )
230233 {
231234 string str = sr . ReadToEnd ( ) ;
232235
233- var matches = regex . Matches ( str ) . Cast < Match > ( ) ;
234-
235- if ( skipLines > 0 )
236- matches = matches . Skip ( skipLines ) ;
237-
238- int line = skipLines ;
239- foreach ( var m in matches )
236+ int i = 0 ;
237+ foreach ( Match m in lineRegex . Matches ( str ) )
240238 {
239+ if ( i < skipLines )
240+ continue ;
241+
241242 if ( m . Length > 0 )
242243 {
243244 T ? t = null ;
244245 try
245246 {
247+ var line = m . Value ;
248+
246249 if ( options ? . Constructor != null )
247- t = options . Constructor ( m ) ;
250+ t = options . Constructor ( line ) ;
248251 else
249- t = ReadObject < T > ( m , members , parsers ) ;
252+ t = ReadObject < T > ( valueRegex . EnumerateMatches ( line ) , line , members , parsers ) ;
250253 }
251254 catch ( Exception e )
252255 {
253- e . Data [ "row" ] = line ;
256+ e . Data [ "row" ] = i ;
254257
255- if ( defOptions . SkipError == null || ! defOptions . SkipError ( e , m ) )
258+ if ( defOptions . SkipError == null || ! defOptions . SkipError ( e , str . Substring ( m . Index , m . Length ) ) )
256259 throw new ParseCsvException ( e ) ;
257260 }
258261 if ( t != null )
259262 yield return t ;
260263 }
261- line ++ ;
264+ i ++ ;
262265 }
263266 }
264267 }
@@ -271,18 +274,20 @@ public static T ReadLine<T>(string csvLine, CultureInfo? culture = null, CsvRead
271274
272275 var defCulture = GetDefaultCulture ( culture ) ;
273276
274- Regex regex = GetRegex ( defCulture , defOptions . RegexTimeout ) ;
277+ Regex regex = GetRegex ( isLine : false , defCulture , defOptions . RegexTimeout ) ;
275278
276- Match m = regex . Match ( csvLine ) ;
279+ var vme = regex . EnumerateMatches ( csvLine ) ;
277280
278281 var members = CsvMemberCache < T > . Members ;
279282
280- return ReadObject < T > ( m ,
283+ return ReadObject < T > ( vme ,
284+ csvLine . AsSpan ( ) ,
281285 members ,
282286 members . Select ( c => GetParser ( defCulture , c , defOptions . ParserFactory ) ) . ToList ( ) ) ;
283287 }
284288
285- private static Func < string , object ? > GetParser < T > ( CultureInfo culture , CsvMemberInfo < T > column , Func < CsvMemberInfo < T > , CultureInfo , Func < string , object ? > ? > ? parserFactory )
289+
290+ private static ValueParser GetParser < T > ( CultureInfo culture , CsvMemberInfo < T > column , Func < CsvMemberInfo < T > , CultureInfo , ValueParser ? > ? parserFactory )
286291 {
287292 if ( parserFactory != null )
288293 {
@@ -294,43 +299,51 @@ public static T ReadLine<T>(string csvLine, CultureInfo? culture = null, CsvRead
294299
295300 var type = column . IsCollection ? column . MemberInfo . ReturningType ( ) . ElementType ( ) ! : column . MemberInfo . ReturningType ( ) ;
296301
297- return str => ConvertTo ( str , type , culture , column . Format ) ;
302+ return GetBasicParser ( type . UnNullify ( ) , culture , column . Format ) ;
298303 }
299304
300- static T ReadObject < T > ( Match m , List < CsvMemberInfo < T > > members , List < Func < string , object ? > > parsers )
301- {
302- var vals = m . Groups [ "val" ] . Captures ;
303-
304- if ( vals . Count < members . Count )
305- throw new FormatException ( "Only {0} columns found (instead of {1}) in line: {2}" . FormatWith ( vals . Count , members . Count , m . Value ) ) ;
305+ public delegate object ? ValueParser ( ReadOnlySpan < char > str ) ;
306306
307+ static T ReadObject < T > ( Regex . ValueMatchEnumerator vme , ReadOnlySpan < char > line , List < CsvMemberInfo < T > > members , List < ValueParser > parsers )
308+ {
307309 T t = Activator . CreateInstance < T > ( ) ;
308310
309- for ( int i = 0 ; i < members . Count ; i ++ )
311+ bool endsInCollection = false ;
312+ int i = 0 ;
313+ foreach ( var v in vme )
310314 {
315+ if ( members . Count <= i )
316+ continue ;
317+
318+ var value = line . Slice ( v . Index , v . Length ) ;
311319 var member = members [ i ] ;
312320 var parser = parsers [ i ] ;
313- string ? str = null ;
314321 try
315322 {
316323 if ( ! member . IsCollection )
317324 {
318- str = DecodeCsv ( vals [ i ] . Value ) ;
325+ value = DecodeCsv ( value ) ;
319326
320- object ? val = parser ( str ) ;
327+ object ? val = parser ( value ) ;
321328
322329 member . MemberEntry . Setter ! ( t , val ) ;
323330 }
324331 else
325332 {
333+ if ( i != members . Count - 1 )
334+ throw new InvalidOperationException ( $ "Collection { member . MemberInfo } should be the last member") ;
335+ endsInCollection = true ;
326336 var list = ( IList ) Activator . CreateInstance ( member . MemberInfo . ReturningType ( ) ) ! ;
327337
328- for ( int j = i ; j < vals . Count ; j ++ )
329- {
330- str = DecodeCsv ( vals [ j ] . Value ) ;
331-
332- object ? val = parser ( str ) ;
338+ value = DecodeCsv ( value ) ;
339+ object ? val = parser ( value ) ;
340+ list . Add ( val ) ;
333341
342+ foreach ( var v2 in vme )
343+ {
344+ value = line . Slice ( v2 . Index , v2 . Length ) ;
345+ value = DecodeCsv ( value ) ;
346+ val = parser ( value ) ;
334347 list . Add ( val ) ;
335348 }
336349
@@ -339,11 +352,17 @@ static T ReadObject<T>(Match m, List<CsvMemberInfo<T>> members, List<Func<string
339352 }
340353 catch ( Exception e )
341354 {
342- e . Data [ "value" ] = str ;
355+ e . Data [ "value" ] = new String ( value ) ;
343356 e . Data [ "member" ] = members [ i ] . MemberInfo . Name ;
344357 throw ;
345358 }
359+
360+ i ++ ;
346361 }
362+
363+ if ( ! endsInCollection && i != members . Count )
364+ throw new FormatException ( "Only {0} columns found (instead of {1}) in line: {2}" . FormatWith ( i , members . Count , new string ( line ) ) ) ;
365+
347366 return t ;
348367 }
349368
@@ -369,7 +388,7 @@ public static IEnumerable<string[]> ReadUntypedStream(Stream stream, Encoding? e
369388 var defCulture = GetDefaultCulture ( culture ) ;
370389 var defOptions = options ?? new CsvReadOptions ( ) ;
371390
372- Regex regex = GetRegex ( defCulture , defOptions . RegexTimeout , defOptions . ListSeparator ) ;
391+ Regex valueRegex = GetRegex ( false , defCulture , defOptions . RegexTimeout , defOptions . ListSeparator ) ;
373392 if ( defOptions . AsumeSingleLine )
374393 {
375394 using ( StreamReader sr = new StreamReader ( stream , encoding ) )
@@ -386,7 +405,7 @@ public static IEnumerable<string[]> ReadUntypedStream(Stream stream, Encoding? e
386405 string [ ] ? t = null ;
387406 try
388407 {
389- m = regex . Match ( csvLine ) ;
408+ m = valueRegex . Match ( csvLine ) ;
390409 if ( m . Length > 0 )
391410 {
392411 t = m . Groups [ "val" ] . Captures . Select ( c => c . Value ) . ToArray ( ) ;
@@ -396,7 +415,7 @@ public static IEnumerable<string[]> ReadUntypedStream(Stream stream, Encoding? e
396415 {
397416 e . Data [ "row" ] = line ;
398417
399- if ( defOptions . SkipError == null || ! defOptions . SkipError ( e , m ) )
418+ if ( defOptions . SkipError == null || ! defOptions . SkipError ( e , csvLine ) )
400419 throw new ParseCsvException ( e ) ;
401420 }
402421
@@ -413,7 +432,7 @@ public static IEnumerable<string[]> ReadUntypedStream(Stream stream, Encoding? e
413432 {
414433 string str = sr . ReadToEnd ( ) ;
415434
416- var matches = regex . Matches ( str ) . Cast < Match > ( ) ;
435+ var matches = valueRegex . Matches ( str ) . Cast < Match > ( ) ;
417436
418437 int line = 0 ;
419438 foreach ( var m in matches )
@@ -429,7 +448,7 @@ public static IEnumerable<string[]> ReadUntypedStream(Stream stream, Encoding? e
429448 {
430449 e . Data [ "row" ] = line ;
431450
432- if ( defOptions . SkipError == null || ! defOptions . SkipError ( e , m ) )
451+ if ( defOptions . SkipError == null || ! defOptions . SkipError ( e , m . Value ) )
433452 throw new ParseCsvException ( e ) ;
434453 }
435454 if ( t != null )
@@ -530,16 +549,17 @@ public class MyFileCSV
530549 """ ;
531550 }
532551
533-
// Cache of constructed regexes. The key includes everything that influences the
// resulting Regex instance: pattern kind (line vs. single value), separator and timeout.
static ConcurrentDictionary<(bool multiLine, char separator, TimeSpan timeout), Regex> regexCache = new();

// Pattern for a single CSV value: either a quoted value (with doubled quotes inside)
// or a run of characters that are neither the separator nor a line break.
// Written with ' and ; as placeholders: ' becomes " here, ; becomes the real separator in GetRegex.
readonly static string ValueRegex = "'(?:[^']+|'')*'|[^;\r\n]*".Replace('\'', '"');

// Pattern for a whole CSV line: a value, then any number of separator-prefixed values,
// ending at end-of-line. Must be declared after ValueRegex (static initializers run in textual order).
readonly static string LineRegex = $@"^({ValueRegex})?((?!($|\r\n));({ValueRegex}))*($|\r\n)";

/// <summary>
/// Returns a cached <see cref="Regex"/> that matches either a full CSV line
/// (<paramref name="isLine"/> = true) or a single CSV value (<paramref name="isLine"/> = false).
/// </summary>
/// <param name="isLine">True to get the line pattern, false to get the value pattern.</param>
/// <param name="culture">Culture used to resolve the list separator when <paramref name="listSeparator"/> is null.</param>
/// <param name="timeout">Match timeout given to the constructed <see cref="Regex"/>.</param>
/// <param name="listSeparator">Explicit separator; when null, <c>GetListSeparator(culture)</c> is used.</param>
static Regex GetRegex(bool isLine, CultureInfo culture, TimeSpan timeout, char? listSeparator = null)
{
    char separator = listSeparator ?? GetListSeparator(culture);

    return regexCache.GetOrAdd((isLine, separator, timeout), key =>
    {
        string template = key.multiLine ? LineRegex : ValueRegex;

        // Escape the separator: characters such as '|' or '.' are regex metacharacters and,
        // substituted verbatim, would change the meaning of the line pattern.
        string pattern = template.Replace(";", Regex.Escape(key.separator.ToString()));

        return new Regex(pattern, RegexOptions.Multiline | RegexOptions.ExplicitCapture, key.timeout);
    });
}
562+
543563
544564 private static char GetListSeparator ( CultureInfo culture )
545565 {
@@ -570,62 +590,60 @@ static CsvMemberCache()
570590 public static List < CsvMemberInfo < T > > Members ;
571591 }
572592
/// <summary>
/// Decodes a raw CSV field. A field wrapped in double quotes has the outer quotes removed,
/// doubled quotes collapsed to a single quote, and bare LF normalized to CRLF.
/// Any other field is returned unchanged (no allocation).
/// </summary>
/// <param name="s">The raw field text as matched in the line.</param>
/// <returns>The decoded field text.</returns>
static ReadOnlySpan<char> DecodeCsv(ReadOnlySpan<char> s)
{
    // The length check matters: a field consisting of a single '"' both starts and ends
    // with a quote, and slicing it with [1..^1] would throw ArgumentOutOfRangeException.
    if (s.Length < 2 || s[0] != '"' || s[^1] != '"')
        return s;

    string unescaped = new string(s[1..^1]).Replace("\"\"", "\"");

    // Turn line feeds that are not already preceded by a carriage return into CRLF.
    return Regex.Replace(unescaped, "(?<!\r)\n", "\r\n");
}
584606
/// <summary>
/// Builds the default parser for a column of the given (non-nullable) type.
/// For every explicitly handled type an empty field yields <c>null</c>; otherwise the text is
/// parsed using <paramref name="culture"/>. Types without a dedicated branch fall back to
/// <see cref="Convert.ChangeType(object, Type, IFormatProvider)"/>, with no empty-field check.
/// </summary>
/// <param name="type">Target type; the visible caller passes it already un-nullified.</param>
/// <param name="culture">Culture used for number and date parsing.</param>
/// <param name="format">
/// Optional exact format for date/time types. When null, the culture-aware <c>Parse</c> is used
/// instead of <c>ParseExact</c>, because <c>ParseExact</c> rejects a missing format.
/// </param>
/// <returns>A delegate that converts the field text into a boxed value (or null).</returns>
static ValueParser GetBasicParser(Type type, CultureInfo culture, string? format)
{
    return type switch
    {
        _ when type == typeof(string) => str => str.Length == 0 ? null : str.ToString(),
        _ when type == typeof(byte) => str => str.Length == 0 ? null : byte.Parse(str, NumberStyles.Integer, culture),
        _ when type == typeof(sbyte) => str => str.Length == 0 ? null : sbyte.Parse(str, NumberStyles.Integer, culture),
        _ when type == typeof(short) => str => str.Length == 0 ? null : short.Parse(str, NumberStyles.Integer, culture),
        _ when type == typeof(ushort) => str => str.Length == 0 ? null : ushort.Parse(str, NumberStyles.Integer, culture),
        _ when type == typeof(int) => str => str.Length == 0 ? null : int.Parse(str, NumberStyles.Integer, culture),
        _ when type == typeof(uint) => str => str.Length == 0 ? null : uint.Parse(str, NumberStyles.Integer, culture),
        _ when type == typeof(long) => str => str.Length == 0 ? null : long.Parse(str, NumberStyles.Integer, culture),
        _ when type == typeof(ulong) => str => str.Length == 0 ? null : ulong.Parse(str, NumberStyles.Integer, culture),
        _ when type == typeof(float) => str => str.Length == 0 ? null : float.Parse(str, NumberStyles.Float, culture),
        _ when type == typeof(double) => str => str.Length == 0 ? null : double.Parse(str, NumberStyles.Float, culture),
        _ when type == typeof(decimal) => str => str.Length == 0 ? null : decimal.Parse(str, NumberStyles.Number, culture),

        // Date/time types: only use ParseExact when a format was actually configured.
        _ when type == typeof(DateTime) => str => str.Length == 0 ? null :
            (format == null ? DateTime.Parse(str, culture) : DateTime.ParseExact(str, format, culture)),
        _ when type == typeof(DateTimeOffset) => str => str.Length == 0 ? null :
            (format == null ? DateTimeOffset.Parse(str, culture) : DateTimeOffset.ParseExact(str, format, culture)),
        _ when type == typeof(DateOnly) => str => str.Length == 0 ? null :
            (format == null ? DateOnly.Parse(str, culture) : DateOnly.ParseExact(str, format, culture)),
        _ when type == typeof(TimeOnly) => str => str.Length == 0 ? null :
            (format == null ? TimeOnly.Parse(str, culture) : TimeOnly.ParseExact(str, format, culture)),

        // Guid.Parse has a span overload, so no intermediate string is needed.
        _ when type == typeof(Guid) => str => str.Length == 0 ? null : Guid.Parse(str),
        _ when type.IsEnum => str => str.Length == 0 ? null : Enum.Parse(type, str),

        // Fallback for any other type: let Convert do the work on a materialized string.
        _ => str => Convert.ChangeType(new string(str), type, culture)
    };
}
616632}
617633
/// <summary>
/// Read options that depend on the row type <typeparamref name="T"/>,
/// in addition to the untyped settings inherited from <see cref="CsvReadOptions"/>.
/// </summary>
public class CsvReadOptions<T> : CsvReadOptions
    where T : class
{
    /// <summary>
    /// Optional factory for a custom per-column parser. It receives the column metadata and the
    /// culture, and may return null (presumably to request the default parser — confirm in GetParser).
    /// </summary>
    public Func<CsvMemberInfo<T>, CultureInfo, Csv.ValueParser?>? ParserFactory;

    /// <summary>
    /// Optional custom row constructor. When set, the multi-line reading path calls it with the
    /// raw text of the line instead of mapping the values onto the members of <typeparamref name="T"/>.
    /// </summary>
    public CsvConstructor<T>? Constructor;
}
624640
/// <summary>
/// Builds an instance of <typeparamref name="T"/> from the raw text of one CSV line.
/// </summary>
/// <typeparam name="T">Type of the row object to create.</typeparam>
/// <param name="line">The text of the line, passed as a span.</param>
/// <returns>The constructed row object.</returns>
public delegate T CsvConstructor<T>(ReadOnlySpan<char> line);
642+
/// <summary>
/// Settings shared by the typed and untyped CSV readers.
/// </summary>
public class CsvReadOptions
{
    /// <summary>
    /// When true, the reader takes the single-line path and handles each text line as one record.
    /// When false, the whole stream is read at once and split into records with a regex.
    /// Breaking change: the default used to be false.
    /// </summary>
    public bool AsumeSingleLine = true;

    /// <summary>
    /// Optional error filter. Receives the exception and the text of the offending line;
    /// returning true skips the row, otherwise the error is rethrown wrapped in ParseCsvException.
    /// </summary>
    public Func<Exception, string, bool>? SkipError;

    /// <summary>Timeout given to the regexes used for parsing; no timeout by default.</summary>
    public TimeSpan RegexTimeout = Regex.InfiniteMatchTimeout;

    /// <summary>Explicit value separator; when null it is derived from the culture.</summary>
    public char? ListSeparator;
}
0 commit comments