Skip to content

Commit

Permalink
[fix]CSV读写支持多行文本。fix: #40
Browse files Browse the repository at this point in the history
  • Loading branch information
nnhy committed Nov 12, 2024
1 parent 49a62ae commit 7b10b61
Show file tree
Hide file tree
Showing 3 changed files with 171 additions and 124 deletions.
2 changes: 1 addition & 1 deletion NewLife.Core/Data/DbTable.cs
Original file line number Diff line number Diff line change
Expand Up @@ -585,7 +585,7 @@ public void LoadCsv(String file)
using var csv = new CsvFile(file, false);
var cs = csv.ReadLine();
if (cs != null) Columns = cs;
Rows = csv.ReadAll();
Rows = csv.ReadAll().Cast<Object?[]>().ToList();
}
#endregion

Expand Down
98 changes: 51 additions & 47 deletions NewLife.Core/IO/CsvFile.cs
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ public virtual async ValueTask DisposeAsync()
#endregion

#region 读取
private Int32 _columnCount;
/// <summary>读取一行</summary>
/// <returns></returns>
public String[]? ReadLine()
Expand All @@ -119,56 +120,47 @@ public virtual async ValueTask DisposeAsync()

// 直接分解,引号合并
var arr = line.Split(Separator);
// 如果字段数不足,可能有换行符,读取后面的行
while (_columnCount > 0 && arr.Length < _columnCount)
{
var next = _reader?.ReadLine();
if (next == null) break;

line += Environment.NewLine + next;

arr = line.Split(Separator);
}
for (var i = 0; i < arr.Length; i++)
{
var str = (arr[i] + "").Trim();
if (str.StartsWith("\""))
var txt = (arr[i] + "").Trim();
if (txt.Length >= 2 && txt[0] == '\"' && txt[^1] == '\"')
{
var txt = "";
if (str.EndsWith("\"") && !str.EndsWith("\"\""))
txt = str.Trim('\"');
else
{
// 找到下一个以引号结尾的项
for (var j = i + 1; j < arr.Length; j++)
{
if (arr[j].EndsWith("\""))
{
txt = arr.Skip(i).Take(j - i + 1).Join(Separator + "").Trim('\"');

// 跳过去一大步
i = j;
break;
}
}
}
txt = txt[1..^1];

// 两个引号是一个引号的转义
txt = txt.Replace("\"\"", "\"");
list.Add(txt);
}
else
list.Add(str);

list.Add(txt);
}

// 记录列数
if (_columnCount == 0 && list.Count > 0) _columnCount = list.Count;

return list.ToArray();
}

/// <summary>读取所有行</summary>
/// <returns></returns>
public String[][] ReadAll()
public IEnumerable<String[]> ReadAll()
{
var list = new List<String[]>();

while (true)
{
var line = ReadLine();
if (line == null) break;

list.Add(line);
yield return line;
}

return list.ToArray();
}

private StreamReader? _reader;
Expand Down Expand Up @@ -232,28 +224,40 @@ protected virtual String BuildLine(IEnumerable<Object?> line)
{
if (sb.Length > 0) sb.Append(Separator);

var str = item switch
if (item is DateTime dt)
{
String str2 => str2,
DateTime dt => dt.ToFullString(""),
Boolean b => b ? "1" : "0",
_ => item + "",
};

// 避免出现科学计数问题 数据前增加制表符"\t"
// 不同软件显示不太一样 wps超过9位就自动转为科学计数,有的软件是超过11位,所以采用最小范围9
var reg = new Regex("^\\d+$");
if (str.Length > 9 && reg.Match(str).Success)
sb.Append(dt.ToFullString(""));
}
else if (item is Boolean b)
{
str = $"\t{str}";
sb.Append(b ? "1" : "0");
}

if (str.Contains('"'))
sb.AppendFormat("\"{0}\"", str.Replace("\"", "\"\""));
else if (str.Contains(Separator) || str.Contains('\r') || str.Contains('\n'))
sb.AppendFormat("\"{0}\"", str);
else
sb.Append(str);
{
if (item is not String str) str = item + "";

// 避免出现科学计数问题 数据前增加制表符"\t"
// 不同软件显示不太一样 wps超过9位就自动转为科学计数,有的软件是超过11位,所以采用最小范围9
if (str.Length > 9 && Int64.TryParse(str, out _))
{
sb.Append('\t');
sb.Append(str);
}
else if (str.Contains('"'))
{
sb.Append('\"');
sb.Append(str.Replace("\"", "\"\""));
sb.Append('\"');
}
else if (str.Contains(Separator) || str.Contains('\r') || str.Contains('\n'))
{
sb.Append('\"');
sb.Append(str);
sb.Append('\"');
}
else
sb.Append(str);
}
}

return sb.Return(true);
Expand Down
195 changes: 119 additions & 76 deletions XUnitTest.Core/IO/CsvFileTests.cs
Original file line number Diff line number Diff line change
@@ -1,93 +1,136 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using NewLife;
using NewLife.IO;
using Xunit;

namespace XUnitTest.IO
namespace XUnitTest.IO;

public class CsvFileTests
{
public class CsvFileTests
/// <summary>Writes a header and two rows to an in-memory CSV, checks the raw text, then reads it back.</summary>
/// <remarks>
/// The read-back half verifies the parsed field values as well as the row count,
/// so a parser that splits or unquotes fields incorrectly is caught here.
/// </remarks>
[Fact]
public void MemoryTest()
{
    // Dispose the backing stream when the test ends; disposing a MemoryStream twice is harmless.
    using var ms = new MemoryStream();

    var list = new List<Object[]>
    {
        new Object[] { 1234, "Stone", true, DateTime.Now },
        new Object[] { 5678, "NewLife", false, DateTime.Today }
    };

    {
        // NOTE(review): the second argument presumably keeps the stream open after the writer is disposed - confirm.
        using var csv = new CsvFile(ms, true);
        csv.Separator = ',';
        csv.Encoding = Encoding.UTF8;

        csv.WriteLine(["Code", "Name", "Enable", "CreateTime"]);
        csv.WriteAll(list);
    }

    // Inspect the raw text: one header line plus one line per data row.
    var txt = ms.ToArray().ToStr();
    var lines = txt.Split(Environment.NewLine, StringSplitOptions.RemoveEmptyEntries);
    Assert.Equal(3, lines.Length);
    Assert.Equal("Code,Name,Enable,CreateTime", lines[0]);
    Assert.Equal($"1234,Stone,1,{((DateTime)list[0][3]).ToFullString()}", lines[1]);
    Assert.Equal($"5678,NewLife,0,{((DateTime)list[1][3]).ToFullString()}", lines[2]);

    {
        // Rewind so the reader starts at the header line.
        ms.Position = 0;
        using var csv = new CsvFile(ms);
        var headers = csv.ReadLine();
        var all = csv.ReadAll().ToList();

        // ReadLine is declared as String[]?; fail with a clear message instead of a NullReferenceException.
        Assert.NotNull(headers);
        Assert.Equal(4, headers.Length);
        Assert.Equal("Code", headers[0]);
        Assert.Equal("Name", headers[1]);

        Assert.Equal(2, all.Count);

        // Round-trip check: every value comes back as the text that was written.
        Assert.Equal("1234", all[0][0]);
        Assert.Equal("Stone", all[0][1]);
        Assert.Equal("1", all[0][2]);
        Assert.Equal("5678", all[1][0]);
        Assert.Equal("NewLife", all[1][1]);
        Assert.Equal("0", all[1][2]);
    }
}

/// <summary>Writes a header and two rows to a CSV file on disk, checks the file's lines, then reads it back.</summary>
/// <remarks>
/// Any file left at the target path by an earlier run is removed first, and the
/// read-back half verifies the parsed field values as well as the row count.
/// </remarks>
[Fact]
public void FileTest()
{
    var file = "data/test.csv";

    // Start from a clean slate so leftovers from a previous run cannot affect the assertions below.
    var fullPath = file.GetFullPath();
    if (File.Exists(fullPath)) File.Delete(fullPath);

    var list = new List<Object[]>
    {
        new Object[] { 1234, "Stone", true, DateTime.Now },
        new Object[] { 5678, "NewLife", false, DateTime.Today }
    };

    {
        // NOTE(review): the second argument presumably opens the file for writing - confirm.
        using var csv = new CsvFile(file, true);
        csv.Separator = ',';
        csv.Encoding = Encoding.UTF8;

        csv.WriteLine(["Code", "Name", "Enable", "CreateTime"]);
        csv.WriteAll(list);
    }

    // Inspect the file directly: one header line plus one line per data row.
    var lines = File.ReadAllLines(fullPath);
    Assert.Equal(3, lines.Length);
    Assert.Equal("Code,Name,Enable,CreateTime", lines[0]);
    Assert.Equal($"1234,Stone,1,{((DateTime)list[0][3]).ToFullString()}", lines[1]);
    Assert.Equal($"5678,NewLife,0,{((DateTime)list[1][3]).ToFullString()}", lines[2]);

    {
        using var csv = new CsvFile(file);
        var headers = csv.ReadLine();
        var all = csv.ReadAll().ToList();

        // ReadLine is declared as String[]?; fail with a clear message instead of a NullReferenceException.
        Assert.NotNull(headers);
        Assert.Equal(4, headers.Length);
        Assert.Equal("Code", headers[0]);
        Assert.Equal("Name", headers[1]);

        Assert.Equal(2, all.Count);

        // Round-trip check: every value comes back as the text that was written.
        Assert.Equal("1234", all[0][0]);
        Assert.Equal("Stone", all[0][1]);
        Assert.Equal("1", all[0][2]);
        Assert.Equal("5678", all[1][0]);
        Assert.Equal("NewLife", all[1][1]);
        Assert.Equal("0", all[1][2]);
    }
}

[Fact]
public void BigString()
{
[Fact]
public void MemoryTest()
var ms = new MemoryStream();

var list = new List<Object[]>
{
new Object[] { 1234, "Stone", true, DateTime.Now },
new Object[] { 5678, "Hello\r\n\r\nNewLife in \"2025\"", false, DateTime.Today }
};

{
var ms = new MemoryStream();

var list = new List<Object[]>
{
new Object[] { 1234, "Stone", true, DateTime.Now },
new Object[] { 5678, "NewLife", false, DateTime.Today }
};

{
using var csv = new CsvFile(ms, true);
csv.Separator = ',';
csv.Encoding = Encoding.UTF8;

csv.WriteLine(new[] { "Code", "Name", "Enable", "CreateTime" });
csv.WriteAll(list);
}

var txt = ms.ToArray().ToStr();
var lines = txt.Split(Environment.NewLine, StringSplitOptions.RemoveEmptyEntries);
Assert.Equal(3, lines.Length);
Assert.Equal("Code,Name,Enable,CreateTime", lines[0]);
Assert.Equal($"1234,Stone,1,{((DateTime)list[0][3]).ToFullString()}", lines[1]);
Assert.Equal($"5678,NewLife,0,{((DateTime)list[1][3]).ToFullString()}", lines[2]);

{
ms.Position = 0;
using var csv = new CsvFile(ms);
var headers = csv.ReadLine();
var all = csv.ReadAll();

Assert.Equal(4, headers.Length);
Assert.Equal("Code", headers[0]);
Assert.Equal("Name", headers[1]);

Assert.Equal(2, all.Length);
}
using var csv = new CsvFile(ms, true);
csv.Separator = ',';
csv.Encoding = Encoding.UTF8;

csv.WriteLine(["Code", "Name", "Enable", "CreateTime"]);
csv.WriteAll(list);
}

[Fact]
public void FileTest()
var txt = ms.ToArray().ToStr();
var lines = txt.Split(Environment.NewLine);
Assert.Equal(6, lines.Length);
Assert.Equal("Code,Name,Enable,CreateTime", lines[0]);
Assert.Equal($"1234,Stone,1,{((DateTime)list[0][3]).ToFullString()}", lines[1]);
Assert.Equal($"5678,\"Hello", lines[2]);
Assert.Empty(lines[3]);
Assert.Equal($"NewLife in \"\"2025\"\"\",0,{((DateTime)list[1][3]).ToFullString()}", lines[4]);

{
var file = "data/test.csv";

var list = new List<Object[]>
{
new Object[] { 1234, "Stone", true, DateTime.Now },
new Object[] { 5678, "NewLife", false, DateTime.Today }
};

{
using var csv = new CsvFile(file, true);
csv.Separator = ',';
csv.Encoding = Encoding.UTF8;

csv.WriteLine(new[] { "Code", "Name", "Enable", "CreateTime" });
csv.WriteAll(list);
}

var lines = File.ReadAllLines(file.GetFullPath());
Assert.Equal(3, lines.Length);
Assert.Equal("Code,Name,Enable,CreateTime", lines[0]);
Assert.Equal($"1234,Stone,1,{((DateTime)list[0][3]).ToFullString()}", lines[1]);
Assert.Equal($"5678,NewLife,0,{((DateTime)list[1][3]).ToFullString()}", lines[2]);

{
using var csv = new CsvFile(file);
var headers = csv.ReadLine();
var all = csv.ReadAll();

Assert.Equal(4, headers.Length);
Assert.Equal("Code", headers[0]);
Assert.Equal("Name", headers[1]);

Assert.Equal(2, all.Length);
}
ms.Position = 0;
using var csv = new CsvFile(ms);
var headers = csv.ReadLine();
var all = csv.ReadAll().ToList();

Assert.Equal(4, headers.Length);
Assert.Equal("Code", headers[0]);
Assert.Equal("Name", headers[1]);

Assert.Equal(2, all.Count);
}
}
}

0 comments on commit 7b10b61

Please sign in to comment.