@@ -9,12 +9,18 @@ import (
9
9
"encoding/csv"
10
10
"html"
11
11
"io"
12
+ "regexp"
13
+ "strings"
12
14
13
15
"code.gitea.io/gitea/modules/markup"
16
+ "code.gitea.io/gitea/modules/util"
14
17
)
15
18
19
// quoteRegexp matches one double-quoted span, including an empty one ("") and
// spans that contain newlines. An escaped quote inside a field ("a""b") is
// covered as two adjacent spans. Only the double quote is recognised because
// that is the only quoting character encoding/csv honours; an apostrophe is
// ordinary field data and must not start a span.
var quoteRegexp = regexp.MustCompile(`"[^"]*"`)
20
+
16
21
func init () {
17
22
markup .RegisterParser (Parser {})
23
+
18
24
}
19
25
20
26
// Parser implements markup.Parser for CSV files
@@ -28,12 +34,13 @@ func (Parser) Name() string {
28
34
29
35
// Extensions implements markup.Parser
30
36
func (Parser ) Extensions () []string {
31
- return []string {".csv" }
37
+ return []string {".csv" , ".tsv" }
32
38
}
33
39
34
40
// Render implements markup.Parser
35
- func (Parser ) Render (rawBytes []byte , urlPrefix string , metas map [string ]string , isWiki bool ) []byte {
41
+ func (p Parser ) Render (rawBytes []byte , urlPrefix string , metas map [string ]string , isWiki bool ) []byte {
36
42
rd := csv .NewReader (bytes .NewReader (rawBytes ))
43
+ rd .Comma = p .bestDelimiter (rawBytes )
37
44
var tmpBlock bytes.Buffer
38
45
tmpBlock .WriteString (`<table class="table">` )
39
46
for {
@@ -50,9 +57,57 @@ func (Parser) Render(rawBytes []byte, urlPrefix string, metas map[string]string,
50
57
tmpBlock .WriteString (html .EscapeString (field ))
51
58
tmpBlock .WriteString ("</td>" )
52
59
}
53
- tmpBlock .WriteString ("<tr>" )
60
+ tmpBlock .WriteString ("</ tr>" )
54
61
}
55
62
tmpBlock .WriteString ("</table>" )
56
63
57
64
return tmpBlock .Bytes ()
58
65
}
66
+
67
+ // bestDelimiter scores the input CSV data against delimiters, and returns the best match.
68
+ // Reads at most 10k bytes & 10 lines.
69
+ func (p Parser ) bestDelimiter (data []byte ) rune {
70
+ maxLines := 10
71
+ maxBytes := util .Min (len (data ), 1e4 )
72
+ text := string (data [:maxBytes ])
73
+ text = quoteRegexp .ReplaceAllLiteralString (text , "" )
74
+ lines := strings .SplitN (text , "\n " , maxLines + 1 )
75
+ lines = lines [:util .Min (maxLines , len (lines ))]
76
+
77
+ delimiters := []rune {',' , ';' , '\t' , '|' }
78
+ bestDelim := delimiters [0 ]
79
+ bestScore := 0.0
80
+ for _ , delim := range delimiters {
81
+ score := p .scoreDelimiter (lines , delim )
82
+ if score > bestScore {
83
+ bestScore = score
84
+ bestDelim = delim
85
+ }
86
+ }
87
+
88
+ return bestDelim
89
+ }
90
+
91
+ // scoreDelimiter uses a count & regularity metric to evaluate a delimiter against lines of CSV
92
+ func (Parser ) scoreDelimiter (lines []string , delim rune ) (score float64 ) {
93
+ countTotal := 0
94
+ countLineMax := 0
95
+ linesNotEqual := 0
96
+
97
+ for _ , line := range lines {
98
+ if len (line ) == 0 {
99
+ continue
100
+ }
101
+
102
+ countLine := strings .Count (line , string (delim ))
103
+ countTotal += countLine
104
+ if countLine != countLineMax {
105
+ if countLineMax != 0 {
106
+ linesNotEqual ++
107
+ }
108
+ countLineMax = util .Max (countLine , countLineMax )
109
+ }
110
+ }
111
+
112
+ return float64 (countTotal ) * (1 - float64 (linesNotEqual )/ float64 (len (lines )))
113
+ }
0 commit comments