Skip to content

Commit d4371ab

Browse files
authored
ISSUE 227: add parser_settings.debug flag and enable csv2 reader to optionally inject debug (line) info into record IDR. (#228)
Decided to make the debug flag a global setting in `parser_settings` and to keep its type an int (>= 0) for future flexibility. A string/enum would have been more strictly defined, but since this is an advanced setting with very little current usage, it is left flexible until further requirements arise. For the `csv2` reader, if `parser_settings.debug` is 0 or omitted, which is the case for the vast majority of existing and future `csv2` schemas, there is no behavior change; if `parser_settings.debug` is non-zero, a `__debug` node is added to the record IDR structure, under which currently only the `line_num` debug info is added. This design is flexible enough for future adoption by all other file format readers, yet has zero impact on any existing schema.
1 parent da04593 commit d4371ab

28 files changed

+240
-48
lines changed

doc/csv2_in_depth.md

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -486,6 +486,27 @@ Then the output for the above sample input will be:
486486
]
487487
```
488488

489+
## Debug Info
490+
To add debug information to each record's IDR, making it available for
491+
`transform_declarations` use, specify `"debug": 1` in the `parser_settings` section of the schema:
492+
```
493+
{
494+
"parser_settings": {
495+
"version": "omni.2.1",
496+
"file_format_type": "csv2",
497+
"debug": 1
498+
},
499+
...
500+
}
501+
```
502+
If the `csv2` parser detects the `"debug": 1` setting, it will add the following debug info into the
503+
current record's IDR structure:
504+
505+
- xpath: `__debug/line_num`: contains the starting line number in the CSV file of the current
506+
record.
507+
508+
Check this [sample](../extensions/omniv21/samples/csv2/1_single_row.schema.json) for usage pattern.
509+
489510
## Migration from `'csv'` Schemas
490511

491512
If one looks at the documentation for the old `csv` schema [here](./csv_in_depth.md), you notice

extensions/omniv21/fileformat/csv/format.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import (
1313
"github.com/jf-tech/omniparser/extensions/omniv21/fileformat"
1414
"github.com/jf-tech/omniparser/extensions/omniv21/transform"
1515
v21validation "github.com/jf-tech/omniparser/extensions/omniv21/validation"
16+
"github.com/jf-tech/omniparser/header"
1617
"github.com/jf-tech/omniparser/validation"
1718
)
1819

@@ -97,7 +98,10 @@ func (f *csvFileFormat) validateColumns(columns []Column) error {
9798
}
9899

99100
func (f *csvFileFormat) CreateFormatReader(
100-
name string, r io.Reader, runtime interface{}) (fileformat.FormatReader, error) {
101+
_ header.Header,
102+
name string,
103+
r io.Reader,
104+
runtime interface{}) (fileformat.FormatReader, error) {
101105
csv := runtime.(*csvFormatRuntime)
102106
return NewReader(name, r, csv.Decl, csv.XPath)
103107
}

extensions/omniv21/fileformat/csv/format_test.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import (
1313

1414
"github.com/jf-tech/omniparser/errs"
1515
"github.com/jf-tech/omniparser/extensions/omniv21/transform"
16+
"github.com/jf-tech/omniparser/header"
1617
"github.com/jf-tech/omniparser/idr"
1718
)
1819

@@ -157,6 +158,7 @@ func TestValidateSchema(t *testing.T) {
157158

158159
func TestCreateFormatReader(t *testing.T) {
159160
r, err := NewCSVFileFormat("test").CreateFormatReader(
161+
header.Header{},
160162
"test-input",
161163
strings.NewReader(
162164
lf("A|B|C")+

extensions/omniv21/fileformat/edi/format.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import (
1313
"github.com/jf-tech/omniparser/extensions/omniv21/fileformat"
1414
"github.com/jf-tech/omniparser/extensions/omniv21/transform"
1515
v21validation "github.com/jf-tech/omniparser/extensions/omniv21/validation"
16+
"github.com/jf-tech/omniparser/header"
1617
"github.com/jf-tech/omniparser/validation"
1718
)
1819

@@ -74,7 +75,10 @@ func (f *ediFileFormat) validateFileDecl(decl *FileDecl) error {
7475
}
7576

7677
func (f *ediFileFormat) CreateFormatReader(
77-
name string, r io.Reader, runtime interface{}) (fileformat.FormatReader, error) {
78+
_ header.Header,
79+
name string,
80+
r io.Reader,
81+
runtime interface{}) (fileformat.FormatReader, error) {
7882
edi := runtime.(*ediFormatRuntime)
7983
return NewReader(name, r, edi.Decl, edi.XPath)
8084
}

extensions/omniv21/fileformat/edi/format_test.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import (
1111
"github.com/stretchr/testify/assert"
1212

1313
"github.com/jf-tech/omniparser/extensions/omniv21/transform"
14+
"github.com/jf-tech/omniparser/header"
1415
"github.com/jf-tech/omniparser/idr"
1516
)
1617

@@ -172,7 +173,8 @@ func TestCreateFormatReader(t *testing.T) {
172173
}`
173174
rt, err := format.ValidateSchema(fileFormatEDI, []byte(fileDecl), &transform.Decl{XPath: strs.StrPtr(".")})
174175
assert.NoError(t, err)
175-
reader, err := format.CreateFormatReader("test", strings.NewReader("ISA*e1*e2*e3\nISA*e4*e5*e6\n"), rt)
176+
reader, err := format.CreateFormatReader(
177+
header.Header{}, "test", strings.NewReader("ISA*e1*e2*e3\nISA*e4*e5*e6\n"), rt)
176178
assert.NoError(t, err)
177179
n, err := reader.Read()
178180
assert.NoError(t, err)

extensions/omniv21/fileformat/fileformat.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import (
55

66
"github.com/jf-tech/omniparser/errs"
77
"github.com/jf-tech/omniparser/extensions/omniv21/transform"
8+
"github.com/jf-tech/omniparser/header"
89
"github.com/jf-tech/omniparser/idr"
910
)
1011

@@ -18,7 +19,10 @@ type FileFormat interface {
1819

1920
// CreateFormatReader creates a FormatReader that reads records of input data for this file format.
2021
CreateFormatReader(
21-
inputName string, input io.Reader, formatRuntime interface{}) (FormatReader, error)
22+
schemaHeader header.Header,
23+
inputName string,
24+
input io.Reader,
25+
formatRuntime interface{}) (FormatReader, error)
2226
}
2327

2428
// FormatReader is an interface for reading a specific input format in omni schema handler. We'll have

extensions/omniv21/fileformat/fixedlength/format.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import (
1414
"github.com/jf-tech/omniparser/extensions/omniv21/fileformat"
1515
"github.com/jf-tech/omniparser/extensions/omniv21/transform"
1616
v21validation "github.com/jf-tech/omniparser/extensions/omniv21/validation"
17+
"github.com/jf-tech/omniparser/header"
1718
"github.com/jf-tech/omniparser/validation"
1819
)
1920

@@ -128,7 +129,10 @@ func (f *fixedLengthFileFormat) validateColumns(cols []*ColumnDecl) error {
128129
}
129130

130131
func (f *fixedLengthFileFormat) CreateFormatReader(
131-
name string, r io.Reader, runtime interface{}) (fileformat.FormatReader, error) {
132+
_ header.Header,
133+
name string,
134+
r io.Reader,
135+
runtime interface{}) (fileformat.FormatReader, error) {
132136
rt := runtime.(*fixedLengthFormatRuntime)
133137
return NewReader(name, r, rt.Decl, rt.XPath)
134138
}

extensions/omniv21/fileformat/fixedlength/format_test.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import (
1313

1414
"github.com/jf-tech/omniparser/errs"
1515
"github.com/jf-tech/omniparser/extensions/omniv21/transform"
16+
"github.com/jf-tech/omniparser/header"
1617
"github.com/jf-tech/omniparser/idr"
1718
)
1819

@@ -308,6 +309,7 @@ func TestValidateSchema(t *testing.T) {
308309

309310
func TestCreateFormatReader(t *testing.T) {
310311
r, err := NewFixedLengthFileFormat("test").CreateFormatReader(
312+
header.Header{},
311313
"test",
312314
strings.NewReader("abcd\n1234\n"),
313315
&fixedLengthFormatRuntime{

extensions/omniv21/fileformat/flatfile/csv/.snapshots/TestRead-multiple_records

Lines changed: 74 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,40 @@
11
{
22
"Children": [
3+
{
4+
"Children": [
5+
{
6+
"Children": [
7+
{
8+
"Children": null,
9+
"Data": "1",
10+
"FirstChild": null,
11+
"FormatSpecific": null,
12+
"LastChild": null,
13+
"NextSibling": null,
14+
"Parent": "(ElementNode line_num)",
15+
"PrevSibling": null,
16+
"Type": "TextNode"
17+
}
18+
],
19+
"Data": "line_num",
20+
"FirstChild": "(TextNode '1')",
21+
"FormatSpecific": null,
22+
"LastChild": "(TextNode '1')",
23+
"NextSibling": null,
24+
"Parent": "(ElementNode __debug)",
25+
"PrevSibling": null,
26+
"Type": "ElementNode"
27+
}
28+
],
29+
"Data": "__debug",
30+
"FirstChild": "(ElementNode line_num)",
31+
"FormatSpecific": null,
32+
"LastChild": "(ElementNode line_num)",
33+
"NextSibling": "(ElementNode r1c1)",
34+
"Parent": "(ElementNode r1)",
35+
"PrevSibling": null,
36+
"Type": "ElementNode"
37+
},
338
{
439
"Children": [
540
{
@@ -20,7 +55,7 @@
2055
"LastChild": "(TextNode 'v1')",
2156
"NextSibling": "(ElementNode r1c2)",
2257
"Parent": "(ElementNode r1)",
23-
"PrevSibling": null,
58+
"PrevSibling": "(ElementNode __debug)",
2459
"Type": "ElementNode"
2560
},
2661
{
@@ -48,7 +83,7 @@
4883
}
4984
],
5085
"Data": "r1",
51-
"FirstChild": "(ElementNode r1c1)",
86+
"FirstChild": "(ElementNode __debug)",
5287
"FormatSpecific": null,
5388
"LastChild": "(ElementNode r1c2)",
5489
"NextSibling": null,
@@ -58,6 +93,41 @@
5893
},
5994
{
6095
"Children": [
96+
{
97+
"Children": [
98+
{
99+
"Children": [
100+
{
101+
"Children": null,
102+
"Data": "3",
103+
"FirstChild": null,
104+
"FormatSpecific": null,
105+
"LastChild": null,
106+
"NextSibling": null,
107+
"Parent": "(ElementNode line_num)",
108+
"PrevSibling": null,
109+
"Type": "TextNode"
110+
}
111+
],
112+
"Data": "line_num",
113+
"FirstChild": "(TextNode '3')",
114+
"FormatSpecific": null,
115+
"LastChild": "(TextNode '3')",
116+
"NextSibling": null,
117+
"Parent": "(ElementNode __debug)",
118+
"PrevSibling": null,
119+
"Type": "ElementNode"
120+
}
121+
],
122+
"Data": "__debug",
123+
"FirstChild": "(ElementNode line_num)",
124+
"FormatSpecific": null,
125+
"LastChild": "(ElementNode line_num)",
126+
"NextSibling": "(ElementNode r1c1)",
127+
"Parent": "(ElementNode r1)",
128+
"PrevSibling": null,
129+
"Type": "ElementNode"
130+
},
61131
{
62132
"Children": [
63133
{
@@ -78,12 +148,12 @@
78148
"LastChild": "(TextNode '')",
79149
"NextSibling": null,
80150
"Parent": "(ElementNode r1)",
81-
"PrevSibling": null,
151+
"PrevSibling": "(ElementNode __debug)",
82152
"Type": "ElementNode"
83153
}
84154
],
85155
"Data": "r1",
86-
"FirstChild": "(ElementNode r1c1)",
156+
"FirstChild": "(ElementNode __debug)",
87157
"FormatSpecific": null,
88158
"LastChild": "(ElementNode r1c1)",
89159
"NextSibling": null,

extensions/omniv21/fileformat/flatfile/csv/format.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import (
1414
"github.com/jf-tech/omniparser/extensions/omniv21/fileformat"
1515
"github.com/jf-tech/omniparser/extensions/omniv21/transform"
1616
v21validation "github.com/jf-tech/omniparser/extensions/omniv21/validation"
17+
"github.com/jf-tech/omniparser/header"
1718
"github.com/jf-tech/omniparser/validation"
1819
)
1920

@@ -76,7 +77,10 @@ func (f *csvFormat) validateFileDecl(decl *FileDecl) error {
7677
}
7778

7879
func (f *csvFormat) CreateFormatReader(
79-
name string, r io.Reader, runtime interface{}) (fileformat.FormatReader, error) {
80+
schemaHeader header.Header,
81+
name string,
82+
r io.Reader,
83+
runtime interface{}) (fileformat.FormatReader, error) {
8084
rt := runtime.(*csvFormatRuntime)
8185
targetXPathExpr, err := func() (*xpath.Expr, error) {
8286
if rt.XPath == "" || rt.XPath == "." {
@@ -87,7 +91,7 @@ func (f *csvFormat) CreateFormatReader(
8791
if err != nil {
8892
return nil, f.FmtErr("xpath '%s' on 'FINAL_OUTPUT' is invalid: %s", rt.XPath, err.Error())
8993
}
90-
return NewReader(name, r, rt.Decl, targetXPathExpr), nil
94+
return NewReader(schemaHeader, name, r, rt.Decl, targetXPathExpr), nil
9195
}
9296

9397
func (f *csvFormat) FmtErr(format string, args ...interface{}) error {

extensions/omniv21/fileformat/flatfile/csv/format_test.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import (
1212

1313
"github.com/jf-tech/omniparser/errs"
1414
"github.com/jf-tech/omniparser/extensions/omniv21/transform"
15+
"github.com/jf-tech/omniparser/header"
1516
"github.com/jf-tech/omniparser/idr"
1617
)
1718

@@ -284,6 +285,7 @@ func TestCreateFormatReader(t *testing.T) {
284285
&transform.Decl{XPath: finalOutputXPath})
285286
assert.NoError(t, err)
286287
reader, err := format.CreateFormatReader(
288+
header.Header{},
287289
"test-input",
288290
strings.NewReader("abcd|efgh|jklm\n123|456|789\n"),
289291
runtime)
@@ -301,6 +303,7 @@ func TestCreateFormatReader(t *testing.T) {
301303

302304
// test CreateFormatReader called with invalid target xpath.
303305
reader, err := NewCSVFileFormat("test-schema").CreateFormatReader(
306+
header.Header{},
304307
"test-input",
305308
strings.NewReader("abcd\n1234\n"),
306309
&csvFormatRuntime{XPath: "["})

0 commit comments

Comments
 (0)