Skip to content

Commit d30ed5e

Browse files
committed
CSV/XLS import: handle missing or duplicate column names
1 parent 408f5c0 commit d30ed5e

14 files changed: +123 -12 lines changed

src/SqlNotebook/Import/Xls/ImportXlsForm.cs

Lines changed: 15 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -264,20 +264,30 @@ private void LoadColumns() {
264264
maxColumnIndex = Math.Min(maxColumnIndex.Value, sheetInfo.DataTable.Columns.Count - 1);
265265

266266
List<string> columnNames = new();
267+
var columnNumber = 0;
267268
for (var columnIndex = minColumnIndex.Value; columnIndex <= maxColumnIndex.Value; columnIndex++) {
268-
var columnName = $"column{columnIndex - minColumnIndex.Value + 1}";
269+
columnNumber++;
270+
var name = $"column{columnIndex - minColumnIndex.Value + 1}";
269271
if (_columnNamesCheck.Checked) {
270272
try {
271-
columnName = sheetInfo.DataTable.Rows[minRowIndex.Value][columnIndex].ToString();
273+
name = sheetInfo.DataTable.Rows[minRowIndex.Value][columnIndex].ToString();
272274
} catch {
273275
// Ignore for now. We will show this error when they hit OK.
274276
}
275277
}
276-
if (string.IsNullOrWhiteSpace(columnName)) {
277-
continue;
278+
if (string.IsNullOrWhiteSpace(name)) {
279+
name = $"column{columnNumber}";
278280
}
279281

280-
columnNames.Add(columnName);
282+
// add a numeric suffix to each column name if necessary to make them all unique
283+
var testName = name;
284+
var testNum = 1;
285+
while (columnNames.Contains(testName)) {
286+
testNum++;
287+
testName = $"{name}_{testNum}";
288+
}
289+
290+
columnNames.Add(testName);
281291
}
282292

283293
IReadOnlyList<string> detectedTypes = null;

src/SqlNotebookScript/Interpreter/ImportCsvStmtRunner.cs

Lines changed: 12 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -139,13 +139,22 @@ private List<string> ReadColumnNames(TextFieldParserBuffer parser) {
139139
}
140140
}
141141

142-
// add a numeric suffix to each column name if necessary to make them all unique
143-
foreach (var cell in cells) {
142+
var columnNumber = 0;
143+
foreach (var c in cells) {
144+
columnNumber++;
145+
var cell = c;
146+
147+
// fill in blank column names
148+
if (string.IsNullOrWhiteSpace(cell)) {
149+
cell = $"column{columnNumber}";
150+
}
151+
152+
// add a numeric suffix to each column name if necessary to make them all unique
144153
var testName = cell;
145154
var testNum = 1;
146155
while (srcColNames.Contains(testName)) {
147156
testNum++;
148-
testName = $"{cell}{testNum}";
157+
testName = $"{cell}_{testNum}";
149158
}
150159
srcColNames.Add(testName);
151160
}

src/SqlNotebookScript/Utils/XlsUtil.cs

Lines changed: 18 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -204,13 +204,27 @@ public static List<string> ReadColumnNames(IReadOnlyList<object[]> rows, bool he
204204
if (rows.Count == 0) {
205205
columnNames.Add("column1");
206206
} else if (headerRow) {
207+
var columnNumber = 0;
207208
foreach (var originalName in rows[0]) {
209+
columnNumber++;
208210
var isNull = originalName is DBNull || originalName == null;
209-
if (isNull) {
210-
columnNames.Add("");
211-
} else {
212-
columnNames.Add(originalName.ToString());
211+
var name = "";
212+
if (!isNull) {
213+
name = originalName.ToString();
213214
}
215+
if (string.IsNullOrWhiteSpace(name)) {
216+
name = $"column{columnNumber}";
217+
}
218+
219+
// add a numeric suffix to each column name if necessary to make them all unique
220+
var testName = name;
221+
var testNum = 1;
222+
while (columnNames.Contains(testName)) {
223+
testNum++;
224+
testName = $"{name}_{testNum}";
225+
}
226+
227+
columnNames.Add(testName);
214228
}
215229
} else {
216230
for (int i = 0; i < rows[0].Length; i++) {

src/Tests/ScriptTest.g.cs

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -43,11 +43,15 @@ public sealed partial class ScriptTest {
4343
[TestMethod] public void Test_IMPORT_CSV_BLANK_VALUES_2_sql() => TestScript(@"IMPORT CSV BLANK_VALUES 2.sql");
4444
[TestMethod] public void Test_IMPORT_CSV_BLANK_VALUES_3_sql() => TestScript(@"IMPORT CSV BLANK_VALUES 3.sql");
4545
[TestMethod] public void Test_IMPORT_CSV_BLANK_VALUES_DEFAULT_sql() => TestScript(@"IMPORT CSV BLANK_VALUES DEFAULT.sql");
46+
[TestMethod] public void Test_IMPORT_CSV_duplicate_header_1_sql() => TestScript(@"IMPORT CSV duplicate header 1.sql");
47+
[TestMethod] public void Test_IMPORT_CSV_duplicate_header_2_sql() => TestScript(@"IMPORT CSV duplicate header 2.sql");
4648
[TestMethod] public void Test_IMPORT_CSV_FILE_ENCODING_sql() => TestScript(@"IMPORT CSV FILE_ENCODING.sql");
4749
[TestMethod] public void Test_IMPORT_CSV_HEADER_ROW_sql() => TestScript(@"IMPORT CSV HEADER_ROW.sql");
4850
[TestMethod] public void Test_IMPORT_CSV_IF_CONVERSION_FAILS_sql() => TestScript(@"IMPORT CSV IF_CONVERSION_FAILS.sql");
4951
[TestMethod] public void Test_IMPORT_CSV_missing_header_1_sql() => TestScript(@"IMPORT CSV missing header 1.sql");
5052
[TestMethod] public void Test_IMPORT_CSV_missing_header_2_sql() => TestScript(@"IMPORT CSV missing header 2.sql");
53+
[TestMethod] public void Test_IMPORT_CSV_missing_header_3_sql() => TestScript(@"IMPORT CSV missing header 3.sql");
54+
[TestMethod] public void Test_IMPORT_CSV_missing_header_4_sql() => TestScript(@"IMPORT CSV missing header 4.sql");
5155
[TestMethod] public void Test_IMPORT_CSV_SKIP_LINES_sql() => TestScript(@"IMPORT CSV SKIP_LINES.sql");
5256
[TestMethod] public void Test_IMPORT_CSV_TAKE_LINES_sql() => TestScript(@"IMPORT CSV TAKE_LINES.sql");
5357
[TestMethod] public void Test_IMPORT_CSV_TEMPORARY_TABLE_sql() => TestScript(@"IMPORT CSV TEMPORARY_TABLE.sql");
@@ -62,6 +66,8 @@ public sealed partial class ScriptTest {
6266
[TestMethod] public void Test_IMPORT_XLS_BLANK_VALUES_2_sql() => TestScript(@"IMPORT XLS BLANK_VALUES 2.sql");
6367
[TestMethod] public void Test_IMPORT_XLS_BLANK_VALUES_3_sql() => TestScript(@"IMPORT XLS BLANK_VALUES 3.sql");
6468
[TestMethod] public void Test_IMPORT_XLS_BLANK_VALUES_DEFAULT_sql() => TestScript(@"IMPORT XLS BLANK_VALUES DEFAULT.sql");
69+
[TestMethod] public void Test_IMPORT_XLS_duplicate_header_1_sql() => TestScript(@"IMPORT XLS duplicate header 1.sql");
70+
[TestMethod] public void Test_IMPORT_XLS_duplicate_header_2_sql() => TestScript(@"IMPORT XLS duplicate header 2.sql");
6571
[TestMethod] public void Test_IMPORT_XLS_FIRST_COLUMN_first_and_last__letter_sql() => TestScript(@"IMPORT XLS FIRST_COLUMN first and last, letter.sql");
6672
[TestMethod] public void Test_IMPORT_XLS_FIRST_COLUMN_first_and_last__number_sql() => TestScript(@"IMPORT XLS FIRST_COLUMN first and last, number.sql");
6773
[TestMethod] public void Test_IMPORT_XLS_FIRST_COLUMN_first_beyond_end_of_data_sql() => TestScript(@"IMPORT XLS FIRST_COLUMN first beyond end of data.sql");
@@ -77,6 +83,8 @@ public sealed partial class ScriptTest {
7783
[TestMethod] public void Test_IMPORT_XLS_LAST_ROW_sql() => TestScript(@"IMPORT XLS LAST_ROW.sql");
7884
[TestMethod] public void Test_IMPORT_XLS_missing_header_1_sql() => TestScript(@"IMPORT XLS missing header 1.sql");
7985
[TestMethod] public void Test_IMPORT_XLS_missing_header_2_sql() => TestScript(@"IMPORT XLS missing header 2.sql");
86+
[TestMethod] public void Test_IMPORT_XLS_missing_header_3_sql() => TestScript(@"IMPORT XLS missing header 3.sql");
87+
[TestMethod] public void Test_IMPORT_XLS_missing_header_4_sql() => TestScript(@"IMPORT XLS missing header 4.sql");
8088
[TestMethod] public void Test_IMPORT_XLS_STOP_AT_FIRST_BLANK_ROW_0_sql() => TestScript(@"IMPORT XLS STOP_AT_FIRST_BLANK_ROW 0.sql");
8189
[TestMethod] public void Test_IMPORT_XLS_STOP_AT_FIRST_BLANK_ROW_1_sql() => TestScript(@"IMPORT XLS STOP_AT_FIRST_BLANK_ROW 1.sql");
8290
[TestMethod] public void Test_IMPORT_XLS_STOP_AT_FIRST_BLANK_ROW_default_sql() => TestScript(@"IMPORT XLS STOP_AT_FIRST_BLANK_ROW default.sql");
Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,2 @@
1+
a,,b,b,b
2+
1,2,3,4,5
8.56 KB
Binary file not shown.
Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,8 @@
1+
IMPORT CSV '<FILES>\duplicate-header.csv' INTO foo;
2+
3+
SELECT * FROM foo;
4+
5+
--output--
6+
a,column2,b,b_2,b_3
7+
1,2,3,4,5
8+
-
Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,8 @@
1+
IMPORT CSV '<FILES>\duplicate-header.csv' INTO foo (a, column2, b, b_2, b_3);
2+
3+
SELECT * FROM foo;
4+
5+
--output--
6+
a,column2,b,b_2,b_3
7+
1,2,3,4,5
8+
-
Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,9 @@
1+
IMPORT CSV '<FILES>\missing-header.csv' INTO foo;
2+
3+
SELECT * FROM foo;
4+
5+
--output--
6+
A,column2,B,column4,C
7+
111,222,333,444,555
8+
666,777,888,999,0
9+
-
Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,9 @@
1+
IMPORT CSV '<FILES>\missing-header.csv' INTO foo (A, column2, B, column4, C);
2+
3+
SELECT * FROM foo;
4+
5+
--output--
6+
A,column2,B,column4,C
7+
111,222,333,444,555
8+
666,777,888,999,0
9+
-

0 commit comments

Comments
 (0)