Skip to content

Commit 408f5c0

Browse files
committed
"Detect types" button in CSV/XLS import will automatically set column type conversions
1 parent 30b1273 commit 408f5c0

File tree

4 files changed

+138
-15
lines changed

4 files changed

+138
-15
lines changed

src/SqlNotebook/Import/Csv/ImportCsvForm.cs

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ public ImportCsvForm(string filePath, DatabaseSchema schema, NotebookManager man
4040
_optionsControl = new ImportCsvOptionsControl(schema) { AutoSize = true, AutoSizeMode = AutoSizeMode.GrowAndShrink };
4141
_optionsPanel.Controls.Add(_optionsControl);
4242

43-
_columnsControl = new ImportColumnsControl { Dock = DockStyle.Fill };
43+
_columnsControl = new ImportColumnsControl(allowDetectTypes: true) { Dock = DockStyle.Fill };
4444
_columnsLoadControl = new LoadingContainerControl { ContainedControl = _columnsControl, Dock = DockStyle.Fill };
4545
_columnsPanel.Controls.Add(_columnsLoadControl);
4646

@@ -152,9 +152,9 @@ private async Task UpdateColumns() {
152152
_columnsLoadControl.PushLoad();
153153

154154
try {
155-
var sourceColumns = await GetSourceColumns();
155+
var (sourceColumns, detectedTypes) = await GetSourceColumns();
156156
if (_columnsLoadId == loadId) {
157-
_columnsControl.SetSourceColumns(sourceColumns);
157+
_columnsControl.SetSourceColumns(sourceColumns, detectedTypes);
158158
UpdateTargetColumns();
159159
_columnsLoadControl.ClearError();
160160
_columnsError.Value = null;
@@ -174,7 +174,7 @@ private async Task UpdateColumns() {
174174
}
175175
}
176176

177-
private async Task<IReadOnlyList<string>> GetSourceColumns() {
177+
private async Task<(IReadOnlyList<string> Names, IReadOnlyList<string> DetectedTypes)> GetSourceColumns() {
178178
var tempTableName = Guid.NewGuid().ToString();
179179
try {
180180
var headerRow = _optionsControl.HasColumnHeaders.Value;
@@ -183,10 +183,11 @@ private async Task<IReadOnlyList<string>> GetSourceColumns() {
183183
var separator = _optionsControl.Separator.Value;
184184
return await Task.Run(() => {
185185
var importSql =
186-
@"IMPORT CSV @filePath INTO @tableName
187-
OPTIONS (SKIP_LINES: @skipLines, TAKE_LINES: 0, HEADER_ROW: @headerRow, TEMPORARY_TABLE: 1,
188-
FILE_ENCODING: @encoding, SEPARATOR: @sep);";
189-
_manager.ExecuteScriptNoOutput(importSql, new Dictionary<string, object> {
186+
@$"IMPORT CSV @filePath INTO @tableName
187+
OPTIONS (SKIP_LINES: @skipLines, TAKE_LINES: 1000, HEADER_ROW: @headerRow, TEMPORARY_TABLE: 1,
188+
FILE_ENCODING: @encoding, SEPARATOR: @sep);
189+
SELECT * FROM {SqlUtil.DoubleQuote(tempTableName)};";
190+
var output = _manager.ExecuteScript(importSql, new Dictionary<string, object> {
190191
["@filePath"] = _filePath,
191192
["@tableName"] = tempTableName,
192193
["@sep"] = separator,
@@ -195,10 +196,18 @@ private async Task<IReadOnlyList<string>> GetSourceColumns() {
195196
["@skipLines"] = skipLines
196197
});
197198

199+
IReadOnlyList<string> detectedTypes = null;
200+
try {
201+
detectedTypes = TypeDetection.DetectTypes(output.DataTables[0]);
202+
} catch {
203+
// Don't let this blow up the import.
204+
detectedTypes = Array.Empty<string>();
205+
}
206+
198207
using var dt = _manager.ExecuteScript($"PRAGMA TABLE_INFO ({tempTableName.DoubleQuote()})")
199208
.DataTables[0];
200209
var nameCol = dt.GetIndex("name");
201-
return dt.Rows.Select(x => x[nameCol].ToString()).Where(x => !string.IsNullOrEmpty(x)).ToList();
210+
return (dt.Rows.Select(x => x[nameCol].ToString()).Where(x => !string.IsNullOrEmpty(x)).ToList(), detectedTypes);
202211
});
203212
} finally {
204213
await Task.Run(() => {

src/SqlNotebook/Import/ImportColumnsControl.cs

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ private static class GridColumn {
2222

2323
public NotifySlot Change = new NotifySlot();
2424
public Slot<bool> Error = new Slot<bool>();
25+
private IReadOnlyList<string> _detectedTypes = Array.Empty<string>();
2526

2627
public string SqlColumnList =>
2728
string.Join(",\r\n",
@@ -58,7 +59,8 @@ public ImportColumnsControl(bool allowDetectTypes = false) {
5859
(sender, e) => ValidateGridInput());
5960
}
6061

61-
public void SetSourceColumns(IReadOnlyList<string> columnNames) {
62+
public void SetSourceColumns(IReadOnlyList<string> columnNames, IReadOnlyList<string> detectedTypes = null) {
63+
_detectedTypes = detectedTypes;
6264
_table.BeginLoadData();
6365
_table.Clear();
6466
foreach (var columnName in columnNames) {
@@ -202,13 +204,21 @@ private void SetTypeMenu_Click(object sender, EventArgs e) {
202204
_grid.SelectedCells
203205
.Cast<DataGridViewCell>()
204206
.Select(x => ((DataRowView)x.OwningRow.DataBoundItem).Row)
207+
.Distinct()
205208
) {
206209
dataRow[GridColumn.Conversion] = type;
207210
}
208211
}
209212

210213
/// <summary>
/// Click handler for the "Detect types" button. Copies the detected type stored by the most recent
/// <c>SetSourceColumns</c> call into the Conversion cell of each grid row, pairing grid row N with
/// detected type N.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event arguments (unused).</param>
private void DetectTypesButton_Click(object sender, EventArgs e) {
    // SetSourceColumns stores its detectedTypes argument as-is and that argument defaults to null,
    // so the field may be null here even though it is initialized to an empty array.
    var detectedTypes = _detectedTypes;
    if (detectedTypes == null) {
        return;
    }

    // Only rows that have a matching detected type are touched; extra rows or extra types are ignored.
    var limit = Math.Min(_grid.Rows.Count, detectedTypes.Count);
    for (var rowIndex = 0; rowIndex < limit; rowIndex++) {
        // Skip any row that is not bound to a DataRowView instead of failing on the cast.
        if (_grid.Rows[rowIndex].DataBoundItem is DataRowView view) {
            view.Row[GridColumn.Conversion] = detectedTypes[rowIndex];
        }
    }
}
213223
}
214224

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.Globalization;
4+
using SqlNotebookScript.DataTables;
5+
6+
namespace SqlNotebook.Import;
7+
8+
/// <summary>
/// Guesses a column type name ("TEXT", "INTEGER", "REAL", "DATE" or "DATETIME") for each column of a
/// sample table by checking which interpretations every non-blank value in that column supports.
/// </summary>
public static class TypeDetection {
    /// <summary>Set of interpretations that a value (or a whole column) is compatible with.</summary>
    [Flags]
    private enum TypeFlag {
        None = 0,
        Text = 1,
        Integer = 2,
        Real = 4,
        Date = 8,
        DateTime = 16,
        All = Text | Integer | Real | Date | DateTime
    }

    /// <summary>
    /// Chooses one type name per column of <paramref name="table"/>.
    /// </summary>
    /// <param name="table">Sample rows to inspect. Null, empty and whitespace-only values are ignored.</param>
    /// <returns>
    /// A list with one entry per column, in column order. The most specific type that fits every
    /// non-blank value wins, tried in the order INTEGER, REAL, DATE, DATETIME, TEXT. A column with no
    /// non-blank values (including the case of a table without rows) is reported as "TEXT".
    /// </returns>
    public static IReadOnlyList<string> DetectTypes(SimpleDataTable table) {
        var columnCount = table.Columns.Count;

        // Every column starts out compatible with everything; each value narrows the mask.
        var masks = new TypeFlag[columnCount];
        for (var i = 0; i < columnCount; i++) {
            masks[i] = TypeFlag.All;
        }

        // Tracks whether a column contained any evidence at all. Without this, an all-blank column
        // would keep the full mask and be reported as INTEGER.
        var hasValue = new bool[columnCount];

        foreach (var row in table.Rows) {
            for (var i = 0; i < columnCount; i++) {
                // Format with the invariant culture because the text is parsed with the invariant
                // culture below; otherwise a boxed number could round-trip as e.g. "1,5" and fail.
                var value = Convert.ToString(row[i], CultureInfo.InvariantCulture);
                if (string.IsNullOrWhiteSpace(value)) {
                    // Blank is acceptable for all types (treated as null).
                    continue;
                }
                hasValue[i] = true;
                masks[i] &= Classify(value);
            }
        }

        var chosenTypes = new string[columnCount];
        for (var i = 0; i < columnCount; i++) {
            chosenTypes[i] = hasValue[i] ? ChooseTypeName(masks[i]) : "TEXT";
        }
        return chosenTypes;
    }

    /// <summary>Returns every interpretation that a single non-blank value supports.</summary>
    private static TypeFlag Classify(string value) {
        // Any value can be stored as text.
        var flags = TypeFlag.Text;

        // 64-bit parse: a 32-bit parse would push large whole numbers (e.g. 3000000000) to REAL.
        if (long.TryParse(value, NumberStyles.Integer, CultureInfo.InvariantCulture, out _)) {
            flags |= TypeFlag.Integer;
        }

        // double.TryParse can succeed for "NaN"/"Infinity" and, on newer runtimes, for out-of-range
        // input (yielding infinity). Only finite results count as REAL.
        if (double.TryParse(value, NumberStyles.Float, CultureInfo.InvariantCulture, out var number)
            && !double.IsNaN(number)
            && !double.IsInfinity(number)
        ) {
            flags |= TypeFlag.Real;
        }

        if (DateTime.TryParse(value, CultureInfo.InvariantCulture, DateTimeStyles.None, out var dateTime)) {
            flags |= TypeFlag.DateTime;
            // A value with no time-of-day component also qualifies as a plain date.
            if (dateTime == dateTime.Date) {
                flags |= TypeFlag.Date;
            }
        }

        return flags;
    }

    /// <summary>Maps a column's surviving mask to the most specific type name.</summary>
    private static string ChooseTypeName(TypeFlag mask) {
        if (mask.HasFlag(TypeFlag.Integer)) {
            return "INTEGER";
        }
        if (mask.HasFlag(TypeFlag.Real)) {
            return "REAL";
        }
        if (mask.HasFlag(TypeFlag.Date)) {
            return "DATE";
        }
        if (mask.HasFlag(TypeFlag.DateTime)) {
            return "DATETIME";
        }
        return "TEXT";
    }
}

src/SqlNotebook/Import/Xls/ImportXlsForm.cs

Lines changed: 35 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
using System.Text.RegularExpressions;
88
using System.Windows.Forms;
99
using SqlNotebook.Properties;
10+
using SqlNotebookScript.DataTables;
1011
using SqlNotebookScript.Utils;
1112

1213
namespace SqlNotebook.Import.Xls;
@@ -44,7 +45,7 @@ public ImportXlsForm(XlsInput input, NotebookManager manager, DatabaseSchema sch
4445

4546
_originalFilePanel.Controls.Add(_grid = DataGridViewUtil.NewDataGridView(
4647
rowHeadersVisible: true, autoGenerateColumns: false, allowSort: false));
47-
_columnsPanel.Controls.Add(_columnsControl = new() {
48+
_columnsPanel.Controls.Add(_columnsControl = new(allowDetectTypes: true) {
4849
Dock = DockStyle.Fill
4950
});
5051
Ui ui = new(this, 170, 50);
@@ -246,17 +247,19 @@ private void LoadColumns() {
246247
var sheetIndex = (int)_sheetCombo.SelectedValue;
247248
var sheetInfo = _input.Worksheets[sheetIndex];
248249

249-
int? minRowIndex = null, minColumnIndex = null, maxColumnIndex = null;
250+
int? minRowIndex = null, maxRowIndex = null, minColumnIndex = null, maxColumnIndex = null;
250251

251252
try {
252-
(minRowIndex, _) = GetValidatedMinMaxRowIndices(sheetInfo);
253+
(minRowIndex, maxRowIndex) = GetValidatedMinMaxRowIndices(sheetInfo);
253254
(minColumnIndex, maxColumnIndex) = GetValidatedMinMaxColumnIndices(sheetInfo);
254255
} catch {
255256
// Ignore for now. We will show this error when they hit OK.
256257
}
257258

258259
minRowIndex ??= 0;
259260
minColumnIndex ??= 0;
261+
maxRowIndex ??= int.MaxValue;
262+
maxRowIndex = Math.Min(maxRowIndex.Value, sheetInfo.DataTable.Rows.Count - 1);
260263
maxColumnIndex ??= int.MaxValue;
261264
maxColumnIndex = Math.Min(maxColumnIndex.Value, sheetInfo.DataTable.Columns.Count - 1);
262265

@@ -277,7 +280,35 @@ private void LoadColumns() {
277280
columnNames.Add(columnName);
278281
}
279282

280-
_columnsControl.SetSourceColumns(columnNames);
283+
IReadOnlyList<string> detectedTypes = null;
284+
try {
285+
using SimpleDataTableBuilder detectionTableBuilder = new(columnNames);
286+
var firstDataRow = minRowIndex.Value + (_columnNamesCheck.Checked ? 1 : 0);
287+
var numSampleRows = Math.Min(1000, maxRowIndex.Value - firstDataRow + 1);
288+
for (var rowIndex = firstDataRow; rowIndex < firstDataRow + numSampleRows; rowIndex++) {
289+
var row = new object[columnNames.Count];
290+
for (var columnIndex = minColumnIndex.Value; columnIndex <= maxColumnIndex.Value; columnIndex++) {
291+
var value = "";
292+
try {
293+
value = sheetInfo.DataTable.Rows[rowIndex][columnIndex].ToString();
294+
} catch {
295+
// ignore
296+
}
297+
if (columnIndex - minColumnIndex.Value < row.Length) {
298+
row[columnIndex - minColumnIndex.Value] = value;
299+
}
300+
}
301+
detectionTableBuilder.AddRow(row);
302+
}
303+
304+
using var detectionTable = detectionTableBuilder.Build();
305+
detectedTypes = TypeDetection.DetectTypes(detectionTable);
306+
} catch {
307+
// Don't let this blow up the import.
308+
detectedTypes = Array.Empty<string>();
309+
}
310+
311+
_columnsControl.SetSourceColumns(columnNames, detectedTypes);
281312
_columnsControl.SetTargetToNewTable();
282313
}
283314

0 commit comments

Comments
 (0)