diff --git a/src/components/Matcher.tsx b/src/components/Matcher.tsx index 91049c7..ea3f9c1 100644 --- a/src/components/Matcher.tsx +++ b/src/components/Matcher.tsx @@ -1,6 +1,7 @@ import { useEffect } from "react"; import { AppReducer, Match, MatchRow, ProcessingState } from "../state"; import { newEditDistance } from "../utils/match"; +import { filterByValue, joinNorm, uniq } from "../utils/uniq"; export function Progress({ progress }: { progress: number }) { const numBars = 40; @@ -47,50 +48,99 @@ export function Matcher({ } }) as [(string | undefined)[], (string | undefined)[]]; - const results: MatchRow[] = []; - for (let i = 0; i < leftValues.length; i++) { - timeouts.push( - setTimeout(() => { - // Rank everything - const matches: Match[] = []; - const row = results.length; - for (let j = 0; j < rightValues.length; j++) { - const score = newEditDistance(leftValues[i], rightValues[j]); - matches.push({ - score, - value: rightValues[j], - meta: rightMetas[j], - index: j, - col: 0, + const [leftJoins, rightJoins] = (["left", "right"] as const).map((side) => { + const join = app.columnSelections.join; + return join + ? app.tables[join[side].tableIndex].rows.map((row) => + joinNorm(row[join[side].column]) + ) + : null; + }); + const hasJoin = leftJoins != null && rightJoins != null; + const uniqueJoinValues = uniq([ + ...(leftJoins || []), + ...(rightJoins || []), + ]); + + const joins: [string, string[], string[]][] = hasJoin + ? uniqueJoinValues + .map<[string, string[], string[]]>((joiner) => [ + joiner, + filterByValue(leftJoins, joiner, leftValues), + filterByValue(rightJoins, joiner, rightValues), + ]) + .filter( + (x: [string, string[], string[]]) => + x[1].length > 0 && x[2].length > 0 + ) + : // Default join of just left and right values + [["default", leftValues, rightValues]]; + console.log(joins); + + const overallResults: [string, MatchRow[]][] = []; + + for (let joinIndex = 0; joinIndex < joins.length; joinIndex++) { + console.log("JOIN INDEXX", joinIndex); + const [joiner, leftValues, rightValues] = joins[joinIndex]; + const results: MatchRow[] = []; + for (let i = 0; i < leftValues.length; i++) { + timeouts.push( + setTimeout(() => { + // Rank everything + const matches: Match[] = []; + const row = results.length; + for (let j = 0; j < rightValues.length; j++) { + const score = newEditDistance(leftValues[i], rightValues[j]); + matches.push({ + score, + value: rightValues[j], + meta: rightMetas[j], + index: j, + col: 0, + row, + }); + } + matches.sort((a, b) => a.score - b.score); + for (let z = 0; z < matches.length; z++) { + // Assign match cols + matches[z].col = z; + } + results.push({ + value: leftValues[i], + meta: leftMetas[i], + index: i, + rankedMatches: matches, row, }); - } - matches.sort((a, b) => a.score - b.score); - for (let z = 0; z < matches.length; z++) { - // Assign match cols - matches[z].col = z; - } - results.push({ - value: leftValues[i], - meta: leftMetas[i], - index: i, - rankedMatches: matches, - row, - }); - // Update progress - if (i === leftValues.length - 1) { - reducer({ - type: "FinishProcessing", - results: results, - }); - } else { - reducer({ - type: "UpdateProgress", - progress: (i + 1) / leftValues.length, + + // Finalize results + if (i === leftValues.length - 1) { + overallResults.push([joiner, results]); + } + + // Update progress + console.log({ + i, + joinIndex, + iL: leftValues.length, + jL: joins.length, }); - } - }, 16) - ); + if (i === leftValues.length - 1 && joinIndex === joins.length - 1) { + console.log("DONE", overallResults); + reducer({ + type: "FinishProcessing", + results: overallResults, + }); + } else { + reducer({ + type: "UpdateProgress", + progress: + (joinIndex + (i + 1) / leftValues.length) / joins.length, + }); + } + }, 16) + ); + } } return () => { diff --git a/src/components/MatchingTable.tsx b/src/components/MatchingTable.tsx index d056ba9..3b5c06e 100644 --- a/src/components/MatchingTable.tsx +++ b/src/components/MatchingTable.tsx @@ -34,9 +34,12 @@ export function MatchingTable({ ); const [showMeta, setShowMeta] = useState<"show" | "hide">("show"); - if (app.matches.length === 0) return null; + const allJoins = Object.keys(app.matches); + const [join, setJoin] = useState(allJoins[0] || ""); - const numColumns = app.matches[0].rankedMatches.length + 1; + if (allJoins.length === 0) return null; + + const numColumns = app.matches[join][0].rankedMatches.length + 1; const columns: GridColumn[] = [ { title: `SOURCE — ${app.columnSelections.left.column} (${app.columnSelections.left.tableName})`, @@ -71,18 +74,18 @@ export function MatchingTable({ // } function getUserMatched(matchCell: Match): boolean { - return app.userMatches[`${matchCell.col},${matchCell.row}`]; + return app.userMatches[join][`${matchCell.col},${matchCell.row}`]; } function getAllUserMatches(): Match[] { - const entries = Object.entries(app.userMatches); + const entries = Object.entries(app.userMatches[join]); const results: Match[] = []; for (const [key, value] of entries) { if (!value) continue; const parts = key.split(","); const x = parseInt(parts[0]); const y = parseInt(parts[1]); - results.push(app.matches[y].rankedMatches[x]); + results.push(app.matches[join][y].rankedMatches[x]); } return results; } @@ -94,17 +97,24 @@ export function MatchingTable({ return userMatchTexts.includes(match.value); } - const matchEntries = Object.entries(app.userMatches); - const rowsWithMatches: { [row: number]: boolean } = {}; - for (const [key, value] of matchEntries) { - if (value) { - const row = parseInt(key.split(",")[1]); - rowsWithMatches[row] = true; + const matchEntries: [string, [string, boolean][]][] = Object.entries( + app.userMatches + ).map(([k, v]) => [k, Object.entries(v)]); + const rowsWithMatches: { [join: string]: { [row: number]: boolean } } = {}; + for (const [join, subEntries] of matchEntries) { + for (const [key, value] of subEntries) { + if (value) { + const row = parseInt(key.split(",")[1]); + if (rowsWithMatches[join] == null) { + rowsWithMatches[join] = {}; + } + rowsWithMatches[join][row] = true; + } } } - function rowHasMatch(row: number): boolean { - return rowsWithMatches[row]; + function rowHasMatch(row: number, subJoin = join): boolean { + return rowsWithMatches[subJoin]?.[row]; } function drawTextWithMatches( @@ -170,7 +180,7 @@ export function MatchingTable({ } const filteredMatchRows = new FilteredMatchRows( - app.matches, + app.matches[join], (cell) => colFilter === "all" ? true @@ -244,12 +254,13 @@ export function MatchingTable({ gridSelection.current.range, ...gridSelection.current.rangeStack, ], + join, }); if ( gridSelection.current.range.width === 1 && gridSelection.current.range.height === 1 && gridSelection.current.rangeStack.length === 0 && - gridSelection.current.range.y < app.matches.length - 1 + gridSelection.current.range.y < filteredMatchRows.numRows - 1 ) { // Advance to the next row setGridSelection({ @@ -290,6 +301,7 @@ export function MatchingTable({ ...gridSelection.current.rangeStack, ], data: filteredMatchRows, + join, forceState: false, }); } @@ -361,7 +373,7 @@ export function MatchingTable({ text, col === 1 || isUserMatch ? "" - : app.matches[matchCell.row].value, + : app.matches[join][matchCell.row].value, rect.x + 5, rect.y + rect.height / 2 + 1, sizeOffset @@ -371,7 +383,9 @@ export function MatchingTable({ ctx, "fill", text, - col === 1 || isUserMatch ? "" : app.matches[matchCell.row].value, + col === 1 || isUserMatch + ? "" + : app.matches[join][matchCell.row].value, rect.x + 5, rect.y + rect.height / 2 + 1, sizeOffset @@ -390,7 +404,18 @@ export function MatchingTable({ }} getCellContent={(cell) => { const [column, row] = cell; - const filteredMatches = filteredMatchRows.getRow(row); + let filteredMatches; + try { + filteredMatches = filteredMatchRows.getRow(row); + } catch (e) { + // This happens when the render is called while data changes + return { + kind: GridCellKind.Text, + data: "", + allowOverlay: false, + displayData: "", + }; + } if (column === 0) { return { @@ -453,6 +478,18 @@ export function MatchingTable({ + +