sqlparser · shenhuan2021 · Feb 23, 2026 · Jun 2, 2024 · Jun 2, 2024 · Jun 2, 2024
diff --git a/.cursor/rules/git-commit-styleguide.mdc b/.cursor/rules/git-commit-styleguide.mdc
@@ -0,0 +1,14 @@
+---
+alwaysApply: true
+---
+# Git Commit Message Style Guide
+
+When writing commit messages, follow these seven rules:
+
+1.  Separate subject from body with a blank line
+2.  Limit the subject line to 50 characters
+3.  Capitalize the subject line
+4.  Do not end the subject line with a period
+5.  Use the imperative mood in the subject line
+6.  Wrap the body at 72 characters
+7.  Use the body to explain what and why vs. how
diff --git a/.cursor/rules/refine-data-lineage-docs.mdc b/.cursor/rules/refine-data-lineage-docs.mdc
@@ -0,0 +1,44 @@
+---
+alwaysApply: true
+---
+# Data Lineage Documentation Refinement Guide
+
+When refining or refactoring data lineage documentation, remember that the primary audience is beginners in data governance. The language should be simple, clear, and use analogies to explain complex concepts. The document should be structured into two main parts, following the example set in [1-introduction.md](mdc:sqlflow_public/doc/basic-concepts/1-introduction.md).
+
+## Part 1: Current Data Lineage Model (v1)
+
+This section should explain the foundational concepts of the original SQLFlow data lineage model. Focus on simplicity and core ideas.
+
+-   **Core Concepts**: Explain data objects (`dbobjs`) and relationships (`relations`).
+-   **Relationship Types**: Clearly define `fdd` (direct flow) and `fdr` (indirect/impact flow) with simple SQL examples.
+-   **Effect Type (v1)**: When available in examples, explain `effectType` as the SQL statement/operation kind that produced the relationship (e.g., `select`, `insert`, `update`, `merge_update`, `create_view`). Use short callouts like “effectType: select” near the example so readers connect the edge to its producing statement.
+-   **Source of Truth**: Base this section on the information from the v1 schema and design documents.
+    -   v1 Schema: [data_lineage_schema_v1.json](mdc:gsp_java/docs/AI/cline_sqlflow/data_lineage_schema_v1.json)
+    -   v1 Design Explanation: [data_lineage_design_explanation_v1.md](mdc:gsp_java/docs/AI/cline_sqlflow/data_lineage_design_explanation_v1.md)
+
+## Part 2: Next-Generation Data Lineage Model (v2)
+
+This section should introduce the new, more powerful v2 schema as an evolution of the v1 model. Emphasize that it's an improvement and still under development.
+
+-   **Key Improvements**: Explain the benefits of the new model, such as enhanced precision, traceability, and scalability.
+-   **Concept Mapping**: Provide a clear mapping from v1 concepts to v2 concepts (e.g., `fdr` becomes `restricts` and `groups`).
+-   **New Features**: Introduce new concepts like `lineageObjects` with `qualifiedName`, atomic relationships, `observations` for evidence, and `transforms` for detailing logic.
+-   **Effect Type (v2)**: Use `effectType` on relationships to convey the nature/strength of the mapping. Where possible, add a brief parenthetical after examples, e.g., “effectType: EXACT_COPY”. Recommended guidance:
+    -   Simple alias or field passthrough: `EXACT_COPY` (no change in meaning)
+    -   Expression/function transforms (e.g., `ROUND`, `UPPER`): `WEAK_COPY` (value changed)
+    -   Aggregations (`SUM`, `COUNT`, `AVG`): `AGGREGATION` (or `WEAK_COPY` if `AGGREGATION` isn’t supported)
+    -   Multi-source expressions (e.g., `a + b`): `PARTIAL_COPY`
+    -   Uncertain or heuristic mapping: `AMBIGUOUS`
+    -   For the detailed categories, see section “15. Lineage Categories（effectType）与 SQL 推导规则” in [data_lineage_design_explanation.md](mdc:gsp_java/docs/AI/cline_sqlflow/data_lineage_design_explanation.md)
+    -   Add a one-line rationale when helpful (e.g., “aggregation changes granularity”).
+-   **Source of Truth**: Base this section on the information from the v2 schema and design documents.
+    -   v2 Schema: [data_lineage_schema.json](mdc:gsp_java/docs/AI/cline_sqlflow/data_lineage_schema.json)
+    -   v2 Design Explanation: [data_lineage_design_explanation.md](mdc:gsp_java/docs/AI/cline_sqlflow/data_lineage_design_explanation.md)
+
+## Writing Tips (applies to both parts)
+
+-   Prefer short, concrete examples; add a compact effect type note where it clarifies intent.
+-   When showing function-based flows, include the `transforms.code` (e.g., `ROUND(salary)`) and set a plausible `effectType` as above.
+-   For statements producing multiple edges (e.g., `INSERT ... SELECT`), show separate 1→1 edges (v2) and mention a shared `statementKey`; add edge-level `effectType` where appropriate.
+
+The goal is to create a seamless document that first teaches the basics (v1) and then introduces the advanced, more detailed concepts (v2) as a natural progression, with `effectType` annotations to make relationships more precise and intuitive.
diff --git a/.github/workflows/deploy-docs.yml b/.github/workflows/deploy-docs.yml
@@ -0,0 +1,74 @@
+name: Deploy GSP and SQLFlow Documentation to GitHub Pages
+
+on:
+  # Trigger the workflow on push events to the release/docs branch
+  push:
+    branches:
+      - release/docs
+  # Allows you to run this workflow manually from the Actions tab
+  workflow_dispatch:
+
+# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages
+permissions:
+  contents: read  # Read access to checkout the code
+  pages: write   # Write access to deploy to Pages
+  id-token: write # Needed for OIDC token if using advanced deployment methods
+
+# Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued.
+# However, do NOT cancel in-progress runs as we want to allow these production deployments to complete.
+concurrency:
+  group: "pages"
+  cancel-in-progress: false
+
+jobs:
+  # Build job: installs Python dependencies, builds the MkDocs site, uploads it as a Pages artifact
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+        # If you have git-submodules
+        # with:
+        #   submodules: recursive
+
+      - name: Set up Python
+        uses: actions/setup-python@v5 # v4 runs on the deprecated Node 16 runtime
+        with:
+          python-version: '3.x' # Quoted so the version is always read as a string
+          cache: 'pip' # Cache pip dependencies
+
+      - name: Install dependencies
+        run: pip install -r requirements.txt # Install from the repository-root requirements.txt
+
+      # --- !!! ---
+      # Add steps here to generate automatic content if needed
+      # Example:
+      # - name: Generate Javadoc
+      #   run: |
+      #     echo "Running Javadoc generation..."
+      #     # Actual command to generate Javadoc into e.g., docs/reference/javadoc
+      #     mkdir -p docs/reference/javadoc
+      #     echo "<html><body>Generated Javadoc Placeholder</body></html>" > docs/reference/javadoc/index.html
+      # --- !!! ---
+
+      - name: Build MkDocs site
+        working-directory: ./site-docs
+        run: mkdocs build --verbose # Output is expected in ./site-docs/site (uploaded below)
+
+      - name: Upload artifact
+        uses: actions/upload-pages-artifact@v3 # Use updated action
+        with:
+          # Upload entire site directory built by mkdocs
+          path: './site-docs/site'
+
+  # Deployment job: publishes the Pages artifact uploaded by the build job
+  deploy:
+    needs: build # Only starts once the build job has succeeded
+    runs-on: ubuntu-latest
+    environment:
+      name: github-pages
+      url: ${{ steps.deployment.outputs.page_url }} # URL reported by the deployment step
+    steps:
+      - id: deployment
+        name: Deploy to GitHub Pages
+        uses: actions/deploy-pages@v4 # Deploys the uploaded Pages artifact
diff --git a/.gitignore b/.gitignore
@@ -24,3 +24,21 @@
 # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
 hs_err_pid*
 
+# Python virtual environments
+venv/
+.venv/
+env/
+.env/
+# The slash-less patterns above already match at any depth. gitignore has no
+# inline comments: a trailing "# ..." on a pattern line becomes part of the pattern.
+
+/site-docs/site/
+.cache/
+__pycache__/
+*.pyc
+*.pyo
+*.pyd
+*.pyw
+*.pyz
+*.pywz
+*.pyzw
diff --git a/.vscode/launch.json b/.vscode/launch.json
@@ -0,0 +1,14 @@
+{
+    // Use IntelliSense to learn about possible attributes; hover to view their descriptions.
+    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
+    "version": "0.2.0",
+    "configurations": [
+        {
+            "type": "chrome",
+            "request": "launch",
+            "name": "Open index.html",
+            // Resolved against the opened workspace folder instead of a machine-specific absolute path.
+            "file": "${workspaceFolder}/widget/index.html"
+        }
+    ]
+}