Commit 27d20bb

lqdev and Copilot committed
feat: complete project scaffold, extraction pipeline, tests, API, and CI
Phase 1: Repo scaffold
- ontology/ (context.jsonld, kb.ttl, shapes.ttl)
- Sample article (content/2026/04/what-is-a-knowledge-graph.md)
- Project config (pyproject.toml, requirements.txt files)
- Azure Functions + SWA config

Phase 2: Extraction pipeline
- tools/chunker.py: Deterministic Markdown chunking with sha256 IDs
- tools/llm_client.py: GitHub Models client (OpenAI SDK, caching, backoff)
- tools/postprocess.py: Entity canonicalization, dedup, JSON-LD/Turtle output
- tools/kg_build.py: Build orchestrator (git diff detection, batching)
- tools/prompts/extract_rdf_v1.txt: System prompt with ontology + few-shot examples

Phase 3: Tests (30/30 passing)
- test_chunker.py: Frontmatter, determinism, token targets
- test_postprocess.py: Slugify, canonicalize, dedup, serialization
- test_golden.py: End-to-end sample article chunking
- test_shacl.py: SHACL validation (valid + invalid graphs)

Phase 4: Azure deployment
- api/function_app.py: SPARQL endpoint (RDFLib, module-level caching)

Phase 5: CI/CD workflows
- kg-build.yml: Extract KG on push (models:read, contents:write)
- deploy-swa.yml: Deploy to Azure SWA

Phase 6: Documentation
- README.md with quickstart, SPARQL examples, architecture

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 02688b6 commit 27d20bb

27 files changed: 1837 additions & 0 deletions

.github/workflows/deploy-swa.yml

Lines changed: 31 additions & 0 deletions
```yaml
name: Deploy to Azure Static Web Apps
on:
  push:
    branches: [main]
    paths:
      - 'app/**'
      - 'api/**'
      - 'graph/**'
  workflow_dispatch:

permissions:
  contents: read

jobs:
  deploy:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v5

      - name: Deploy to Azure SWA
        uses: Azure/static-web-apps-deploy@v1
        with:
          azure_static_web_apps_api_token: ${{ secrets.AZURE_STATIC_WEB_APPS_API_TOKEN }}
          repo_token: ${{ secrets.GITHUB_TOKEN }}
          action: upload
          app_location: /app
          api_location: /api
          output_location: ""
          skip_app_build: true
          skip_api_build: false
```

.github/workflows/kg-build.yml

Lines changed: 60 additions & 0 deletions
```yaml
name: KG Build — Extract Knowledge Graph
on:
  push:
    branches: [main]
    paths:
      - 'content/**'
      - 'ontology/**'
      - 'tools/**'
      - 'tests/**'
  workflow_dispatch:

permissions:
  contents: write
  models: read

jobs:
  build-graph:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v5
        with:
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'
          cache: 'pip'

      - name: Install dependencies
        run: pip install -r tools/requirements.txt

      - name: Run tests
        run: python -m pytest tests/ -v --tb=short

      - name: Build knowledge graph
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          LLM_MODEL: openai/gpt-4o-mini
        run: python -m tools.kg_build --repo-root . --base-url https://example.com

      - name: Check for changes
        id: changes
        run: |
          git add graph/
          if git diff --cached --quiet; then
            echo "changed=false" >> $GITHUB_OUTPUT
          else
            echo "changed=true" >> $GITHUB_OUTPUT
          fi

      - name: Commit graph artifacts
        if: steps.changes.outputs.changed == 'true'
        run: |
          git config user.name "github-actions[bot]"
          git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
          git add graph/
          git commit -m "chore: update knowledge graph [skip ci]"
          git push
```
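The "Build knowledge graph" step delegates batching to `tools/kg_build.py`, which (per the README) groups 3-5 chunks per LLM request to stay under the input token limit. A minimal sketch of that grouping, assuming a simple fixed batch size; `batch_chunks` is a hypothetical helper, not the actual kg_build API, and the real orchestrator may also weigh token counts:

```python
# Sketch: group chunks into consecutive batches of at most 4 (within the
# 3-5 range the README cites) so each LLM request stays under the limit.
from typing import List


def batch_chunks(chunks: List[str], batch_size: int = 4) -> List[List[str]]:
    """Split chunks into consecutive groups of at most `batch_size`."""
    return [chunks[i:i + batch_size] for i in range(0, len(chunks), batch_size)]


batches = batch_chunks([f"chunk-{n}" for n in range(10)])
# 10 chunks split into groups of 4, 4, and 2
```

With 150 requests/day on the free tier, batching at 4 chunks per request lets the pipeline cover roughly 600 chunks daily before the cache has to absorb the rest.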

README.md

Lines changed: 131 additions & 0 deletions
````markdown
# Markdown-LD Knowledge Bank

A Git-based knowledge bank where human-authored Markdown articles are processed by an LLM CI pipeline to extract Linked Data (RDF/JSON-LD), served as static content on Azure Static Web Apps, with a serverless SPARQL endpoint.

## Architecture

```
content/*.md → GitHub Actions → LLM (GitHub Models) → graph/*.jsonld + *.ttl
                                      ↓
                          Azure Static Web Apps
                          ├── Static site
                          ├── Graph files
                          └── SPARQL API (RDFLib)
```

## Quick Start

### Prerequisites

- Python 3.11+
- Git
- Azure CLI (for deployment)

### Local Development

```bash
# Install dependencies
pip install -r tools/requirements.txt

# Run tests
python -m pytest tests/ -v

# Dry run (chunk only, no LLM)
python -m tools.kg_build --dry-run

# Full build (requires GITHUB_TOKEN)
export GITHUB_TOKEN=your_token
python -m tools.kg_build --repo-root . --base-url https://example.com
```

### Writing Articles

Create Markdown files in `content/` with YAML frontmatter:

```markdown
---
title: "Your Article Title"
date_published: "2026-04-15"
tags:
  - knowledge-graphs
  - rdf
entity_hints:
  - label: "RDF"
    type: "schema:Thing"
    sameAs: "https://www.wikidata.org/entity/Q54872"
---

# Your Content Here

Write naturally. The LLM pipeline extracts entities and relationships.
Use [[wikilinks]] to link between articles.
```

### Example SPARQL Queries

**Find all entities mentioned in an article:**

```sparql
PREFIX schema: <https://schema.org/>
SELECT ?entity ?name WHERE {
  <https://example.com/2026/04/what-is-a-knowledge-graph/> schema:mentions ?entity .
  ?entity schema:name ?name .
}
```

**Find all articles about a topic:**

```sparql
PREFIX schema: <https://schema.org/>
SELECT ?article ?title WHERE {
  ?article a schema:Article ;
           schema:mentions <https://example.com/id/knowledge-graph> ;
           schema:name ?title .
}
```

**Find connections between entities:**

```sparql
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX schema: <https://schema.org/>
SELECT ?subject ?predicate ?object WHERE {
  ?subject ?predicate ?object .
  FILTER(?predicate != rdf:type)
}
LIMIT 50
```

## Project Structure

```
├── content/          # Markdown articles (human-authored)
├── ontology/         # JSON-LD context, vocabulary, SHACL shapes
├── tools/            # Extraction pipeline (chunker, LLM client, post-processor)
├── graph/            # Generated artifacts (committed by CI)
│   ├── articles/     # Per-article JSON-LD and Turtle
│   ├── views/        # Precomputed JSON views
│   ├── cache/        # Per-chunk extraction cache
│   └── manifest.json # Build metadata
├── api/              # Azure Function (SPARQL endpoint)
├── app/              # Static web app
├── tests/            # Test suite
└── .github/workflows/
    ├── kg-build.yml  # KG extraction pipeline
    └── deploy-swa.yml # Azure SWA deployment
```

## Key Design Decisions

| Decision | Choice | Rationale |
|----------|--------|-----------|
| LLM Provider | GitHub Models (free) | Zero cost, GITHUB_TOKEN auth |
| LLM Model | `openai/gpt-4o-mini` | Best quality/limit ratio (150 req/day) |
| SPARQL Engine | RDFLib | Pure Python, small footprint, built-in JSON-LD |
| Validation | pySHACL | Standard W3C SHACL, works with RDFLib |
| Batching | 3-5 chunks/request | Stay under the 8K input token limit |

## Rate Limits

GitHub Models free tier (GPT-4o-mini): 150 requests/day, 8K input tokens.
The pipeline batches 3-5 chunks per request and caches results to stay within limits.

## License

MIT
````
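The commit message describes `tools/chunker.py` as producing deterministic chunks with sha256 IDs (the chunker itself is not shown in this diff). A sketch of how such an ID might be derived; `chunk_id` and its inputs are hypothetical, since the real chunker may hash different fields:

```python
import hashlib


def chunk_id(source_path: str, chunk_text: str) -> str:
    """Derive a stable chunk ID from the file path and chunk content.

    Unchanged chunks hash to the same ID across builds, which is what
    makes a per-chunk extraction cache (graph/cache/) possible.
    """
    digest = hashlib.sha256(f"{source_path}\n{chunk_text}".encode("utf-8"))
    return digest.hexdigest()[:16]


cid = chunk_id("content/2026/04/what-is-a-knowledge-graph.md", "# Intro\nSome text.")
# Same inputs always yield the same 16-hex-character ID.
```

Keying the cache on content rather than line numbers means reordering sections in an article only re-extracts the chunks whose text actually changed.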

api/function_app.py

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
"""Azure Function: SPARQL endpoint using RDFLib.
2+
3+
Loads all .ttl files from the graph/articles/ directory into a combined
4+
RDFLib Dataset, then serves SPARQL queries via HTTP GET/POST.
5+
"""
6+
7+
import json
8+
import os
9+
import logging
10+
from pathlib import Path
11+
12+
import azure.functions as func
13+
import rdflib
14+
from rdflib import Dataset, Graph
15+
16+
app = func.FunctionApp(http_auth_level=func.AuthLevel.ANONYMOUS)
17+
18+
# Module-level cache: load graph once per cold start
19+
_dataset: Dataset | None = None
20+
21+
22+
def _load_dataset() -> Dataset:
23+
"""Load all Turtle files into an RDFLib Dataset."""
24+
global _dataset
25+
if _dataset is not None:
26+
return _dataset
27+
28+
ds = Dataset()
29+
graph_dir = Path(__file__).parent.parent / "graph" / "articles"
30+
31+
if graph_dir.exists():
32+
for ttl_file in graph_dir.glob("*.ttl"):
33+
try:
34+
g = Graph()
35+
g.parse(str(ttl_file), format="turtle")
36+
for triple in g:
37+
ds.add(triple)
38+
logging.info(f"Loaded {len(g)} triples from {ttl_file.name}")
39+
except Exception as e:
40+
logging.error(f"Failed to parse {ttl_file.name}: {e}")
41+
42+
logging.info(f"Total triples loaded: {len(ds)}")
43+
_dataset = ds
44+
return _dataset
45+
46+
47+
@app.route(route="sparql", methods=["GET", "POST"])
48+
def sparql_endpoint(req: func.HttpRequest) -> func.HttpResponse:
49+
"""Handle SPARQL queries per W3C SPARQL 1.1 Protocol."""
50+
# Extract query
51+
query = None
52+
if req.method == "GET":
53+
query = req.params.get("query")
54+
elif req.method == "POST":
55+
content_type = req.headers.get("Content-Type", "")
56+
if "application/sparql-query" in content_type:
57+
query = req.get_body().decode("utf-8")
58+
elif "application/x-www-form-urlencoded" in content_type:
59+
query = req.params.get("query") or req.form.get("query")
60+
else:
61+
# Try body as raw query
62+
query = req.get_body().decode("utf-8")
63+
64+
if not query:
65+
return func.HttpResponse(
66+
json.dumps({"error": "Missing 'query' parameter"}),
67+
status_code=400,
68+
mimetype="application/json",
69+
)
70+
71+
# Safety: block mutating queries
72+
query_upper = query.strip().upper()
73+
if any(kw in query_upper for kw in ["INSERT", "DELETE", "LOAD", "CLEAR", "DROP", "CREATE"]):
74+
return func.HttpResponse(
75+
json.dumps({"error": "Only SELECT and ASK queries are allowed"}),
76+
status_code=403,
77+
mimetype="application/json",
78+
)
79+
80+
# Execute query
81+
try:
82+
ds = _load_dataset()
83+
result = ds.query(query)
84+
serialized = result.serialize(format="json")
85+
if isinstance(serialized, bytes):
86+
serialized = serialized.decode("utf-8")
87+
88+
return func.HttpResponse(
89+
serialized,
90+
mimetype="application/sparql-results+json",
91+
headers={
92+
"Access-Control-Allow-Origin": "*",
93+
"Cache-Control": "public, max-age=300",
94+
},
95+
)
96+
except Exception as e:
97+
logging.error(f"SPARQL query error: {e}")
98+
return func.HttpResponse(
99+
json.dumps({"error": f"Query execution failed: {str(e)}"}),
100+
status_code=400,
101+
mimetype="application/json",
102+
)

api/host.json

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
{
2+
"version": "2.0",
3+
"extensionBundle": {
4+
"id": "Microsoft.Azure.Functions.ExtensionBundle",
5+
"version": "[4.*, 5.0.0)"
6+
}
7+
}

api/requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
rdflib>=7.1.1,<8.0

app/index.html

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
<!DOCTYPE html>
2+
<html lang="en">
3+
<head>
4+
<meta charset="UTF-8">
5+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
6+
<title>Knowledge Bank</title>
7+
<style>
8+
body { font-family: system-ui, sans-serif; max-width: 720px; margin: 2rem auto; padding: 0 1rem; line-height: 1.6; color: #333; }
9+
h1 { border-bottom: 2px solid #0366d6; padding-bottom: 0.3rem; }
10+
a { color: #0366d6; }
11+
code { background: #f6f8fa; padding: 0.2em 0.4em; border-radius: 3px; }
12+
pre { background: #f6f8fa; padding: 1rem; border-radius: 6px; overflow-x: auto; }
13+
</style>
14+
</head>
15+
<body>
16+
<h1>📚 Knowledge Bank</h1>
17+
<p>A Git-based knowledge bank powered by Markdown articles and Linked Data.</p>
18+
19+
<h2>Resources</h2>
20+
<ul>
21+
<li><a href="/graph/views/entities.json">Entity Index</a> (JSON)</li>
22+
<li><a href="/graph/views/articles_by_tag.json">Articles by Tag</a> (JSON)</li>
23+
<li><a href="/graph/dataset.trig">Full Dataset</a> (TriG/RDF)</li>
24+
</ul>
25+
26+
<h2>SPARQL Endpoint</h2>
27+
<p>Query the knowledge graph at <code>/sparql?query=...</code></p>
28+
<pre>PREFIX schema: &lt;https://schema.org/&gt;
29+
SELECT ?article ?title WHERE {
30+
?article a schema:Article ;
31+
schema:name ?title .
32+
} LIMIT 50</pre>
33+
34+
<h2>About</h2>
35+
<p>
36+
Articles are written in Markdown under <code>/content</code>.
37+
A CI pipeline extracts entities and relations using an LLM,
38+
producing JSON-LD and Turtle files under <code>/graph</code>.
39+
This site is hosted on Azure Static Web Apps (Free plan).
40+
</p>
41+
</body>
42+
</html>

app/staticwebapp.config.json

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
{
2+
"routes": [
3+
{ "route": "/sparql", "rewrite": "/api/sparql" }
4+
],
5+
"navigationFallback": {
6+
"rewrite": "/index.html"
7+
},
8+
"globalHeaders": {
9+
"Cache-Control": "public, max-age=300"
10+
},
11+
"platform": {
12+
"apiRuntime": "python:3.11"
13+
}
14+
}
