Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@ data/**/*.json
pnpm-lock.yaml
package-lock.json
yarn.lock
*.gexf
8 changes: 4 additions & 4 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,14 @@
"read:people": "npx tsx data/microsoft/people/people-chunk-creation.ts",
"read:miro": "npx tsx data/miro/miro-chunk-creation.ts",
"read:github": "npx tsx data/github/gh-chunk-creation.ts",

"create:embeddings": "pnpm create:sp && pnpm create:confluence && pnpm create:people && pnpm create:miro",
"create:sp": "npx tsx data/sharepoint/sp-embeddings-creation.ts",
"create:confluence": "npx tsx data/confluence/cf-embeddings-creation.ts",
"create:people": "npx tsx data/microsoft/people/people-embeddings-creation.ts",
"create:miro": "npx tsx data/miro/miro-embeddings-creation.ts",
"create:github": "npx tsx data/github/gh-embeddings-creation.ts",

"chat": "npx tsx src/chat.ts"
"chat": "npx tsx src/chat.ts",
"create:graph": "npx tsx src/visualization/graph-creation.ts"
},
"keywords": [],
"author": "",
Expand All @@ -32,7 +31,8 @@
"cli-progress": "^3.12.0",
"dotenv": "^16.0.3",
"gpt-3-encoder": "^1.1.4",
"openai": "^3.2.1"
"openai": "^3.2.1",
"xmlbuilder": "^15.1.1"
},
"devDependencies": {
"@types/cli-progress": "^3.11.0",
Expand Down
38 changes: 38 additions & 0 deletions src/visualization/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
## Graphdaten als Netzwerk anzeigen lassen und untersuchen

Wir nutzen https://gephi.org/

Du kannst die generierte .gexf-Datei dort importieren.

## Zentralitätsmaße

Zentralitätsmaße geben Anhaltspunkte, wie wichtig ein Knoten (eine Entität: z.B. eine Person, ein Artikel,...)
im Netzwerk ist.

Beispiele (vereinfacht)

- Modularity - Analyse von Clustern und Communities
- Gradzentralität - Naives Maß - Anzahl Kanten, die von einem Knoten ausgehen
- Clusterkoeffizient - Wie viele "Dreiecke" / Cliquen gibt es?

_Notiz_: Durch die Auswahl geeigneter Layout-Algorithmen lassen sich die entsprechenden Metriken auch visuell gut darstellen.

## Use cases

- Wie kann ich den Dschungel an Informationen verstehen?
- Wie kann ich Intranet-Organisation verbessern:

- Welche Inhalte sind unterrepräsentiert im Netzwerk, sind aber eigentlich wichtig
- Welche Inhalte schlagen Brücken zwischen verschiedenen Communities / Clustern

- Organisationsnetzwerke analysieren

## Links zum Stöbern

Erklärungen (anschaulich) https://cambridge-intelligence.com/keylines-faqs-social-network-analysis/

"Six degrees of separation" - https://www.youtube.com/watch?v=-8w-3oe1uJo

http://networksciencebook.com/

https://networkit.github.io/
74 changes: 74 additions & 0 deletions src/visualization/graph-creation.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
import chunks from "../../data/embeddings.json";
import { EmbeddedSourceChunk } from "../../data/common/types";
import { promises as fs } from "fs";
import { create } from "xmlbuilder";
import { cosineSimilarity } from "../cosineSimilarity";
import { Graph, Node, Edge } from "./models/Graph";
import coloredLog from "../../data/common/coloredLog";

// Minimum cosine similarity two chunks must reach before an edge is added between them.
const threshold = 0.8;
// Path the generated GEXF file is written to.
const dataPath = "./src/visualization/graph.gexf";

/**
 * Builds a similarity graph from embedded chunks.
 *
 * Every distinct chunk title becomes exactly one node (id and label are both
 * the title). Two chunks with different titles are connected by an edge when
 * the cosine similarity of their embeddings is at least `threshold`.
 *
 * Each unordered pair of chunks is compared once: cosine similarity is
 * symmetric and the graph is exported as undirected, so emitting both A -> B
 * and B -> A would only duplicate edges and double the work.
 *
 * @param chunks Embedded chunks to turn into nodes and edges.
 * @returns The populated graph.
 */
function createGraph(chunks: EmbeddedSourceChunk[]): Graph {
  const graph = new Graph();
  // Several chunks can share one title; node ids must stay unique.
  const seenTitles = new Set<string>();

  chunks.forEach((chunk: EmbeddedSourceChunk, index: number) => {
    const title = chunk.title;
    if (!seenTitles.has(title)) {
      seenTitles.add(title);
      graph.addNode({ id: title, label: title });
    }

    // Only look at chunks after the current one so each pair is visited once.
    for (const otherChunk of chunks.slice(index + 1)) {
      const otherTitle = otherChunk.title;
      if (title === otherTitle) {
        // Chunks of the same title map to the same node; skip self-loops.
        continue;
      }

      const weight = cosineSimilarity(chunk.embedding, otherChunk.embedding);
      if (weight >= threshold) {
        graph.addEdge({
          source: title,
          target: otherTitle,
          weight: weight.toString(),
        });
      }
    }
  });

  return graph;
}

/**
 * Serializes a graph into a GEXF XML document.
 *
 * The document contains a single static, undirected `<graph>` element with a
 * `<nodes>` list and an `<edges>` list.
 *
 * @param graph Graph whose nodes and edges are written out.
 * @returns The pretty-printed XML document as a string.
 */
function buildXml(graph: Graph): string {
  const gexf = create("gexf", { version: "1.0", encoding: "UTF-8" });
  const graphElem = gexf.ele("graph", {
    defaultedgetype: "undirected",
    mode: "static",
  });

  const nodes = graphElem.ele("nodes");
  graph.nodes.forEach((node: Node) => {
    // The id must be written out: edges refer to nodes by id via
    // source/target, so without it the references cannot be resolved.
    nodes.ele("node", { id: node.id, label: node.label });
  });

  const edges = graphElem.ele("edges");
  graph.edges.forEach((edge: Edge) => {
    edges.ele("edge", {
      source: edge.source,
      target: edge.target,
      weight: edge.weight,
    });
  });

  return gexf.end({ pretty: true });
}

/**
 * Builds the similarity graph from the imported embeddings and writes it to
 * disk as a GEXF file.
 *
 * Any failure while building the graph, serializing it, or writing the file
 * is reported through `coloredLog`; the returned promise does not reject.
 *
 * @param outputFilePath Path of the GEXF file to write.
 */
async function writeDateToGexfFile(outputFilePath: string): Promise<void> {
  try {
    const graph = createGraph(chunks as EmbeddedSourceChunk[]);
    const gexfContent = buildXml(graph);

    // Awaited directly (no chained .catch) so that a failed write reaches the
    // catch block below instead of being logged separately and swallowed.
    await fs.writeFile(outputFilePath, gexfContent);
  } catch (error) {
    coloredLog("Failed to save data to file", "error", error);
  }
}

// Script entry point: generate the graph file at the configured path.
writeDateToGexfFile(dataPath);
27 changes: 27 additions & 0 deletions src/visualization/models/Graph.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
/** A vertex of the graph. */
export type Node = {
  /** Identifier of the node. */
  id: string;
  /** Human-readable label of the node. */
  label: string;
};

/** A connection between two nodes. */
export type Edge = {
  /** Identifier of the node at one end of the edge. */
  source: string;
  /** Identifier of the node at the other end of the edge. */
  target: string;
  /** Edge weight, stored as a string. */
  weight: string;
};
/**
 * Minimal in-memory graph: two append-only lists, one for nodes and one for
 * edges. No validation or de-duplication is performed.
 */
export class Graph {
  /** Nodes in insertion order. */
  nodes: Node[] = [];
  /** Edges in insertion order. */
  edges: Edge[] = [];

  /** Appends `node` to the end of the node list. */
  addNode(node: Node): void {
    const { nodes } = this;
    nodes.push(node);
  }

  /** Appends `edge` to the end of the edge list. */
  addEdge(edge: Edge): void {
    const { edges } = this;
    edges.push(edge);
  }
}