Skip to content

Commit 58f8e1a

Browse files
committed
feat(algorithms): add hub penalty options to mutual information computation
Add three new configuration options for MI adjustment that reduce the influence of high-degree nodes (hubs) and common edge types:
- useDegreeBasedPenalty: exponential penalty using log(degree)
- useIDFWeighting: IDF-style weighting from information retrieval
- useEdgeTypeRarity: penalty based on edge type probability

These options reduce the impact of ubiquitous nodes and edges in path ranking, improving recommendation diversity.
1 parent 3150e38 commit 58f8e1a

File tree

1 file changed

+84
-0
lines changed

1 file changed

+84
-0
lines changed

packages/algorithms/src/pathfinding/mutual-information.ts

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,37 @@ export interface MutualInformationConfig<N extends Node, E extends Edge = Edge>
121121
* @default 1e-10
122122
*/
123123
epsilon?: number;
124+
125+
/**
126+
* Enable degree-based penalty to reduce MI for high-degree nodes (hubs).
127+
* Penalizes edges connected to nodes with many connections.
128+
* @default false
129+
*/
130+
useDegreeBasedPenalty?: boolean;
131+
132+
/**
133+
* Penalty factor for degree-based MI reduction.
134+
* MI_adjusted = MI_base × exp(-degreeBasedPenaltyFactor × (log(deg(u)+1) + log(deg(v)+1)))
135+
* Higher values = stronger penalty for high-degree nodes.
136+
* @default 0.5
137+
*/
138+
degreeBasedPenaltyFactor?: number;
139+
140+
/**
141+
* Enable IDF-style weighting to reduce MI for high-degree nodes.
142+
* Uses inverse document frequency formula from information retrieval.
143+
* MI_adjusted = MI_base × log(N/(deg(u)+1)) × log(N/(deg(v)+1))
144+
* @default false
145+
*/
146+
useIDFWeighting?: boolean;
147+
148+
/**
149+
* Enable edge type rarity penalty.
150+
* Penalizes common edge types (like "has_concept") and boosts rare ones.
151+
* MI_adjusted = MI_base × (-log(P(edge_type)))
152+
* @default false
153+
*/
154+
useEdgeTypeRarity?: boolean;
124155
}
125156

126157
/**
@@ -493,6 +524,10 @@ export const precomputeMutualInformation = <N extends Node, E extends Edge>(
493524
temporalDecay = 0.001,
494525
referenceTime = Date.now(),
495526
epsilon = 1e-10,
527+
useDegreeBasedPenalty = false,
528+
degreeBasedPenaltyFactor = 0.5,
529+
useIDFWeighting = false,
530+
useEdgeTypeRarity = false,
496531
} = config;
497532

498533
const cache = new Map<string, number>();
@@ -556,6 +591,29 @@ export const precomputeMutualInformation = <N extends Node, E extends Edge>(
556591
}
557592
}
558593

594+
// Pre-compute node degrees for degree-based penalties
595+
const nodeDegrees = new Map<string, number>();
596+
const totalNodes = graph.getNodeCount();
597+
598+
if (useDegreeBasedPenalty || useIDFWeighting) {
599+
for (const edge of edges) {
600+
nodeDegrees.set(edge.source, (nodeDegrees.get(edge.source) ?? 0) + 1);
601+
nodeDegrees.set(edge.target, (nodeDegrees.get(edge.target) ?? 0) + 1);
602+
}
603+
}
604+
605+
// Pre-compute edge type rarity for edge type penalty
606+
const totalEdges = edges.length;
607+
const edgeTypeRarity = new Map<string, number>();
608+
609+
if (useEdgeTypeRarity && edgeTypeCounts) {
610+
for (const [edgeType, count] of edgeTypeCounts.entries()) {
611+
const probability = count / totalEdges;
612+
// Rarity = -log(P(edge_type))
613+
edgeTypeRarity.set(edgeType, -Math.log(probability + epsilon));
614+
}
615+
}
616+
559617
// Pre-compute neighbour sets for structural MI and hyperedges
560618
const neighbourCache = new Map<string, Set<string>>();
561619

@@ -675,6 +733,32 @@ export const precomputeMutualInformation = <N extends Node, E extends Edge>(
675733
modifier *= computeCommunityModifier(community1, community2, communityBoost);
676734
}
677735

736+
// Option 1: Degree-based exponential penalty
737+
// MI_adjusted = MI_base × exp(-α × (log(deg(u)+1) + log(deg(v)+1)))
738+
if (useDegreeBasedPenalty) {
739+
const sourceDegree = nodeDegrees.get(edge.source) ?? 0;
740+
const targetDegree = nodeDegrees.get(edge.target) ?? 0;
741+
const degreeSum = Math.log(sourceDegree + 1) + Math.log(targetDegree + 1);
742+
modifier *= Math.exp(-degreeBasedPenaltyFactor * degreeSum);
743+
}
744+
745+
// Option 2: IDF-style weighting
746+
// MI_adjusted = MI_base × log(N/(deg(u)+1)) × log(N/(deg(v)+1))
747+
if (useIDFWeighting) {
748+
const sourceDegree = nodeDegrees.get(edge.source) ?? 0;
749+
const targetDegree = nodeDegrees.get(edge.target) ?? 0;
750+
const sourceIDF = Math.log((totalNodes / (sourceDegree + 1)) + epsilon);
751+
const targetIDF = Math.log((totalNodes / (targetDegree + 1)) + epsilon);
752+
modifier *= sourceIDF * targetIDF;
753+
}
754+
755+
// Option 3: Edge type rarity penalty
756+
// MI_adjusted = MI_base × (-log(P(edge_type)))
757+
if (useEdgeTypeRarity) {
758+
const rarity = edgeTypeRarity.get(edge.type) ?? 1.0;
759+
modifier *= rarity;
760+
}
761+
678762
return modifier;
679763
};
680764

0 commit comments

Comments
 (0)