Skip to content

Commit f2fb683

Browse files
feat(details): render & detect language of language-less snippets
Also skip language detection when the code is empty, and try to detect the language from a comment on the first line before falling back to the regular expressions
1 parent e9b41fb commit f2fb683

File tree

2 files changed

+66
-20
lines changed

2 files changed

+66
-20
lines changed

src/routes/[pid=pid]/[org]/[repo]/[id=number]/PageRenderer.svelte

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -20,14 +20,13 @@
2020
});
2121
2222
const loadedLanguages = loadLanguages({
23-
svelte,
24-
typescript,
25-
javascript,
26-
html,
27-
css,
23+
sh: shell,
2824
json,
29-
shell,
30-
diff
25+
css,
26+
html,
27+
js: javascript,
28+
ts: typescript,
29+
svelte
3130
});
3231
</script>
3332

@@ -93,7 +92,8 @@
9392
transformerTrimCode,
9493
transformerLanguageDetection(loadedLanguages),
9594
transformerDiffMarking
96-
]
95+
],
96+
defaultLanguage: "text"
9797
} satisfies Parameters<typeof rehypeShikiFromHighlighter>[1]
9898
]
9999
};

src/routes/[pid=pid]/[org]/[repo]/[id=number]/syntax-highlighting.ts

Lines changed: 58 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,14 @@
11
import { browser } from "$app/environment";
22
import posthog from "posthog-js";
3-
import type { LanguageRegistration, ShikiTransformer } from "shiki";
3+
import { type LanguageRegistration, type ShikiTransformer, isPlainLang } from "shiki";
4+
import { dlog } from "$lib/debug";
45

56
/**
67
* Pre-load the languages by returning regular expressions from language
78
* registrations.
89
*
910
* @param languages a set of languages and their associated registrations.
11+
* Languages must be ordered from the least weighted to the most weighted.
1012
* @returns a set of languages and their associated regular expressions to test code against.
1113
*/
1214
export function loadLanguages(
@@ -59,27 +61,61 @@ export function detectLanguage(
5961
let highestRate = 0;
6062
let highestTotal = 0;
6163

64+
const trimmed = code.trim();
65+
if (!trimmed) return languageCandidate;
66+
67+
dlog("===== Starting determining language for:");
68+
dlog(code);
69+
dlog("=====");
70+
71+
// try detecting language based off of first line comment
72+
const firstLine = trimmed.split("\n").shift()?.trim();
73+
if (firstLine) {
74+
const isComment =
75+
(firstLine.startsWith("//") || firstLine.startsWith("#")) && !firstLine.includes(" ");
76+
if (isComment) {
77+
dlog(`First line comment: ${firstLine}`);
78+
const firstSplit = firstLine.split(".");
79+
if (firstSplit.length) {
80+
const extension = firstSplit.pop();
81+
if (extension && Object.keys(languages).includes(extension)) {
82+
dlog(`Found valid language from first comment: ${extension}`);
83+
return extension;
84+
}
85+
}
86+
}
87+
}
88+
89+
// otherwise, loop over regexes
6290
for (const [language, regexps] of Object.entries(languages)) {
6391
if (!regexps.length) continue;
64-
const matchesCount = regexps
65-
.map(regexp => {
66-
try {
67-
return code.match(regexp)?.length ?? 0;
68-
} catch {
69-
return 0;
70-
}
71-
})
72-
.reduce((acc, b) => acc + b, 0);
73-
const successRate = matchesCount / regexps.length;
92+
const compute = regexps.map<{ matches: boolean; count: number }>(regexp => {
93+
try {
94+
const match = code.match(regexp);
95+
return { matches: !!match, count: match?.length ?? 0 };
96+
} catch {
97+
return { matches: false, count: 0 };
98+
}
99+
});
100+
const matchesLength = compute.reduce((acc, item) => acc + item.count, 0);
101+
const matchesCount = compute.filter(item => item.matches).length;
102+
const successRate = matchesLength / matchesCount;
103+
dlog(
104+
`[${language}]\t${matchesLength} on ${matchesCount} regexes matches over ${regexps.length} regexes - success rate: ${Math.round((successRate * 100 + Number.EPSILON) * 100) / 100}%`
105+
);
74106
if (
75107
successRate > highestRate ||
76108
(successRate === highestRate && regexps.length > highestTotal)
77109
) {
110+
dlog(
111+
`New candidate found! Previous values: ${languageCandidate} - highest rate ${highestRate}, highest total regexes: ${highestTotal}`
112+
);
78113
languageCandidate = language;
79114
highestRate = successRate;
80115
highestTotal = regexps.length;
81116
}
82117
}
118+
dlog(`Done: result is ${languageCandidate}`);
83119
return languageCandidate;
84120
}
85121

@@ -103,6 +139,8 @@ export function transformerLanguageDetection(
103139
return {
104140
preprocess(code, options) {
105141
if (options.lang === "diff") {
142+
// tests:
143+
// - /issues/sveltejs/svelte/14280
106144
const cleanedCode = code
107145
.split("\n")
108146
.map(line => line.replace(/^[+-]/, ""))
@@ -117,8 +155,16 @@ export function transformerLanguageDetection(
117155
}
118156
options.lang = detectedLanguage;
119157
if (options.meta) options.meta["data-detected"] = true;
120-
return code;
158+
} else if (isPlainLang(options.lang)) {
159+
// tests:
160+
// - /issues/sveltejs/svelte/16072
161+
const detectedLanguage = detectLanguage(code, languages);
162+
if (detectedLanguage) {
163+
options.lang = detectedLanguage;
164+
if (options.meta) options.meta["data-detected"] = true;
165+
}
121166
}
167+
return code;
122168
},
123169
pre(node) {
124170
node.properties["data-language"] = this.options.lang

0 commit comments

Comments
 (0)