6 changes: 0 additions & 6 deletions apps/trustlab/public/robots.txt

This file was deleted.

3 changes: 3 additions & 0 deletions apps/trustlab/src/app/robots.js
@@ -0,0 +1,3 @@
import { getRobotsTxtContent } from "@/trustlab/lib/data";

export default getRobotsTxtContent;
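For context: Next.js treats the default export of app/robots.js as the generator for the served robots.txt, so whatever getRobotsTxtContent resolves to is serialized into plain text. A minimal, hypothetical sketch of the kind of object it needs to return (the values are illustrative, not taken from the site settings):

// Hand-written stand-in for app/robots.js; Next.js turns the returned
// object into the text served at /robots.txt.
export default function robots() {
  return {
    rules: [{ userAgent: "*", disallow: "/" }],
    sitemap: "https://example.com/sitemap.xml", // illustrative URL only
  };
}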
5 changes: 5 additions & 0 deletions apps/trustlab/src/lib/data/common/seo.js
@@ -1,4 +1,5 @@
import { site } from "@/trustlab/utils";
import parseRobotsToMetadata from "@/trustlab/utils/parseRobotsTxt";

const HOMEPAGE_TITLES = ["home", "homepage", "index"];

@@ -60,4 +61,8 @@ export function getPageSeoFromMeta(page, settings) {
};
}

export function parseRobotsTxt(content = "") {
  return parseRobotsToMetadata(content);
}

export default undefined;
7 changes: 6 additions & 1 deletion apps/trustlab/src/lib/data/index.js
@@ -1,3 +1,8 @@
export { getPageStaticPaths, getPageStaticProps } from "./local";
export {
  getPageStaticPaths,
  getPageStaticProps,
  getServerSideProps,
  getRobotsTxtContent,
} from "./local";

export default undefined;
18 changes: 18 additions & 0 deletions apps/trustlab/src/lib/data/local/index.js
@@ -1,4 +1,5 @@
import { getPageProps, getPagePaths } from "@/trustlab/lib/data/common";
import { parseRobotsTxt } from "@/trustlab/lib/data/common/seo";
import api from "@/trustlab/lib/payload";

export async function getPageStaticPaths() {
@@ -15,3 +16,20 @@ export async function getPageStaticProps(context) {
    revalidate: 60,
  };
}

export async function getServerSideProps(context) {
Review comment (Member): Is this being used anywhere?

  const props = await getPageProps(api, context);
  if (!props) {
    return { notFound: true };
  }
  return {
    props,
  };
}

export async function getRobotsTxtContent() {
Review comment (Member): Since we're parsing the content into a Robots object, shouldn't this be:

Suggested change:
- export async function getRobotsTxtContent() {
+ export async function getRobots() {

  const siteSettings = await api.findGlobal("site-settings");
  return parseRobotsTxt(siteSettings?.robotsTxt);
}

export default undefined;
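As a rough sanity check (a sketch assuming the parser in apps/trustlab/src/utils/parseRobotsTxt.js behaves as written further down), getRobotsTxtContent should resolve to something like the following for the default field value "User-agent: *\nDisallow: /":

// Approximate return value; sitemap and cleanParams stay empty, host stays
// null, and errors is undefined because diagnostics are not requested here.
{
  rules: [{ userAgent: "*", disallow: "/" }],
  sitemap: [],
  host: null,
  cleanParams: [],
  errors: undefined,
}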
32 changes: 32 additions & 0 deletions apps/trustlab/src/payload/globals/tabs/SeoTab.js
@@ -7,6 +7,26 @@ import {
/* eslint-disable-next-line import/no-unresolved */
} from "@payloadcms/plugin-seo/fields";

import parseRobotsToMetadata from "@/trustlab/utils/parseRobotsTxt";

const validateRobotsTxt = (value) => {
  if (!value?.trim()) {
    return true;
  }
  const result = parseRobotsToMetadata(value, { collectDiagnostics: true });
  if (!result.errors?.length) {
    return true;
  }
  const message = result.errors
    .map(({ line, directive, reason }) =>
      [`line ${line}`, directive ? `directive "${directive}"` : null, reason]
        .filter(Boolean)
        .join(" "),
    )
    .join("; ");
  return `Invalid robots.txt: ${message}`;
};

const SeoTab = {
  label: "SEO",
  fields: [
@@ -46,6 +66,18 @@ const SeoTab = {
}),
],
},
    {
      name: "robotsTxt",
      label: "robots.txt content",
Review comment (Member):

Suggested change:
- label: "robots.txt content",
+ label: "robots.txt",

      type: "code",
      defaultValue: "User-agent: *\nDisallow: /",
      admin: {
        language: "plaintext",
        rows: 14,
        description: "Paste the exact robots.txt text to serve.",
Review comment (Member):

Suggested change:
- description: "Paste the exact robots.txt text to serve.",
+ description: "Enter the exact robots.txt text to serve.",

Why paste?

      },
      validate: validateRobotsTxt,
    },
  ],
};
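To illustrate how the validator surfaces problems (a sketch using invented inputs, not data from this PR), a file with a missing colon and an unknown directive should produce a combined message along these lines:

// Hypothetical inputs, used only to show the error-message format.
validateRobotsTxt("User-agent: *\nDisallow: /");
// => true (parses cleanly)
validateRobotsTxt("User-agent *\nFoo: bar");
// => 'Invalid robots.txt: line 1 directive "User-agent *" Missing ":" separator; line 2 directive "Foo" Unknown directive'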

203 changes: 203 additions & 0 deletions apps/trustlab/src/utils/parseRobotsTxt.js
Review comment (Member): Again, from the bots:

Looking at your code and the Next.js robots object interface, there's a mismatch. The Next.js robots metadata object expects a specific shape like:

type Robots = {
  rules: Rule | Rule[]
  sitemap?: string | string[]
  host?: string
}

type Rule = {
  userAgent?: string | string[]
  allow?: string | string[]
  disallow?: string | string[]
  crawlDelay?: number
}

Your parser is fairly close but has some unnecessary complexity for this use case. A few observations:

  1. cacheDelay, visitTime, cleanParam — Next.js doesn't use these, so they're dead weight if your only goal is producing a Next.js-compatible object.
  2. The regex-per-directive approach is overkill when you're already doing keyLower comparison — just compare strings directly.
  3. appendDirectiveValue / appendUserAgent add complexity to handle the "single value vs array" duality, but you could simplify by always collecting into arrays and flattening at the end.
  4. The autoUserAgent / ensureRule pattern is a bit convoluted for handling implicit * groups.

Here's a leaner version focused specifically on producing a Next.js-compatible robots object:

/**
 * Parses a robots.txt string into a Next.js-compatible robots metadata object.
 * @see https://nextjs.org/docs/app/api-reference/file-conventions/metadata/robots#robots-object
 *
 * @param {string} raw - robots.txt content
 * @returns {{ rules: Rule | Rule[], sitemap?: string | string[], host?: string }}
 */
export default function parseRobotsToNextJs(raw = "") {
  const lines = String(raw).replace(/\r\n?/g, "\n").split("\n");
  const groups = []; // { userAgent: string[], allow: string[], disallow: string[], crawlDelay?: number }
  const sitemaps = [];
  let host = null;
  let cur = null;
  let curHasDirectives = false;

  const ensureGroup = () => {
    if (!cur) {
      cur = { userAgent: ["*"], allow: [], disallow: [] };
      groups.push(cur);
      curHasDirectives = false;
    }
    return cur;
  };

  for (const rawLine of lines) {
    const line = rawLine.replace(/#.*$/, "").trim();
    if (!line) continue;

    const sep = line.indexOf(":");
    if (sep === -1) continue;

    const key = line.slice(0, sep).trim().toLowerCase();
    const val = line.slice(sep + 1).trim();

    switch (key) {
      case "sitemap":
        if (val) sitemaps.push(val);
        break;
      case "host":
        if (val) host = val;
        break;
      case "user-agent":
        if (!cur || curHasDirectives) {
          cur = { userAgent: [val || "*"], allow: [], disallow: [] };
          groups.push(cur);
          curHasDirectives = false;
        } else {
          cur.userAgent.push(val || "*");
        }
        break;
      case "allow":
        ensureGroup().allow.push(val);
        curHasDirectives = true;
        break;
      case "disallow":
        ensureGroup().disallow.push(val);
        curHasDirectives = true;
        break;
      case "crawl-delay": {
        const n = Number(val);
        if (Number.isFinite(n)) ensureGroup().crawlDelay = n;
        curHasDirectives = true;
        break;
      }
      // Ignore unknown directives silently
    }
  }

  // Collapse single-element arrays to plain values for Next.js compat
  const unwrap = (arr) => (arr.length === 1 ? arr[0] : arr.length ? arr : undefined);

  const rules = groups.map((g) => {
    const rule = { userAgent: unwrap(g.userAgent) };
    const allow = unwrap(g.allow);
    const disallow = unwrap(g.disallow);
    if (allow !== undefined) rule.allow = allow;
    if (disallow !== undefined) rule.disallow = disallow;
    if (g.crawlDelay !== undefined) rule.crawlDelay = g.crawlDelay;
    return rule;
  });

  const result = { rules: rules.length === 1 ? rules[0] : rules };
  const sm = unwrap(sitemaps);
  if (sm !== undefined) result.sitemap = sm;
  if (host) result.host = host;
  return result;
}
Key improvements over your original:

  • ~60% less code by dropping unused directives and regex objects in favor of a simple switch on the lowercased key string.
  • Always collects into arrays first, then unwraps at the end — no juggling between scalar/array mid-parse.
  • Strips inline comments (# ...) from lines, which your original didn't handle.
  • Output matches the Next.js shape exactly: rules is a single Rule when there's one group, otherwise Rule[]; same for sitemap.
  • No temporary markers like autoUserAgent that need cleanup.

If you also need diagnostics for debugging malformed files, you could add that back as an opt-in, but for a "parse robots.txt → Next.js metadata" utility, this is all you need.
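For concreteness (a sketch with a made-up input, not part of the review), running the leaner parser over a small file shows the single-group collapse and the flattened sitemap value:

// Illustrative input only; the paths and URL are invented.
parseRobotsToNextJs(
  "User-agent: *\nDisallow: /admin\nAllow: /admin/login\nSitemap: https://example.com/sitemap.xml",
);
// => {
//      rules: { userAgent: "*", allow: "/admin/login", disallow: "/admin" },
//      sitemap: "https://example.com/sitemap.xml",
//    }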

@@ -0,0 +1,203 @@
const USER_AGENT_REGEX = /^user-agent$/i;
const ALLOW_REGEX = /^allow$/i;
const DISALLOW_REGEX = /^disallow$/i;
const CRAWL_DELAY_REGEX = /^crawl-delay$/i;
const CACHE_DELAY_REGEX = /^cache-delay$/i;
const VISIT_TIME_REGEX = /^visit-time$/i;
const SITEMAP_REGEX = /^sitemap$/i;
const HOST_REGEX = /^host$/i;
const CLEAN_PARAM_REGEX = /^clean-param$/i;

const KNOWN_DIRECTIVES = new Set([
  "user-agent",
  "allow",
  "disallow",
  "crawl-delay",
  "cache-delay",
  "visit-time",
  "sitemap",
  "host",
  "clean-param",
]);

const normalizeString = (value) =>
  typeof value === "string" ? value : `${value ?? ""}`;

const appendDirectiveValue = (previous, value) => {
  if (previous === undefined || previous === null || previous === "") {
    return value;
  }
  if (Array.isArray(previous)) {
    return [...previous, value];
  }
  return [previous, value];
};

const appendUserAgent = (previous, value) => {
  const normalized = value || "*";
  if (!previous || previous === "") {
    return normalized;
  }
  if (Array.isArray(previous)) {
    return [...previous, normalized];
  }
  if (previous === normalized) {
    return previous;
  }
  return [previous, normalized];
};

const startRule = (rules, userAgent, meta = {}) => {
  const rule = { userAgent: userAgent || "*", ...meta };
  rules.push(rule);
  return rule;
};

const updateRule = (rule, next) => Object.assign(rule, next);
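/**
 * Parses raw robots.txt text into an object shaped for Next.js robots metadata:
 * one rules entry per user-agent group, plus collected sitemap entries, host,
 * and cleanParams. When options.collectDiagnostics is true, lines missing a
 * ":" separator and unknown directives are reported in the returned errors array.
 */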

export default function parseRobotsToMetadata(rawContent = "", options = {}) {
  const { collectDiagnostics = false } = options;
  const diagnostics = collectDiagnostics ? [] : undefined;
  const content = normalizeString(rawContent);
  const rules = [];
  const sitemap = [];
  const cleanParams = [];
  let host = null;
  let current = null;
  let currentHasDirectives = false;

  const ensureRule = () => {
    if (!current) {
      current = startRule(rules, "*", { autoUserAgent: true });
      currentHasDirectives = false;
    }
    return current;
  };

  content
    .replace(/\r\n?/g, "\n")
    .split("\n")
    .forEach((rawLine, index) => {
      const line = rawLine.trim();
      if (!line || line.startsWith("#")) {
        return;
      }

      const separatorIndex = line.indexOf(":");
      if (separatorIndex === -1) {
        if (diagnostics) {
          diagnostics.push({
            line: index + 1,
            directive: line,
            reason: 'Missing ":" separator',
          });
        }
        return;
      }

      const key = line.slice(0, separatorIndex).trim();
      const keyLower = key.toLowerCase();
      const value = line.slice(separatorIndex + 1).trim();

      if (!KNOWN_DIRECTIVES.has(keyLower)) {
        if (diagnostics) {
          diagnostics.push({
            line: index + 1,
            directive: key,
            reason: "Unknown directive",
          });
        }
        return;
      }

      if (SITEMAP_REGEX.test(key)) {
        if (value) {
          sitemap.push(value);
        }
        return;
      }

      if (HOST_REGEX.test(key)) {
        if (value) {
          host = value;
        }
        return;
      }

      if (CLEAN_PARAM_REGEX.test(key)) {
        if (value) {
          cleanParams.push(value);
        }
        return;
      }

      if (USER_AGENT_REGEX.test(key)) {
        if (current && current.autoUserAgent) {
          updateRule(current, { userAgent: value || "*" });
          delete current.autoUserAgent;
        } else if (!current || currentHasDirectives) {
          current = startRule(rules, value || "*");
        } else {
          updateRule(current, {
            userAgent: appendUserAgent(current.userAgent, value || "*"),
          });
        }
        currentHasDirectives = false;
        return;
      }

      const rule = ensureRule();

      if (ALLOW_REGEX.test(key)) {
        updateRule(rule, { allow: appendDirectiveValue(rule.allow, value) });
        currentHasDirectives = true;
        return;
      }

      if (DISALLOW_REGEX.test(key)) {
        updateRule(rule, {
          disallow: appendDirectiveValue(rule.disallow, value),
        });
        currentHasDirectives = true;
        return;
      }

      if (CRAWL_DELAY_REGEX.test(key)) {
        const numeric = Number(value);
        updateRule(rule, {
          crawlDelay: Number.isFinite(numeric) ? numeric : value,
        });
        currentHasDirectives = true;
        return;
      }

      if (CACHE_DELAY_REGEX.test(key)) {
        const numeric = Number(value);
        updateRule(rule, {
          cacheDelay: Number.isFinite(numeric) ? numeric : value,
        });
        currentHasDirectives = true;
        return;
      }

      if (VISIT_TIME_REGEX.test(key)) {
        updateRule(rule, {
          visitTime: appendDirectiveValue(rule.visitTime, value),
        });
        currentHasDirectives = true;
      }
    });

  const sanitizedRules = rules.map((ruleEntry) => {
    const { autoUserAgent, ...cleanRule } = ruleEntry;
    return cleanRule;
  });

  return {
    rules: sanitizedRules,
    sitemap,
    host,
    cleanParams,
    errors: diagnostics,
  };
}