Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .oxlintrc.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,14 @@
"max-lines": "off",
"max-depth": "off"
},
"overrides": [
{
"files": ["next-env.d.ts"],
"rules": {
"triple-slash-reference": "off"
}
}
],
"env": {
"browser": true,
"node": true
Expand Down
161 changes: 161 additions & 0 deletions app/api/_lib/browser.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
import {
ReconResponse,
reconResponseSchema,
VerbindungResponse,
VerbindungResponseSchema,
} from "@/utils/schemas";
import { validateJson } from "@/utils/validateJson";
import UserAgent from "user-agents";
import { z, ZodType } from "zod/v4";
import { Browser, HTTPResponse, Page } from "puppeteer";
import puppeteer from "puppeteer-extra";
import StealthPlugin from "puppeteer-extra-plugin-stealth";

puppeteer.use(StealthPlugin());

/**
 * Applies a freshly generated user agent to a Puppeteer page.
 *
 * The page's user agent string and platform, as well as its viewport
 * dimensions, are all taken from the same generated `UserAgent` instance.
 * @param page - The Puppeteer page instance to configure.
 * @returns The generated `UserAgent` instance that was applied to the page.
 */
export const setRandomUserAgent = async (page: Page): Promise<UserAgent> => {
  const generated = new UserAgent();
  const fingerprint = generated.data;

  // Identity first, then the viewport that belongs to the same fingerprint.
  await page.setUserAgent({
    userAgent: fingerprint.userAgent,
    platform: fingerprint.platform,
  });
  await page.setViewport({
    width: fingerprint.viewportWidth,
    height: fingerprint.viewportHeight,
  });

  return generated;
};

/**
 * Sets up a new browser instance and page with random user agent.
 *
 * If anything fails after the browser has been launched (opening the page or
 * applying the user agent), the browser is closed before the error is
 * rethrown, because the caller never receives a handle it could close itself.
 * @returns A tuple containing the browser and page instances.
 * @throws Rethrows any error raised while launching or configuring the browser.
 */
export const setupBrowserAndPage = async (): Promise<[Browser, Page]> => {
  const browser = await puppeteer.launch();

  try {
    const page = await browser.newPage();
    const randomUserAgent = await setRandomUserAgent(page);

    console.log(
      `🌐 Browser was setup with user agent "${randomUserAgent.data.userAgent}", platform "${randomUserAgent.data.platform}" and viewport ${randomUserAgent.data.viewportWidth}x${randomUserAgent.data.viewportHeight}.`
    );

    return [browser, page];
  } catch (error) {
    // Best-effort cleanup: a failure while closing must not mask the
    // original setup error, so it is deliberately ignored here.
    await browser.close().catch(() => undefined);
    throw error;
  }
};

/**
 * Intercepts a specific bahn.de HTTP response and validates it against a schema.
 *
 * Only responses whose hostname is `bahn.de` or a subdomain of it, and whose
 * pathname equals `pathname` exactly, are considered. The first such response
 * settles the promise; the listener is removed afterwards.
 * @param page - The Puppeteer page instance.
 * @param schema - Zod schema to validate the response body against.
 * @param pathname - The URL pathname to wait for.
 * @param timeout - Timeout in milliseconds (default: 15000).
 * @returns The validated response data. The promise rejects if the timeout
 * elapses before a matching response arrives, or if reading or validating the
 * response body throws.
 */
export const interceptResponse = <T extends ZodType>(
  page: Page,
  schema: T,
  pathname: string,
  timeout = 15000
): Promise<z.output<typeof schema>> => {
  return new Promise((resolve, reject) => {
    // Set up a timeout to avoid waiting indefinitely
    const failTimer = setTimeout(() => {
      // Clean up the event listener to prevent memory leaks
      page.off("response", onPageResponse);

      reject(
        new Error(
          `Timeout of ${timeout}ms exceeded while awaiting response at "${pathname}".`
        )
      );
    }, timeout);

    async function onPageResponse(response: HTTPResponse) {
      const { hostname, pathname: responsePathname } = new URL(response.url());

      // Match the registrable domain exactly (or as a dot-separated suffix).
      // A plain substring check would also accept unrelated hosts such as
      // "bahn.de.example.com" or "notbahn.de".
      const isBahnHost =
        hostname === "bahn.de" || hostname.endsWith(".bahn.de");
      if (!isBahnHost || responsePathname !== pathname) {
        return;
      }

      // A CORS preflight shares the URL of the real request but carries no
      // JSON payload; keep listening for the actual response instead of
      // letting the preflight consume this one-shot listener.
      if (response.request().method() === "OPTIONS") {
        return;
      }

      // Clear the timeout and clean up the event listener
      clearTimeout(failTimer);
      page.off("response", onPageResponse);

      try {
        const json = await response.json();
        resolve(validateJson(schema, json));
      } catch (error) {
        reject(error);
      }
    }

    // We're registering a named function so we can unsubscribe once we got what we want.
    page.on("response", onPageResponse);
  });
};

/**
 * Waits for the Recon response on the given page.
 *
 * Delegates to `interceptResponse` with the Recon schema and pathname, using
 * that function's default timeout.
 * @param page - The Puppeteer page instance to listen on.
 * @returns The Recon response as validated against `reconResponseSchema`.
 */
export const interceptReconResponse = async (
  page: Page
): Promise<ReconResponse> => {
  const reconPathname = "/web/api/angebote/recon";
  const intercepted = await interceptResponse(
    page,
    reconResponseSchema,
    reconPathname
  );

  console.log("🌐 Browser has intercepted Recon response.");
  return intercepted;
};

/**
 * Waits for the Verbindung response of a given journey on the given page.
 *
 * Delegates to `interceptResponse` with the Verbindung schema and a pathname
 * that ends in the journey ID, using that function's default timeout.
 * @param page - The Puppeteer page instance to listen on.
 * @param vbid - The journey ID appended to the Verbindung API pathname.
 * @returns The Verbindung response as validated against `VerbindungResponseSchema`.
 */
export const interceptVerbindungResponse = async (
  page: Page,
  vbid: string
): Promise<VerbindungResponse> => {
  const verbindungPathname = `/web/api/angebote/verbindung/${vbid}`;
  const intercepted = await interceptResponse(
    page,
    VerbindungResponseSchema,
    verbindungPathname
  );

  console.log("🌐 Browser has intercepted Verbindung response.");
  return intercepted;
};

/**
 * Gets Recon and Verbindung responses by navigating to a bahn.de booking URL with a browser.
 *
 * Both response interceptors are registered before navigation starts and are
 * awaited together with the navigation itself. The browser is always closed
 * afterwards, whether the operation succeeded or failed.
 * @param vbid - The journey ID to retrieve data for.
 * @returns A tuple containing the recon and verbindungen responses.
 * @throws Rethrows (after logging) any error from the interceptors or the navigation.
 */
export const getReconAndVerbindungenBrowserResponses = async (
  vbid: string
): Promise<[ReconResponse, VerbindungResponse]> => {
  // The journey ID may originate from a user-supplied URL. Encode it so that
  // characters such as "&" or "#" cannot inject extra query parameters or
  // cut the URL short.
  const urlToVisit = `https://www.bahn.de/buchung/start?vbid=${encodeURIComponent(vbid)}`;
  const [browser, page] = await setupBrowserAndPage();

  console.log(`🌐 Browser is visiting "${urlToVisit}".`);

  try {
    // The navigation result itself is not needed; it is only awaited so that
    // navigation failures surface as errors.
    const [reconResponse, verbindungenResponse] = await Promise.all([
      interceptReconResponse(page),
      interceptVerbindungResponse(page, vbid),
      page.goto(urlToVisit, {
        waitUntil: "networkidle0",
      }),
    ]);

    return [reconResponse, verbindungenResponse];
  } catch (error) {
    console.error(`❌ Error during browser operations:`, error);
    throw error;
  } finally {
    // Ensure the browser is closed even if an error occurs
    await browser.close();
    console.log(`🌐 Browser was closed.`);
  }
};
161 changes: 82 additions & 79 deletions app/api/parse-url/route.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
import { fetchAndValidateJson } from "@/utils/fetchAndValidateJson";
import { parseHinfahrtReconWithAPI } from "@/utils/parseHinfahrtRecon";
import { vbidSchema } from "@/utils/schemas";
import { ReconResponse, VerbindungResponse } from "@/utils/schemas";
import type { ExtractedData } from "@/utils/types";
import { apiErrorHandler } from "../_lib/error-handler";
import { getReconAndVerbindungenBrowserResponses } from "../_lib/browser";

// POST-Route für URL-Parsing
const handler = async (request: Request) => {
Expand All @@ -15,13 +14,26 @@ const handler = async (request: Request) => {
{ status: 400 }
);
}
const vbid = new URL(url).searchParams.get("vbid");
if (!vbid) {
return Response.json(
{ error: "URL is missing required vbid parameter" },
{ status: 400 }
);
}

const journeyDetails = extractJourneyDetails(
await getResolvedUrlBrowserless(url)
);

if ("error" in journeyDetails) {
return Response.json({ error: journeyDetails.error });
let journeyDetails: ExtractedData;
try {
journeyDetails = await extractJourneyDetailsByVbid(vbid);
} catch (error) {
console.error(
`❌ Error extracting journey details by vbid ${vbid}:`,
error
);
return Response.json(
{ error: "Failed to extract journey details." },
{ status: 500 }
);
}

if (!journeyDetails.fromStationId || !journeyDetails.toStationId) {
Expand Down Expand Up @@ -78,56 +90,57 @@ const parseDateTime = (value: string | null) => {
return { date: value };
};

function extractJourneyDetails(url: string) {
try {
const urlObj = new URL(url);
const hash = urlObj.hash;

const details: ExtractedData = {
fromStation: null,
fromStationId: null,
toStation: null,
toStationId: null,
date: null,
time: null,
class: null,
};

// Extract from hash parameters (consistent approach)
const params = new URLSearchParams(hash.replace("#", ""));

const soidValue = params.get("soid");
const zoidValue = params.get("zoid");
const dateValue = params.get("hd");
const timeValue = params.get("ht");
const classValue = params.get("kl");

if (soidValue) {
details.fromStationId = extractStationId(soidValue);
details.fromStation = extractStationName(soidValue);
}

if (zoidValue) {
details.toStationId = extractStationId(zoidValue);
details.toStation = extractStationName(zoidValue);
}

// Handle date/time extraction
const dateTimeInfo = parseDateTime(dateValue);
if (dateTimeInfo.date) details.date = dateTimeInfo.date;
if (dateTimeInfo.time && !details.time) details.time = dateTimeInfo.time;
if (timeValue && !details.time) details.time = timeValue;

if (classValue) details.class = parseInt(classValue, 10);

return details;
} catch (error) {
console.error("❌ Error extracting journey details:", error);
return {
error: "Failed to extract journey details",
details: (error as Error).message,
};
/**
 * Resolves the journey details for a journey ID.
 *
 * Fetches the Recon and Verbindung responses via the browser helper, builds a
 * journey search URL from them and extracts the details from that URL.
 * @param vbid - The journey ID to look up.
 * @returns The journey details extracted from the generated search URL.
 */
async function extractJourneyDetailsByVbid(
  vbid: string
): Promise<ExtractedData> {
  const browserResponses = await getReconAndVerbindungenBrowserResponses(vbid);
  const [recon, verbindung] = browserResponses;

  const searchUrl = buildJourneySearchUrl(recon, verbindung);
  return extractJourneyDetails(searchUrl);
}

/**
 * Extracts journey details from the hash parameters of a URL.
 *
 * Reads `soid`, `zoid`, `hd`, `ht` and `kl` from the URL fragment. Fields
 * whose parameter is missing or empty stay `null`. A time contained in the
 * parsed `hd` value takes precedence over the separate `ht` parameter.
 * @param url - The URL whose fragment carries the journey parameters.
 * @returns The extracted journey details.
 */
function extractJourneyDetails(url: string): ExtractedData {
  // The parameters live in the fragment, not in the query string.
  const fragment = new URL(url).hash.replace("#", "");
  const hashParams = new URLSearchParams(fragment);

  const origin = hashParams.get("soid");
  const destination = hashParams.get("zoid");
  const rawDate = hashParams.get("hd");
  const rawTime = hashParams.get("ht");
  const rawClass = hashParams.get("kl");

  // Station lookups run in the same order as before: ID first, then name.
  const fromStationId = origin ? extractStationId(origin) : null;
  const fromStation = origin ? extractStationName(origin) : null;
  const toStationId = destination ? extractStationId(destination) : null;
  const toStation = destination ? extractStationName(destination) : null;

  const parsedDateTime = parseDateTime(rawDate);

  return {
    fromStation,
    fromStationId,
    toStation,
    toStationId,
    date: parsedDateTime.date || null,
    // Prefer the time embedded in the date value, then the explicit one.
    time: parsedDateTime.time || rawTime || null,
    class: rawClass ? parseInt(rawClass, 10) : null,
  };
}

function displayJourneyInfo(journeyDetails: ExtractedData) {
Expand All @@ -151,32 +164,22 @@ function displayJourneyInfo(journeyDetails: ExtractedData) {
console.log(formatInfo);
}

async function getResolvedUrlBrowserless(url: string) {
const vbid = new URL(url).searchParams.get("vbid");

if (!vbid) {
throw new Error("No vbid parameter found in URL");
}

const vbidRequest = await fetchAndValidateJson({
url: `https://www.bahn.de/web/api/angebote/verbindung/${vbid}`,
schema: vbidSchema,
});

const cookies = vbidRequest.response.headers.getSetCookie();
const { data } = await parseHinfahrtReconWithAPI(vbidRequest.data, cookies);

function buildJourneySearchUrl(
reconResponse: ReconResponse,
verbindungenResponse: VerbindungResponse
): string {
const newUrl = new URL("https://www.bahn.de/buchung/fahrplan/suche");

// Use hash parameters for consistency with DB URLs
const hashParams = new URLSearchParams();

// Find first segment with halte data for start station
const firstSegmentWithHalte = data.verbindungen[0].verbindungsAbschnitte.find(
(segment) => segment.halte.length > 0
);
const firstSegmentWithHalte =
reconResponse.verbindungen[0].verbindungsAbschnitte.find(
(segment) => segment.halte.length > 0
);
const lastSegmentWithHalte =
data.verbindungen[0].verbindungsAbschnitte.findLast(
reconResponse.verbindungen[0].verbindungsAbschnitte.findLast(
(segment) => segment.halte.length > 0
);

Expand All @@ -188,8 +191,8 @@ async function getResolvedUrlBrowserless(url: string) {
hashParams.set("zoid", lastSegmentWithHalte.halte.at(-1)!.id);

// Add date information from the booking
if (vbidRequest.data.hinfahrtDatum) {
hashParams.set("hd", vbidRequest.data.hinfahrtDatum);
if (verbindungenResponse.hinfahrtDatum) {
hashParams.set("hd", verbindungenResponse.hinfahrtDatum);
}

newUrl.hash = hashParams.toString();
Expand Down
1 change: 1 addition & 0 deletions next-env.d.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/// <reference types="next" />
/// <reference types="next/image-types/global" />
/// <reference path="./.next/types/routes.d.ts" />
Copy link
Contributor Author

@alexanderroidl alexanderroidl Sep 30, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure why Next.js automatically added this. 🤷

Somebody please double-check this for correctness.


// NOTE: This file should not be edited
// see https://nextjs.org/docs/app/api-reference/config/typescript for more information.
5 changes: 3 additions & 2 deletions next.config.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ module.exports = {
// Dies erstellt eine eigenständige Version der App mit allen Abhängigkeiten
output: "standalone",
typescript: {
ignoreBuildErrors: true // temporarily, since some type errors still exists and are ambiguous
}
ignoreBuildErrors: true, // temporarily, since some type errors still exist and are ambiguous
},
serverExternalPackages: ["puppeteer-extra", "puppeteer-extra-plugin-stealth"],
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Those must be stated as external to avoid bundling.

};
Loading