Files
svelte-formula11/src/lib/server/scrape.ts
Christoph Urlacher d31c6b6735
All checks were successful
Build Formula11 Docker Image / pocketbase-docker (push) Successful in 55s
Lib: Fix f1.com scraping after the keks changed their site
2025-06-21 22:35:06 +02:00

182 lines
5.9 KiB
TypeScript

import type {
ScrapedDriverStanding,
ScrapedStartingGrid,
ScrapedRaceResult,
ScrapedTeamStanding,
} from "$lib/schema";
import * as cheerio from "cheerio";
// TODO: Validate the generated stuff
export const base_url: string = "https://www.formula1.com/en/results/2025";
/**
* Returns a list of links to all past races of the season,
* based on official f1.com data.
*/
export const scrape_race_links = async (): Promise<string[]> => {
const races_response = await fetch(`${base_url}/races`);
const races_text = await races_response.text();
const $ = cheerio.load(races_text);
const race_links: string[] = [];
$("tbody > tr > td:first-child > p > a[href]", "table.f1-table").each((_, element) => {
const href: string = element.attribs["href"];
// Keks changed the link format, cut off the start
const substring: string = href.replace("/../../en/results/2025/", "");
race_links.push(substring);
});
console.log(`Found ${race_links.length} races...`);
console.log(race_links);
return race_links;
};
/**
* Returns a list of [ScrapedStartingGrids] for all races contained in [race_links],
* based on official f1.com data.
*/
export const scrape_starting_grids = async (
race_links: string[],
): Promise<ScrapedStartingGrid[]> => {
// Update the race_links to point to the qualifications
const starting_grid_links: string[] = race_links.map((link: string) =>
link.replace("/race-result", "/starting-grid"),
);
const starting_grids: ScrapedStartingGrid[] = [];
await Promise.all(
starting_grid_links.map(async (link: string, index: number) => {
console.log(`Fetching qualifying results from ${base_url}/${link}...`);
const starting_grids_response = await fetch(`${base_url}/${link}`);
const starting_grids_text = await starting_grids_response.text();
const $ = cheerio.load(starting_grids_text);
// Obtain the positions for this starting grid for each driver
$("tbody > tr", "table.f1-table").each((driver_index, element) => {
const $$ = cheerio.load(element);
let result: ScrapedStartingGrid = {
id: "",
race_step: index + 1,
driver_code: $$(
"td:nth-child(3) > p > span:first-child > span:last-child > span:last-child",
).text(),
position: driver_index + 1, // parseInt($$("td:nth-child(1) > p").text()),
time: $$("td:nth-child(5) > p").text(),
};
starting_grids.push(result);
});
}),
);
console.log(`Scraped ${starting_grids.length} starting grids...`);
// console.log(starting_grids);
return starting_grids;
};
/**
* Returns a list of [ScrapedRaceResults] for all races contained in [race_links],
* based on official f1.com data.
*/
export const scrape_race_results = async (race_links: string[]): Promise<ScrapedRaceResult[]> => {
const race_results: ScrapedRaceResult[] = [];
await Promise.all(
race_links.map(async (link: string, index: number) => {
console.log(`Fetching race results from ${base_url}/${link}...`);
const race_response = await fetch(`${base_url}/${link}`);
const race_text = await race_response.text();
const $ = cheerio.load(race_text);
// Obtain the results for this race for each driver
$("tbody > tr", "table.f1-table").each((driver_index, element) => {
const $$ = cheerio.load(element);
let result: ScrapedRaceResult = {
id: "",
race_step: index + 1,
driver_code: $$(
"td:nth-child(3) > p > span:first-child > span:last-child > span:last-child",
).text(),
position: driver_index + 1, // parseInt($$("td:nth-child(1) > p").text()),
status: $$("td:nth-child(6) > p").text(),
points: parseInt($$("td:nth-child(7) > p").text()),
};
// DSQ'd/DNF'd drivers have NaN positions
// if (Number.isNaN(result.position)) {
// result.position = driver_index;
// }
race_results.push(result);
});
}),
);
console.log(`Scraped ${race_results.length} race results...`);
// console.log(race_results);
return race_results;
};
/**
* Returns a list of [ScrapedDriverStandings], based on official f1.com data.
*/
export const scrape_driver_standings = async (): Promise<ScrapedDriverStanding[]> => {
const standings_response = await fetch(`${base_url}/drivers`);
const standings_text = await standings_response.text();
const $ = cheerio.load(standings_text);
const driver_standings: ScrapedDriverStanding[] = [];
$("tbody > tr", "table.f1-table").each((driver_index, element) => {
const $$ = cheerio.load(element);
let standing: ScrapedDriverStanding = {
id: "",
driver_code: $$("td:nth-child(2) > p > a > span:last-child > span:last-child").text(),
position: driver_index + 1,
points: parseInt($$("td:nth-child(5) > p").text()),
};
driver_standings.push(standing);
});
console.log(`Scraped ${driver_standings.length} driver standings...`);
// console.log(driver_standings);
return driver_standings;
};
/**
* Returns a list of [ScrapedTeamStandings], based on official f1.com data.
*/
export const scrape_team_standings = async (): Promise<ScrapedTeamStanding[]> => {
const standings_response = await fetch(`${base_url}/team`);
const standings_text = await standings_response.text();
const $ = cheerio.load(standings_text);
const team_standings: ScrapedTeamStanding[] = [];
$("tbody > tr", "table.f1-table").each((team_index, element) => {
const $$ = cheerio.load(element);
let standing: ScrapedTeamStanding = {
id: "",
team_fullname: $$("td:nth-child(2) > p > a").text(),
position: team_index + 1,
points: parseInt($$("td:nth-child(3) > p").text()),
};
team_standings.push(standing);
});
console.log(`Scraped ${team_standings.length} team standings...`);
// console.log(team_standings);
return team_standings;
};