import type {
  ScrapedDriverStanding,
  ScrapedStartingGrid,
  ScrapedRaceResult,
  ScrapedTeamStanding,
} from "$lib/schema";
import * as cheerio from "cheerio";

// TODO: Validate the generated stuff

export const base_url: string = "https://www.formula1.com/en/results/2025";

/** Context selector for the results table on every scraped page. */
const table_selector = "table.f1-table";

/** Selects one row per driver/team inside the results table. */
const row_selector = "tbody > tr";

/** Cell holding the driver code on starting-grid and race-result pages. */
const result_driver_code_selector =
  "td:nth-child(3) > p > span:first-child > span:last-child > span:last-child";

/**
 * Fetches [url] and parses the response body with cheerio.
 *
 * `fetch` does not reject on HTTP error statuses, so a non-2xx response is
 * logged as a warning. The body is still parsed afterwards, which keeps the
 * callers' behaviour unchanged (an error page simply yields no table rows).
 */
const fetch_page = async (url: string) => {
  const response = await fetch(url);
  if (!response.ok) {
    console.warn(`Request to ${url} returned HTTP ${response.status}`);
  }

  return cheerio.load(await response.text());
};

/**
 * Returns a list of links to all past races of the season,
 * based on official f1.com data.
 *
 * The returned links are relative to [base_url].
 */
export const scrape_race_links = async (): Promise<string[]> => {
  const $ = await fetch_page(`${base_url}/races`);

  // Keks changed the link format, cut off the start.
  // The prefix is built from the path of [base_url] so both always name the same season.
  const link_prefix = `/../..${new URL(base_url).pathname}/`;

  const race_links: string[] = [];
  $("tbody > tr > td:first-child > p > a[href]", table_selector).each((_, element) => {
    const href = $(element).attr("href");
    if (href === undefined) {
      // Cannot happen for "a[href]" matches, this only narrows the type
      return;
    }

    race_links.push(href.replace(link_prefix, ""));
  });

  console.log(`Found ${race_links.length} races...`);
  console.log(race_links);

  return race_links;
};

/**
 * Returns a list of [ScrapedStartingGrids] for all races contained in [race_links],
 * based on official f1.com data.
 *
 * All pages are fetched concurrently. The returned entries are grouped by race
 * in the order of [race_links], and by table row within each race.
 */
export const scrape_starting_grids = async (
  race_links: string[],
): Promise<ScrapedStartingGrid[]> => {
  // Update the race_links to point to the qualifications
  const starting_grid_links = race_links.map((link) =>
    link.replace("/race-result", "/starting-grid"),
  );

  const grids_per_race = await Promise.all(
    starting_grid_links.map(async (link, index) => {
      console.log(`Fetching qualifying results from ${base_url}/${link}...`);
      const $ = await fetch_page(`${base_url}/${link}`);

      // Obtain the positions for this starting grid for each driver
      return $(row_selector, table_selector)
        .toArray()
        .map((element, driver_index): ScrapedStartingGrid => {
          const row = $(element);

          return {
            id: "",
            // 1-based index of the race within [race_links]
            race_step: index + 1,
            driver_code: row.find(result_driver_code_selector).text(),
            // Taken from the row order instead of parsing "td:nth-child(1) > p"
            position: driver_index + 1,
            time: row.find("td:nth-child(5) > p").text(),
          };
        });
    }),
  );

  // Flatten per race so the result order does not depend on which fetch finishes first
  const starting_grids = grids_per_race.flat();

  console.log(`Scraped ${starting_grids.length} starting grids...`);
  // console.log(starting_grids);

  return starting_grids;
};

/**
 * Returns a list of [ScrapedRaceResults] for all races contained in [race_links],
 * based on official f1.com data.
 *
 * All pages are fetched concurrently. The returned entries are grouped by race
 * in the order of [race_links], and by table row within each race.
 */
export const scrape_race_results = async (race_links: string[]): Promise<ScrapedRaceResult[]> => {
  const results_per_race = await Promise.all(
    race_links.map(async (link, index) => {
      console.log(`Fetching race results from ${base_url}/${link}...`);
      const $ = await fetch_page(`${base_url}/${link}`);

      // Obtain the results for this race for each driver
      return $(row_selector, table_selector)
        .toArray()
        .map((element, driver_index): ScrapedRaceResult => {
          const row = $(element);

          return {
            id: "",
            // 1-based index of the race within [race_links]
            race_step: index + 1,
            driver_code: row.find(result_driver_code_selector).text(),
            // Taken from the row order instead of parsing "td:nth-child(1) > p":
            // DSQ'd/DNF'd drivers have NaN positions when that cell is parsed
            position: driver_index + 1,
            status: row.find("td:nth-child(6) > p").text(),
            points: parseInt(row.find("td:nth-child(7) > p").text(), 10),
          };
        });
    }),
  );

  // Flatten per race so the result order does not depend on which fetch finishes first
  const race_results = results_per_race.flat();

  console.log(`Scraped ${race_results.length} race results...`);
  // console.log(race_results);

  return race_results;
};

/**
 * Returns a list of [ScrapedDriverStandings], based on official f1.com data.
 *
 * Positions are assigned from the order of the table rows, starting at 1.
 */
export const scrape_driver_standings = async (): Promise<ScrapedDriverStanding[]> => {
  const $ = await fetch_page(`${base_url}/drivers`);

  const driver_standings = $(row_selector, table_selector)
    .toArray()
    .map((element, driver_index): ScrapedDriverStanding => {
      const row = $(element);

      return {
        id: "",
        driver_code: row.find("td:nth-child(2) > p > a > span:last-child > span:last-child").text(),
        position: driver_index + 1,
        points: parseInt(row.find("td:nth-child(5) > p").text(), 10),
      };
    });

  console.log(`Scraped ${driver_standings.length} driver standings...`);
  // console.log(driver_standings);

  return driver_standings;
};

/**
 * Returns a list of [ScrapedTeamStandings], based on official f1.com data.
 *
 * Positions are assigned from the order of the table rows, starting at 1.
 */
export const scrape_team_standings = async (): Promise<ScrapedTeamStanding[]> => {
  const $ = await fetch_page(`${base_url}/team`);

  const team_standings = $(row_selector, table_selector)
    .toArray()
    .map((element, team_index): ScrapedTeamStanding => {
      const row = $(element);

      return {
        id: "",
        team_fullname: row.find("td:nth-child(2) > p > a").text(),
        position: team_index + 1,
        points: parseInt(row.find("td:nth-child(3) > p").text(), 10),
      };
    });

  console.log(`Scraped ${team_standings.length} team standings...`);
  // console.log(team_standings);

  return team_standings;
};