From f5d8f56330ea5455ae2e420811885336917f675d Mon Sep 17 00:00:00 2001 From: Christoph Urlacher Date: Sat, 7 Jun 2025 20:34:02 +0200 Subject: [PATCH] Lib: Implement starting grid scraper --- src/lib/fetch.ts | 14 ++++++++++++ src/lib/schema.ts | 8 +++++++ src/lib/server/scrape.ts | 49 +++++++++++++++++++++++++++++++++++++++- 3 files changed, 70 insertions(+), 1 deletion(-) diff --git a/src/lib/fetch.ts b/src/lib/fetch.ts index 8f32779..3e43828 100644 --- a/src/lib/fetch.ts +++ b/src/lib/fetch.ts @@ -13,6 +13,7 @@ import type { RaceResult, ScrapedDriverStanding, ScrapedRaceResult, + ScrapedStartingGrid, ScrapedTeamStanding, SeasonPick, SeasonPickedUser, @@ -333,6 +334,19 @@ export const fetch_scraped_teamstandings = async ( return scraped_teamstandings; }; +/** + * Fetch all [ScrapedStartingGrids] from the database, ordered descendingly by race step. + */ +export const fetch_scraped_startinggrids = async ( + fetch: (_: any) => Promise, +): Promise => { + const scraped_startinggrids: ScrapedStartingGrid[] = await pb + .collection("scraped_startinggrids") + .getFullList({ fetch: fetch, sort: "-race_step,+position" }); + + return scraped_startinggrids; +}; + /** * Fetch all [ScrapedRaceResults] from the database, ordered descendingly by race step. */ diff --git a/src/lib/schema.ts b/src/lib/schema.ts index e088425..c83ec81 100644 --- a/src/lib/schema.ts +++ b/src/lib/schema.ts @@ -159,6 +159,14 @@ export interface RacePickPointsTotal { // Scraped Data +export interface ScrapedStartingGrid { + id: string; + race_step: number; // This maps to races + driver_code: string; // This maps to drivers + position: number; + time: string; +} + export interface ScrapedRaceResult { id: string; race_step: number; // This maps to races diff --git a/src/lib/server/scrape.ts b/src/lib/server/scrape.ts index 94b1d11..1f7194f 100644 --- a/src/lib/server/scrape.ts +++ b/src/lib/server/scrape.ts @@ -1,4 +1,9 @@ -import type { ScrapedDriverStanding, ScrapedRaceResult, ScrapedTeamStanding } from "$lib/schema"; +import type { + ScrapedDriverStanding, + ScrapedStartingGrid, + ScrapedRaceResult, + ScrapedTeamStanding, +} from "$lib/schema"; import * as cheerio from "cheerio"; // TODO: Validate the generated stuff @@ -26,6 +31,48 @@ export const scrape_race_links = async (): Promise => { return race_links; }; +/** + * Returns a list of [ScrapedStartingGrids] for all races contained in [race_links], + * based on official f1.com data. + */ +export const scrape_starting_grids = async ( + race_links: string[], +): Promise => { + // Update the race_links to point to the qualifications + const starting_grid_links: string[] = race_links.map((link: string) => + link.replace("/race-result", "/starting-grid"), + ); + + const starting_grids: ScrapedStartingGrid[] = []; + await Promise.all( + starting_grid_links.map(async (link: string, index: number) => { + console.log(`Fetching qualifying results from ${base_url}/${link}...`); + const starting_grids_response = await fetch(`${base_url}/${link}`); + const starting_grids_text = await starting_grids_response.text(); + + const $ = cheerio.load(starting_grids_text); + + // Obtain the positions for this starting grid for each driver + $("tbody > tr", "div.f1-inner-wrapper table.f1-table").each((driver_index, element) => { + const $$ = cheerio.load(element); + + let result: ScrapedStartingGrid = { + id: "", + race_step: index + 1, + driver_code: $$("td:nth-child(3) > p > span:last-child").text(), + position: driver_index + 1, // parseInt($$("td:nth-child(1) > p").text()), + time: $$("td:nth-child(5) > p").text(), + }; + + starting_grids.push(result); + }); + }), + ); + console.log(`Scraped ${starting_grids.length} starting grids...`); + + return starting_grids; +}; + /** * Returns a list of [ScrapedRaceResults] for all races contained in [race_links], * based on official f1.com data.