import os

from bs4 import BeautifulSoup


def get_main(
    page, config_data: dict, list_entries: tuple[str, str]
) -> list[tuple[str, str]]:
    base = config_data["base"]
    page.goto(list_entries[1])

    # Save the rendered page to disk for later inspection.
    entry_path = list_entries[0]
    os.makedirs(entry_path, mode=0o777, exist_ok=True)
    html_content = page.content()
    with open(os.path.join(entry_path, "main.html"), "w") as f:
        f.write(html_content)

    # Parse the HTML content using BeautifulSoup.
    soup = BeautifulSoup(html_content, "html.parser")

    # Find the sections table ("tb750 rw-table rw-all sections").
    table = soup.find("table", {"class": "tb750 rw-table rw-all sections"})
    assert table is not None

    # Extract rows from the table body (tbody).
    tbody = table.find("tbody")
    rows = tbody.find_all("tr")

    # Build (path, link) pairs from the first two data cells of each row.
    list_sub_entries: list[tuple[str, str]] = []
    for row in rows:
        cells = row.find_all("td", {"class": "tbdata"})
        if len(cells) > 1:
            link_tag = cells[0].find("a")
            assert link_tag is not None
            entry_name = cells[0].text.strip().replace(" ", "_")
            entry_link = f"{base}{link_tag.get('href')}"
            entry_status = cells[1].text.strip()
            entry_path = f"{entry_status}_{entry_name}"
            list_sub_entries.append((entry_path, entry_link))
    return list_sub_entries
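

# Usage sketch (an assumption, not part of the original script): `page` is taken
# to be a Playwright sync-API Page, config_data["base"] the site's base URL, and
# the URL and output directory below are placeholders.
if __name__ == "__main__":
    from playwright.sync_api import sync_playwright

    with sync_playwright() as p:
        browser = p.chromium.launch()
        page = browser.new_page()
        config_data = {"base": "https://example.invalid"}
        # get_main() expects an (output directory, listing page URL) pair.
        sub_entries = get_main(
            page, config_data, ("downloads", "https://example.invalid/sections")
        )
        for sub_path, sub_link in sub_entries:
            print(sub_path, sub_link)
        browser.close()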