From 82da29572b1885e7f5caeb59bad30ac37a22c3ac Mon Sep 17 00:00:00 2001 From: Kayra Date: Thu, 26 Sep 2024 19:57:00 +0300 Subject: [PATCH 1/8] Update regex.py --- app/utils/regex.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/utils/regex.py b/app/utils/regex.py index ee0cf12..ef1f548 100644 --- a/app/utils/regex.py +++ b/app/utils/regex.py @@ -2,5 +2,5 @@ REGEX_MEMBERS_DATE: str = r"\(Score: (?P.+)\)" REGEX_BG_COLOR: str = r"background-color:(?P.+);" REGEX_CHART_CLUB_ID: str = r"(?P\d+)" -REGEX_COUNTRY_ID: str = r"(?P\d)" +REGEX_COUNTRY_ID: str = r"(?P\d+)" REGEX_DOB_AGE: str = r"^(?P\w{3} \d{1,2}, \d{4}) \((?P\d{2})\)" From df5188493d03ba15859539f28bd66b95ab759747 Mon Sep 17 00:00:00 2001 From: Kayra Date: Thu, 26 Sep 2024 19:57:49 +0300 Subject: [PATCH 2/8] Update xpath.py --- app/utils/xpath.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/app/utils/xpath.py b/app/utils/xpath.py index 5257b34..bfe8ff5 100644 --- a/app/utils/xpath.py +++ b/app/utils/xpath.py @@ -124,6 +124,8 @@ class Profile: LEAGUE_COUNTRY_ID = "//div[@class='data-header__club-info']//img[contains(@class, 'flaggenrahmen')]//@data-src" LEAGUE_COUNTRY_NAME = "//div[@class='data-header__club-info']//img[contains(@class, 'flaggenrahmen')]//@title" LEAGUE_TIER = "//div[@class='data-header__club-info']//strong//text()//following::span[1]/a/text()[2]" + LEAGUE_POSITION = "//div[@class='data-header__club-info']//following::span[3]//text()//following::span[1]/a/text()[1]" + IN_LEAGUE_SINCE = "//div[@class='data-header__club-info']//following::span[4]//text()//following::span[1]/a/text()[1]" CRESTS_HISTORICAL = "//div[@class='wappen-datenfakten-wappen']//@src" class Search: From dea5afef5f84f0223c60af68ed048bba78574b8a Mon Sep 17 00:00:00 2001 From: Kayra Date: Thu, 26 Sep 2024 19:59:03 +0300 Subject: [PATCH 3/8] Update profile.py --- app/services/clubs/profile.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/app/services/clubs/profile.py b/app/services/clubs/profile.py index 9a63f6f..772be11 100644 --- a/app/services/clubs/profile.py +++ b/app/services/clubs/profile.py @@ -83,6 +83,8 @@ def get_club_profile(self) -> dict: "countryID": safe_regex(self.get_text_by_xpath(Clubs.Profile.LEAGUE_COUNTRY_ID), REGEX_COUNTRY_ID, "id"), "countryName": self.get_text_by_xpath(Clubs.Profile.LEAGUE_COUNTRY_NAME), "tier": self.get_text_by_xpath(Clubs.Profile.LEAGUE_TIER), + "position": self.get_text_by_xpath(Clubs.Profile.LEAGUE_POSITION), + "inLeagueSince": self.get_text_by_xpath(Clubs.Profile.IN_LEAGUE_SINCE), } self.response["historicalCrests"] = [ safe_split(crest, "?")[0] for crest in self.get_list_by_xpath(Clubs.Profile.CRESTS_HISTORICAL) From f0cf82484ed5df77958f24b707a7ca8a40539183 Mon Sep 17 00:00:00 2001 From: Kayra Date: Sun, 6 Oct 2024 15:50:13 +0300 Subject: [PATCH 4/8] Add files via upload --- app/api/endpoints/clubs.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/app/api/endpoints/clubs.py b/app/api/endpoints/clubs.py index 642a461..abb8eff 100644 --- a/app/api/endpoints/clubs.py +++ b/app/api/endpoints/clubs.py @@ -28,3 +28,10 @@ def get_club_players(club_id: str, season_id: Optional[str] = None) -> dict: tfmkt = TransfermarktClubPlayers(club_id=club_id, season_id=season_id) club_players = tfmkt.get_club_players() return club_players + + +@router.get("/{club_id}/staffs") +def get_club_staffs(club_id: str) -> dict: + tfmkt = TransfermarktClubPlayers(club_id=club_id) + club_staffs = tfmkt.get_club_staffs() + return club_staffs From 6ff24c8c1f243ffcaf8004767e52deb40210fd6e Mon Sep 17 00:00:00 2001 From: Kayra Date: Sun, 6 Oct 2024 15:50:44 +0300 Subject: [PATCH 5/8] Staff Update --- app/services/clubs/staff.py | 82 +++++++++++++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) create mode 100644 app/services/clubs/staff.py diff --git a/app/services/clubs/staff.py b/app/services/clubs/staff.py new file mode 100644 index 0000000..95a740f --- /dev/null +++ b/app/services/clubs/staff.py @@ -0,0 +1,82 @@ +from dataclasses import dataclass +from datetime import datetime + +from app.services.base import TransfermarktBase +from app.utils.regex import REGEX_STAFF_ID +from app.utils.utils import clean_response, extract_from_url, safe_regex +from app.utils.xpath import Clubs + + +@dataclass +class TransfermarktClubStaffs(TransfermarktBase): + """ + A class for retrieving and parsing the staff members of a football club from Transfermarkt. + + Args: + club_id (str): The unique identifier of the football club. + URL (str): The URL template for the club's staff page on Transfermarkt. + """ + + club_id: str = None + URL: str = "https://www.transfermarkt.us/-/mitarbeiter/verein/{club_id}/plus/1" + + def __post_init__(self) -> None: + """Initialize the TransfermarktClubStaffs class.""" + self.URL = self.URL.format(club_id=self.club_id) + self.page = self.request_url_page() + self.raise_exception_if_not_found(xpath=Clubs.Staff.CLUB_NAME) + + def __parse_club_staffs(self) -> list[dict]: + """ + Parse staff information from the webpage and return a list of dictionaries, each representing a staff member. + + Returns: + list[dict]: A list of staff information dictionaries. + """ + staffs_ids = [ + safe_regex(ids, REGEX_STAFF_ID, "id") for ids in self.get_list_by_xpath(Clubs.Staff.ID) + ] + staffs_names = self.page.xpath(Clubs.Staff.NAME) + staffs_jobs = self.page.xpath(Clubs.Players.PAGE_INFOS) + staffs_ages = self.page.xpath(Clubs.Staff.AGE) + staffs_nationalities = self.page.xpath(Clubs.Staff.NATIONALITIES) + staffs_appointed = self.page.xpath(Clubs.Staff.APPOINTED) + staffs_contracts = self.page.xpath(Clubs.Staff.CONTRACT) + staffs_last_club = self.page.xpath(Clubs.Staff.LAST_CLUB) + + return [ + { + "id": idx, + "name": name, + "job": job, + "age": age, + "nationality": nationality, + "appointed": appointed, + "contract": contract, + "lastClub": last_club, + } + for idx, name, job, age, nationality, appointed, contract, last_club, in zip( # noqa: E501 + staffs_ids, + staffs_names, + staffs_jobs, + staffs_ages, + staffs_nationalities, + staffs_appointed, + staffs_contracts, + staffs_last_club, + ) + ] + + def get_club_staffs(self) -> dict: + """ + Retrieve and parse staff information for the specified football club. + + Returns: + dict: A dictionary containing the club's unique identifier, staff information, and the timestamp of when + the data was last updated. + """ + self.response["id"] = self.club_id + self.response["staffs"] = self.__parse_club_staffs() + self.response["updatedAt"] = datetime.now() + + return clean_response(self.response) From e74c196b82fad0b0e28357037c1d6fc0712f34bc Mon Sep 17 00:00:00 2001 From: Kayra Date: Sun, 6 Oct 2024 15:51:18 +0300 Subject: [PATCH 6/8] Staff Update --- app/utils/regex.py | 1 + app/utils/xpath.py | 31 ++++++++++++++++++++++++++++--- 2 files changed, 29 insertions(+), 3 deletions(-) diff --git a/app/utils/regex.py b/app/utils/regex.py index ef1f548..fed234c 100644 --- a/app/utils/regex.py +++ b/app/utils/regex.py @@ -4,3 +4,4 @@ REGEX_CHART_CLUB_ID: str = r"(?P\d+)" REGEX_COUNTRY_ID: str = r"(?P\d+)" REGEX_DOB_AGE: str = r"^(?P\w{3} \d{1,2}, \d{4}) \((?P\d{2})\)" +REGEX_STAFF_ID: str = r'/small/(\d+)-' \ No newline at end of file diff --git a/app/utils/xpath.py b/app/utils/xpath.py index bfe8ff5..0964e88 100644 --- a/app/utils/xpath.py +++ b/app/utils/xpath.py @@ -92,6 +92,17 @@ class Achievements: class Clubs: + class Staff: + CLUB_NAME = "//header//h1//text()" + ID = "//div[@class='large-12 columns']//table[@class='inline-table']//td[@rowspan='2']/img/@src" + NAME = "//div[@class='large-12 columns']//table[@class='inline-table']//td[@class='hauptlink']/a/text()" + JOB = "//div[@class='large-12 columns']//table[@class='inline-table']//td[@class='hauptlink']/following-sibling::tr[1]/td/text()" + AGE = "//div[@class='large-12 columns']//td[@class='zentriert'][2]/text()" + NATIONALITIES = "//div[@class='large-12 columns']//td[@class='zentriert'][3]/img/@title" + APPOINTED = "//div[@class='large-12 columns']//td[@class='zentriert'][4]/text()" + CONTRACT = "//div[@class='large-12 columns']//td[@class='zentriert'][5]/text()" + LAST_CLUB = "//div[@class='large-12 columns']//td[@class='zentriert'][6]/a/@title" + class Profile: URL = "//div[@class='datenfakten-wappen']//@href" NAME = "//header//h1//text()" @@ -142,9 +153,9 @@ class Players: CLUB_URL = "//li[@id='overview']//@href" PAGE_NATIONALITIES = "//td[img[@class='flaggenrahmen']]" PAGE_INFOS = "//td[@class='posrela']" - NAMES = "//td[@class='posrela']//a//text()" + NAMES = "//table[@class='inline-table']//a//text()" URLS = "//td[@class='hauptlink']//@href" - POSITIONS = "//td[@class='posrela']//tr[2]//text()" + POSITIONS = "//table[@class='inline-table']//tr[2]//text()" DOB_AGE = "//div[@id='yw1']//td[3]//text()" NATIONALITIES = ".//img//@title" JOINED = ".//span/node()/@title" @@ -170,6 +181,20 @@ class Past: class Competitions: class Profile: + LEAGUE_COUNTRY_NAME = "//div[@class='data-header__club-info']//following::span[1]/a/text()[1]" + LEAGUE_COUNTRY_ID = "//div[@class='data-header__club-info']//following::span[1]/a//@href" + LEAGUE_TIER = "//div[@class='data-header__club-info']//following::span[2]//span[2]/text()" + REIGNING_CHAMPION = "//div[@class='data-header__club-info']//following::span[3]//text()//following::span[1]/a/text()[1]" + RECORD_HOLDING_CHAMPION = "//div[@class='data-header__club-info']//following::span[4]//text()//following::a/text()[1]" + UEFA_COEFFICIENT_POSITION = "//div[@class='data-header__club-info']//following::span[5]//text()//following::a/text()[1]" + UEFA_COEFFICIENT_POINT = "//div[@class='data-header__club-info']//following::span[5]//text()//following::span[1]//following::span[1]/text()[1]" + NUMBER_OF_TEAMS = "//div[@class='data-header__info-box']//div[@class='data-header__details']//li[1]/span/text()" + NUMBER_OF_PLAYERS = "//div[@class='data-header__info-box']//div[@class='data-header__details']//li[2]/span/text()" + NUMBER_OF_FOREIGNERS = "//div[@class='data-header__info-box']//div[@class='data-header__details']//li[3]/span/a/text()" + TOTAL_MARKET_VALUE = "//div[@class='data-header__box--small']/text()[2]" + TOTAL_MARKET_VALUE_UNIT = "//div[@class='data-header__box--small']//span[@class='waehrung'][2]/text()" + AVERAGE_MARKET_VALUE = "//div[@class='data-header__info-box']//div[@class='data-header__details']//ul[2]//li[1]/span/text()" + AVERAGE_AGE = "//div[@class='data-header__info-box']//div[@class='data-header__details']//ul[2]//li[2]/span/text()" URL = "//a[@class='tm-tab']//@href" NAME = "//div[@class='data-header__headline-container']//h1//text()" @@ -191,4 +216,4 @@ class Clubs: class Pagination: PAGE_NUMBER_LAST = "//li[contains(@class, 'list-item--icon-last-page')]//@href" - PAGE_NUMBER_ACTIVE = "//li[contains(@class, 'list-item--active')]//@href" + PAGE_NUMBER_ACTIVE = "//li[contains(@class, 'list-item--active')]//@href" \ No newline at end of file From facc94e2853b4c2a20cb483037ea943f35c3f9a1 Mon Sep 17 00:00:00 2001 From: Kayra Date: Sun, 6 Oct 2024 17:13:04 +0300 Subject: [PATCH 7/8] Update staff.py --- app/services/clubs/staff.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/services/clubs/staff.py b/app/services/clubs/staff.py index 95a740f..7466988 100644 --- a/app/services/clubs/staff.py +++ b/app/services/clubs/staff.py @@ -55,7 +55,7 @@ def __parse_club_staffs(self) -> list[dict]: "contract": contract, "lastClub": last_club, } - for idx, name, job, age, nationality, appointed, contract, last_club, in zip( # noqa: E501 + for idx, name, job, age, nationality, appointed, contract, last_club in zip( # noqa: E501 staffs_ids, staffs_names, staffs_jobs, From beeeade85eefe14b1d9b28690af0670a53ab265e Mon Sep 17 00:00:00 2001 From: Kayra Date: Sun, 6 Oct 2024 19:35:23 +0300 Subject: [PATCH 8/8] Update staff.py --- app/services/clubs/staff.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/services/clubs/staff.py b/app/services/clubs/staff.py index 7466988..742e130 100644 --- a/app/services/clubs/staff.py +++ b/app/services/clubs/staff.py @@ -37,7 +37,7 @@ def __parse_club_staffs(self) -> list[dict]: safe_regex(ids, REGEX_STAFF_ID, "id") for ids in self.get_list_by_xpath(Clubs.Staff.ID) ] staffs_names = self.page.xpath(Clubs.Staff.NAME) - staffs_jobs = self.page.xpath(Clubs.Players.PAGE_INFOS) + staffs_jobs = self.page.xpath(Clubs.Staff.JOB) staffs_ages = self.page.xpath(Clubs.Staff.AGE) staffs_nationalities = self.page.xpath(Clubs.Staff.NATIONALITIES) staffs_appointed = self.page.xpath(Clubs.Staff.APPOINTED)