Skip to content

Commit

Permalink
httpx http2 and headers WIP
Browse files Browse the repository at this point in the history
  • Loading branch information
Commandcracker committed Jun 4, 2024
1 parent 7277b19 commit 187e1ff
Show file tree
Hide file tree
Showing 19 changed files with 370 additions and 41 deletions.
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@ maintainers = [{name="Commandcracker"}]
license = {file = "LICENSE.txt"}
readme = "README.md"
dependencies = [
"textual>=0.63.3",
"textual>=0.64.0",
"beautifulsoup4>=4.12.3",
"httpx>=0.27.0",
"httpx[http2]>=0.27.0",
"pypresence>=4.3.0",
"packaging>=24.0",
"platformdirs>=4.2.2",
Expand Down
4 changes: 2 additions & 2 deletions src/gucken/aniskip.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
from dataclasses import dataclass

from fuzzywuzzy import process
from httpx import AsyncClient

from .networking import AsyncClient
from .tracker.myanimelist import search
from .rome import replace_roman_numerals

Expand All @@ -20,7 +20,7 @@ class SkipTimes:
async def get_timings_from_id(
anime_id: int, episode_number: int
) -> Union[SkipTimes, None]:
async with (AsyncClient(verify=False) as client):
async with AsyncClient() as client:
response = await client.get(
f"https://api.aniskip.com/v1/skip-times/{anime_id}/{episode_number}?types=op&types=ed"
)
Expand Down
2 changes: 1 addition & 1 deletion src/gucken/gucken.py
Original file line number Diff line number Diff line change
Expand Up @@ -509,7 +509,7 @@ async def disable_RPC(self):

# TODO: https://textual.textualize.io/guide/workers/#thread-workers
# TODO: Exit on error when debug = true
@work(exclusive=True, exit_on_error=False)
@work(exclusive=True) #exit_on_error=False
async def lookup_anime(self, keyword: str) -> None:
search_providers = []
if self.query_one("#aniworld_to", Checkbox).value:
Expand Down
5 changes: 3 additions & 2 deletions src/gucken/hoster/common.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from abc import abstractmethod
from dataclasses import dataclass

from httpx import AsyncClient, HTTPError
from ..networking import AsyncClient
from httpx import HTTPError


@dataclass
Expand All @@ -13,7 +14,7 @@ async def check_is_working(self) -> bool:
try:
async with AsyncClient(verify=False) as client:
response = await client.head(
self.url, follow_redirects=True, headers=self.headers
self.url, headers=self.headers
)
return response.is_success
except HTTPError:
Expand Down
3 changes: 1 addition & 2 deletions src/gucken/hoster/doodstream.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,7 @@
from time import time
from urllib.parse import urlparse

from httpx import AsyncClient

from ..networking import AsyncClient
from .common import DirectLink, Hoster

EXTRACT_DOODSTREAM_HLS_PATTERN = re_compile(r"/pass_md5/[\w-]+/[\w-]+")
Expand Down
5 changes: 2 additions & 3 deletions src/gucken/hoster/streamtape.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from re import compile as re_compile

from httpx import AsyncClient

from ..networking import AsyncClient
from .common import DirectLink, Hoster

STREAMTAPE_PATTERN = re_compile(r"botlink(.*?)innerHTML(.*?)\);")
Expand All @@ -13,7 +12,7 @@ class StreamtapeHoster(Hoster):
async def get_direct_link(self) -> DirectLink:
# TODO: Error checking
async with AsyncClient(verify=False) as client:
response = await client.get(self.url, follow_redirects=True)
response = await client.get(self.url)
# TODO: Save html and error in order to investigate
# with open("out.txt", "wb") as f:
# f.write(response.text.encode('utf-8'))
Expand Down
5 changes: 2 additions & 3 deletions src/gucken/hoster/veo.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
from base64 import b64decode
from re import compile as re_compile

from httpx import AsyncClient

from ..networking import AsyncClient
from .common import DirectLink, Hoster

EXTRACT_VEO_HLS_PATTERN = re_compile(r"'hls': '(.*?)'")
Expand All @@ -11,7 +10,7 @@
class VOEHoster(Hoster):
async def get_direct_link(self) -> DirectLink:
async with AsyncClient(verify=False) as client:
response = await client.get(self.url, follow_redirects=True)
response = await client.get(self.url)
match_hls = EXTRACT_VEO_HLS_PATTERN.search(response.text)
hls_link = match_hls.group(1)
return DirectLink(b64decode(hls_link).decode())
4 changes: 2 additions & 2 deletions src/gucken/hoster/vidoza.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from re import compile as re_compile

from httpx import AsyncClient
from ..networking import AsyncClient

from .common import DirectLink, Hoster

Expand All @@ -13,6 +13,6 @@
class VidozaHoster(Hoster):
async def get_direct_link(self) -> DirectLink:
async with AsyncClient(verify=False) as client:
response = await client.get(self.url, follow_redirects=True)
response = await client.get(self.url)
match_hls = EXTRACT_VIDOZA_HLS_PATTERN.search(response.text)
return DirectLink(match_hls.group(1))
115 changes: 115 additions & 0 deletions src/gucken/networking.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
from enum import Enum
from json import loads
from pathlib import Path
from random import choice
from urllib.parse import urlparse

from httpx import AsyncClient as HttpxAsyncClient, Response, AsyncBaseTransport

from rich import print
from asyncio import run


# Sources for realistic browser User-Agent strings:
# https://www.useragents.me/
# https://github.com/microlinkhq/top-user-agents/blob/master/src/index.json
# https://github.com/yt-dlp/yt-dlp/blob/master/yt_dlp/utils/networking.py
# TODO: generate the list and group it by platform (iOS, Android, macOS, Windows, ...)
user_agents_path = Path(__file__).parent.joinpath("resources", "user_agents.json")
# Pin the encoding: JSON files are UTF-8, and relying on the locale default
# makes loading platform dependent.
with open(user_agents_path, "r", encoding="utf-8") as f:
    user_agents_raw = f.read()
# Parsed JSON content; AsyncClient draws one entry at random per instance.
user_agents = loads(user_agents_raw)


class AsyncHTTPSRedirectTransport(AsyncBaseTransport):
    """Transport that answers every request with a redirect to its HTTPS URL.

    The transport never touches the network itself. It only builds a
    ``303`` response whose ``Location`` header is the requested URL with the
    scheme replaced by ``https``.
    """

    async def handle_async_request(self, request) -> Response:
        """Return a ``303`` response pointing at the HTTPS form of ``request.url``."""
        secure_location = str(request.url.copy_with(scheme="https"))
        redirect_headers = {"Location": secure_location}
        return Response(303, headers=redirect_headers)


class AcceptLanguage(Enum):
    """Language preference used to choose the ``Accept-Language`` header."""

    # English
    EN = 0
    # German
    DE = 1


class AsyncClient(HttpxAsyncClient):
    """``httpx.AsyncClient`` with browser-like defaults.

    Unless told otherwise, the client enables HTTP/2, follows redirects,
    sends a randomly chosen ``User-Agent`` together with typical browser
    navigation headers, adds a ``Referer`` header to requests made through
    :meth:`request`, and redirects plain ``http://`` URLs to ``https://``.
    """

    def __init__(
        self,
        *args,
        http2: bool = True,
        follow_redirects: bool = True,
        auto_referer: bool = True,
        https_only: bool = True,
        accept_language: AcceptLanguage = AcceptLanguage.EN,
        **kwargs
    ) -> None:
        """Create the client.

        Args:
            *args: Forwarded unchanged to ``httpx.AsyncClient``.
            http2: Passed to httpx as ``http2``.
            follow_redirects: Passed to httpx as ``follow_redirects``.
            auto_referer: When true, :meth:`request` adds a ``Referer``
                header derived from the request URL.
            https_only: When true, ``http://`` URLs are served by
                ``AsyncHTTPSRedirectTransport``.
            accept_language: Selects the ``Accept-Language`` header value.
            **kwargs: Forwarded to ``httpx.AsyncClient``. ``headers`` is
                merged with the browser defaults (caller values win) and
                ``mounts`` is kept, apart from the ``http://`` entry when
                ``https_only`` is enabled.

        Raises:
            ValueError: If ``accept_language`` is not a supported
                ``AcceptLanguage`` member.
        """
        # TODO: decide whether verify=False is still needed anywhere
        self.auto_referer = auto_referer
        kwargs["http2"] = http2
        kwargs["follow_redirects"] = follow_redirects

        # Possible future additions:
        # aiodns / dnspython[doh]
        # socksio - SOCKS proxy support. (Optional, with httpx[socks])

        user_agent = choice(user_agents)
        headers = {
            # Add others
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
            # "br" = "brotli" -> httpx[brotli]
            # "zstd" -> httpx[zstd] wait for next release https://github.com/encode/httpx/pull/3139
            # "Accept-Encoding": "gzip, deflate, br", httpx is covering this
            # "Accept-Language": "en-us,en;q=0.5", see below
            # "Host": "xxx", httpx is covering this
            # "Sec-Ch-Ua-Platform": "macOS",  # only on mac
            "Sec-Fetch-Dest": "document",
            "Sec-Fetch-Mode": "navigate",
            "Sec-Fetch-Site": "none",
            "Sec-Fetch-User": "?1",  # Not on iphone
            "Upgrade-Insecure-Requests": "1",  # Not on iphone
            "User-Agent": user_agent,
            # "X-Amzn-Trace-Id": "Root=1-xxxxxxxx-xxxxxxxxxxxxxxxxxxxxxxxx"
            # TODO: More variation
        }

        # Compare against the enum class itself; going through the instance
        # would raise AttributeError for non-enum input instead of reaching
        # the explicit error below.
        if accept_language is AcceptLanguage.EN:
            headers["Accept-Language"] = "en-us,en;q=0.5"  # "en-US,en;q=0.9", "en-US"
        elif accept_language is AcceptLanguage.DE:
            headers["Accept-Language"] = choice([
                "de-DE,de;q=0.9",
                "de",  # found on macos
                "de-DE,de;q=0.9",  # found on ios
                "de-DE,de",
                "de,en-US;q=0.7,en;q=0.3",
            ])
        else:
            raise ValueError(f"Unsupported accept_language: {accept_language!r}")

        caller_headers = kwargs.get("headers")
        if caller_headers is not None:
            # Explicit caller headers take precedence over the defaults.
            headers = self._merge_headers(headers, caller_headers)
        kwargs["headers"] = headers

        if https_only:
            # Keep any other mounts the caller supplied; only the plain-HTTP
            # prefix is forced onto the redirecting transport.
            mounts = dict(kwargs.get("mounts") or {})
            mounts["http://"] = AsyncHTTPSRedirectTransport()
            kwargs["mounts"] = mounts

        super().__init__(*args, **kwargs)

    @staticmethod
    def _merge_headers(defaults: dict, overrides) -> dict:
        """Return ``defaults`` overlaid with ``overrides``.

        Header names are compared case-insensitively so that an override such
        as ``referer`` replaces a default ``Referer`` instead of producing a
        second header with the same name.
        """
        overrides = dict(overrides)
        taken = {name.lower() for name in overrides if isinstance(name, str)}
        merged = {
            name: value
            for name, value in defaults.items()
            if name.lower() not in taken
        }
        merged.update(overrides)
        return merged

    async def request(self, method, url, **kwargs) -> Response:
        """Send a request, adding a ``Referer`` header when enabled.

        The ``Referer`` is ``<scheme>://<host>`` of the target URL. It is
        skipped for URLs without a scheme or host, and a ``Referer`` supplied
        by the caller is left untouched.

        Args:
            method: HTTP method name.
            url: Target URL; anything whose ``str()`` is the URL text.
            **kwargs: Forwarded to ``httpx.AsyncClient.request``.

        Returns:
            The response produced by httpx.
        """
        if self.auto_referer:
            parsed_url = urlparse(str(url))  # maybe use httpx.URL instead?
            if parsed_url.scheme and parsed_url.netloc:
                referer = {"Referer": f"{parsed_url.scheme}://{parsed_url.netloc}"}
                caller_headers = kwargs.get("headers")
                if caller_headers is None:
                    kwargs["headers"] = referer
                else:
                    kwargs["headers"] = self._merge_headers(referer, caller_headers)
        return await super().request(method, url, **kwargs)


async def main():
    """Manual smoke test: print the headers httpbin receives from each client.

    The request is made first with this module's ``AsyncClient`` and then
    with the plain httpx client, so the two header sets can be compared.
    """
    echo_url = "https://httpbin.org/headers"
    for client_factory in (AsyncClient, HttpxAsyncClient):
        async with client_factory() as session:
            reply = await session.get(echo_url)
            print(reply.json())


if __name__ == "__main__":
    run(main())
10 changes: 5 additions & 5 deletions src/gucken/provider/aniworld.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from typing import Union

from bs4 import BeautifulSoup
from httpx import AsyncClient
from ..networking import AsyncClient, AcceptLanguage

from ..hoster.doodstream import DoodstreamHoster
from ..hoster.streamtape import StreamtapeHoster
Expand Down Expand Up @@ -42,7 +42,7 @@ class AniWorldEpisode(Episode):
url: str

async def process_hoster(self) -> dict[Language, list[Hoster]]:
async with AsyncClient(verify=False) as client:
async with AsyncClient(accept_language=AcceptLanguage.DE) as client:
response = await client.get(
f"{self.url}/staffel-{self.season}/episode-{self.episode_number}"
)
Expand Down Expand Up @@ -131,7 +131,7 @@ class AniWorldProvider(Provider):
async def search(keyword: str) -> Union[list[AniWorldSearchResult], None]:
if keyword.strip() == "":
return None
async with AsyncClient(verify=False) as client:
async with AsyncClient(accept_language=AcceptLanguage.DE) as client:
response = await client.get(
f"https://{AniWorldProvider.host}/ajax/seriesSearch?keyword={keyword}"
)
Expand All @@ -153,7 +153,7 @@ async def search(keyword: str) -> Union[list[AniWorldSearchResult], None]:

@staticmethod
async def get_series(search_result: AniWorldSearchResult) -> AniWorldSeries:
async with AsyncClient(verify=False) as client:
async with AsyncClient(accept_language=AcceptLanguage.DE) as client:
response = await client.get(search_result.url)
soup = BeautifulSoup(response.text, "html.parser")

Expand Down Expand Up @@ -235,7 +235,7 @@ async def get_series(search_result: AniWorldSearchResult) -> AniWorldSeries:


async def get_episodes_from_url(staffel: int, url: str) -> list[Episode]:
async with AsyncClient(verify=False) as client:
async with AsyncClient(accept_language=AcceptLanguage.DE) as client:
response = await client.get(f"{url}/staffel-{staffel}")
return await get_episodes_from_page(staffel, url, response.text)

Expand Down
28 changes: 19 additions & 9 deletions src/gucken/provider/serienstream.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,24 @@
from typing import Union

from bs4 import BeautifulSoup
from httpx import AsyncClient

from ..networking import AcceptLanguage, AsyncClient
from ..hoster.doodstream import DoodstreamHoster
from ..hoster.streamtape import StreamtapeHoster
from ..hoster.veo import VOEHoster
from ..hoster.vidoza import VidozaHoster
from .common import Episode, Hoster, Language, Provider, SearchResult, Series

# TODO: Timeouts
# TODO: use base_url
# TODO: faster json
# TODO: reuse same client
# TODO: resolve serienstream using mounts (remove verify=False from hosts)


headers = {"Host": "serienstream.to"}
extensions = {"sni_hostname": "serienstream.to"}


def provider_to_hoster(provider: str, url: str) -> Hoster:
if provider == "VOE":
Expand Down Expand Up @@ -42,9 +52,9 @@ class SerienStreamEpisode(Episode):
url: str

async def process_hoster(self) -> dict[Language, list[Hoster]]:
async with AsyncClient(verify=False) as client:
async with AsyncClient(accept_language=AcceptLanguage.DE) as client:
response = await client.get(
f"{self.url}/staffel-{self.season}/episode-{self.episode_number}"
f"{self.url}/staffel-{self.season}/episode-{self.episode_number}", headers=headers, extensions=extensions
)
soup = BeautifulSoup(response.text, "html.parser")
watch_episode = soup.find_all(
Expand Down Expand Up @@ -132,9 +142,9 @@ class SerienStreamProvider(Provider):
async def search(keyword: str) -> Union[list[SerienStreamSearchResult], None]:
if keyword.strip() == "":
return None
async with AsyncClient(verify=False) as client:
async with AsyncClient(accept_language=AcceptLanguage.DE) as client:
response = await client.get(
f"https://{SerienStreamProvider.host}/ajax/seriesSearch?keyword={keyword}"
f"https://{SerienStreamProvider.host}/ajax/seriesSearch?keyword={keyword}", headers=headers, extensions=extensions
)
results = response.json()
search_results = []
Expand All @@ -154,8 +164,8 @@ async def search(keyword: str) -> Union[list[SerienStreamSearchResult], None]:

@staticmethod
async def get_series(search_result: SerienStreamSearchResult) -> SerienStreamSeries:
async with AsyncClient(verify=False) as client:
response = await client.get(search_result.url)
async with AsyncClient(accept_language=AcceptLanguage.DE) as client:
response = await client.get(search_result.url, headers=headers, extensions=extensions)
soup = BeautifulSoup(response.text, "html.parser")

tags = []
Expand Down Expand Up @@ -236,8 +246,8 @@ async def get_series(search_result: SerienStreamSearchResult) -> SerienStreamSer


async def get_episodes_from_url(staffel: int, url: str) -> list[Episode]:
async with AsyncClient(verify=False) as client:
response = await client.get(f"{url}/staffel-{staffel}")
async with AsyncClient(accept_language=AcceptLanguage.DE) as client:
response = await client.get(f"{url}/staffel-{staffel}", headers=headers, extensions=extensions)
return await get_episodes_from_page(staffel, url, response.text)


Expand Down
3 changes: 3 additions & 0 deletions src/gucken/resources/default_settings.toml
Original file line number Diff line number Diff line change
Expand Up @@ -39,5 +39,8 @@ serienstream_to = false
[settings.ui]
dark = true

#[settings.networking]
#doh = true

[meta]
version = "0.0.0"
File renamed without changes.
6 changes: 3 additions & 3 deletions src/gucken/tracker/anilist.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from httpx import AsyncClient
from ..networking import AsyncClient

SEARCH_QUERY = """
query ($id: Int, $page: Int, $perPage: Int, $search: String) {
Expand All @@ -24,9 +24,9 @@


async def search(keyword: str) -> dict:
async with AsyncClient(verify=False) as client:
async with AsyncClient() as client:
response = await client.post(
f"https://graphql.anilist.co",
"https://graphql.anilist.co",
headers={"Content-Type": "application/json"},
json={"query": SEARCH_QUERY, "variables": {"search": keyword}},
)
Expand Down
4 changes: 2 additions & 2 deletions src/gucken/tracker/myanimelist.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from httpx import AsyncClient
from ..networking import AsyncClient


async def search(keyword: str) -> dict:
async with AsyncClient(verify=False) as client:
async with AsyncClient() as client:
response = await client.get(
f"https://myanimelist.net/search/prefix.json?type=anime&keyword={keyword}"
)
Expand Down
Loading

0 comments on commit 187e1ff

Please sign in to comment.