Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[watchertv] Add extractor #9748

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 4 additions & 0 deletions yt_dlp/extractor/_extractors.py
Original file line number Diff line number Diff line change
Expand Up @@ -2306,6 +2306,10 @@
WashingtonPostArticleIE,
)
from .wat import WatIE
from .watchertv import (
WatcherTVSeasonIE,
WatcherTVIE
)
from .wdr import (
WDRIE,
WDRPageIE,
Expand Down
196 changes: 102 additions & 94 deletions yt_dlp/extractor/dropout.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,83 +17,12 @@
)


class DropoutIE(InfoExtractor):
_LOGIN_URL = 'https://www.dropout.tv/login'
_NETRC_MACHINE = 'dropout'

_VALID_URL = r'https?://(?:www\.)?dropout\.tv/(?:[^/]+/)*videos/(?P<id>[^/]+)/?$'
_TESTS = [
{
'url': 'https://www.dropout.tv/game-changer/season:2/videos/yes-or-no',
'note': 'Episode in a series',
'md5': '5e000fdfd8d8fa46ff40456f1c2af04a',
'info_dict': {
'id': '738153',
'display_id': 'yes-or-no',
'ext': 'mp4',
'title': 'Yes or No',
'description': 'Ally, Brennan, and Zac are asked a simple question, but is there a correct answer?',
'release_date': '20200508',
'thumbnail': 'https://vhx.imgix.net/chuncensoredstaging/assets/351e3f24-c4a3-459a-8b79-dc80f1e5b7fd.jpg',
'series': 'Game Changer',
'season_number': 2,
'season': 'Season 2',
'episode_number': 6,
'episode': 'Yes or No',
'duration': 1180,
'uploader_id': 'user80538407',
'uploader_url': 'https://vimeo.com/user80538407',
'uploader': 'OTT Videos'
},
'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest']
},
{
'url': 'https://www.dropout.tv/dimension-20-fantasy-high/season:1/videos/episode-1',
'note': 'Episode in a series (missing release_date)',
'md5': '712caf7c191f1c47c8f1879520c2fa5c',
'info_dict': {
'id': '320562',
'display_id': 'episode-1',
'ext': 'mp4',
'title': 'The Beginning Begins',
'description': 'The cast introduces their PCs, including a neurotic elf, a goblin PI, and a corn-worshipping cleric.',
'thumbnail': 'https://vhx.imgix.net/chuncensoredstaging/assets/4421ed0d-f630-4c88-9004-5251b2b8adfa.jpg',
'series': 'Dimension 20: Fantasy High',
'season_number': 1,
'season': 'Season 1',
'episode_number': 1,
'episode': 'The Beginning Begins',
'duration': 6838,
'uploader_id': 'user80538407',
'uploader_url': 'https://vimeo.com/user80538407',
'uploader': 'OTT Videos'
},
'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest']
},
{
'url': 'https://www.dropout.tv/videos/misfits-magic-holiday-special',
'note': 'Episode not in a series',
'md5': 'c30fa18999c5880d156339f13c953a26',
'info_dict': {
'id': '1915774',
'display_id': 'misfits-magic-holiday-special',
'ext': 'mp4',
'title': 'Misfits & Magic Holiday Special',
'description': 'The magical misfits spend Christmas break at Gowpenny, with an unwelcome visitor.',
'release_date': '20211215',
'thumbnail': 'https://vhx.imgix.net/chuncensoredstaging/assets/d91ea8a6-b250-42ed-907e-b30fb1c65176-8e24b8e5.jpg',
'duration': 11698,
'uploader_id': 'user80538407',
'uploader_url': 'https://vimeo.com/user80538407',
'uploader': 'OTT Videos'
},
'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest']
}
]
class DropoutBaseIE(InfoExtractor):
_HOST = None
McSwindler marked this conversation as resolved.
Show resolved Hide resolved

def _get_authenticity_token(self, display_id):
signin_page = self._download_webpage(
self._LOGIN_URL, display_id, note='Getting authenticity token')
f'{self._HOST}/login', display_id, note='Getting authenticity token')
return self._html_search_regex(
r'name=["\']authenticity_token["\'] value=["\'](.+?)["\']',
signin_page, 'authenticity_token')
Expand All @@ -104,7 +33,7 @@ def _login(self, display_id):
return True

response = self._download_webpage(
self._LOGIN_URL, display_id, note='Logging in', fatal=False,
f'{self._HOST}/login', display_id, note='Logging in', fatal=False,
data=urlencode_postdata({
'email': username,
'password': password,
Expand All @@ -125,7 +54,7 @@ def _real_extract(self, url):
display_id = self._match_id(url)

webpage = None
if self._get_cookies('https://www.dropout.tv').get('_session'):
if self._get_cookies(self._HOST).get('_session'):
webpage = self._download_webpage(url, display_id)
if not webpage or '<div id="watch-unauthorized"' in webpage:
login_err = self._login(display_id)
Expand All @@ -148,7 +77,7 @@ def _real_extract(self, url):
return {
'_type': 'url_transparent',
'ie_key': VHXEmbedIE.ie_key(),
'url': VHXEmbedIE._smuggle_referrer(embed_url, 'https://www.dropout.tv'),
'url': VHXEmbedIE._smuggle_referrer(embed_url, self._HOST),
'id': self._search_regex(r'embed\.vhx\.tv/videos/(.+?)\?', embed_url, 'id'),
'display_id': display_id,
'title': title,
Expand All @@ -165,9 +94,104 @@ def _real_extract(self, url):
}


class DropoutSeasonIE(InfoExtractor):
class DropoutIE(DropoutBaseIE):
    # Single-video extractor for dropout.tv. This class only supplies
    # site-specific constants and test fixtures; it defines no methods itself
    # and relies on what it inherits from DropoutBaseIE.

    # Site root; the base class builds the '/login' URL from it and also uses
    # it for the session-cookie lookup and as the embed referrer.
    _HOST = 'https://www.dropout.tv'
    _NETRC_MACHINE = 'dropout'

    # Accepts any number of leading path segments before 'videos/<id>'.
    _VALID_URL = r'https?://(?:www\.)?dropout\.tv/(?:[^/]+/)*videos/(?P<id>[^/]+)/?$'

    _TESTS = [{
        'url': 'https://www.dropout.tv/game-changer/season:2/videos/yes-or-no',
        'note': 'Episode in a series',
        'md5': 'fc55805bac60b1ce2ffdc35fb9c51195',
        'info_dict': {
            'id': '738153',
            'display_id': 'yes-or-no',
            'ext': 'mp4',
            'title': 'Yes or No',
            'description': 'Ally, Brennan, and Zac are asked a simple question, but is there a correct answer?',
            'release_date': '20200508',
            'thumbnail': 'https://vhx.imgix.net/chuncensoredstaging/assets/351e3f24-c4a3-459a-8b79-dc80f1e5b7fd.jpg',
            'series': 'Game Changer',
            'season_number': 2,
            'season': 'Season 2',
            'episode_number': 6,
            'episode': 'Yes or No',
            'duration': 1180,
            'uploader_id': 'user80538407',
            'uploader_url': 'https://vimeo.com/user80538407',
            'uploader': 'OTT Videos',
        },
        'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'],
    }, {
        'url': 'https://www.dropout.tv/ch-shorts/season:1/videos/post-apocalyptic-dane-cook',
        'note': 'Episode in a series (missing release_date)',
        'md5': 'f260b8d7d0fdbaceae713c9196dac07f',
        'info_dict': {
            'id': '449042',
            'display_id': 'post-apocalyptic-dane-cook',
            'ext': 'mp4',
            'title': 'Post-Apocalyptic Dane Cook',
            'description': 'Dane Cook is back with his all new special. Don\'t worry, it\'s not the end of the world.',
            'thumbnail': 'https://vhx.imgix.net/chuncensoredstaging/assets/5b0678df-d9c3-4864-b811-24db03072f4a.jpg',
            'series': 'CH Shorts',
            'season_number': 1,
            'season': 'Season 1',
            'episode_number': 1,
            'episode': 'Post-Apocalyptic Dane Cook',
            'duration': 135,
            'uploader_id': 'user80538407',
            'uploader_url': 'https://vimeo.com/user80538407',
            'uploader': 'OTT Videos',
        },
        'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'],
    }, {
        'url': 'https://www.dropout.tv/videos/misfits-magic-holiday-special',
        'note': 'Episode not in a series',
        'md5': '147e0607bd877a791665c0b7219b512c',
        'info_dict': {
            'id': '1915774',
            'display_id': 'misfits-magic-holiday-special',
            'ext': 'mp4',
            'title': 'Misfits & Magic Holiday Special',
            'description': 'The magical misfits spend Christmas break at Gowpenny, with an unwelcome visitor.',
            'release_date': '20211215',
            'thumbnail': 'https://vhx.imgix.net/chuncensoredstaging/assets/d91ea8a6-b250-42ed-907e-b30fb1c65176-8e24b8e5.jpg',
            'duration': 11698,
            'uploader_id': 'user80538407',
            'uploader_url': 'https://vimeo.com/user80538407',
            'uploader': 'OTT Videos',
        },
        'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'],
    }]


class DropoutSeasonBaseIE(InfoExtractor):
    """Shared season/playlist extraction for sites that reuse the Dropout page layout.

    Concrete subclasses provide ``_VALID_URL`` (with an ``id`` group and a
    ``season`` group, since ``_real_extract`` reads both) and point
    ``_VIDEO_IE`` at the extractor that handles individual episode URLs.
    """
    _PAGE_SIZE = 24
    # Extractor class handed to url_result() for every episode link.
    # Declared here so the attribute always exists on the base class;
    # subclasses override it with their site-specific video extractor.
    _VIDEO_IE = None

    def _fetch_page(self, url, season_id, page):
        """Yield one url_result per episode link found on a single listing page.

        ``page`` is shifted by one before it is placed in the ``?page=`` query
        (presumably because the paged-list helper counts from 0 - confirm).
        """
        page += 1
        # HTTP 400 is accepted instead of raising; NOTE(review): presumably this
        # is what the site answers once the page number runs past the end.
        webpage = self._download_webpage(
            f'{url}?page={page}', season_id, note=f'Downloading page {page}', expected_status={400})
        item_urls = traverse_obj(
            get_elements_html_by_class('browse-item-link', webpage), (..., {extract_attributes}, 'href'))
        for item_url in item_urls:
            yield self.url_result(item_url, self._VIDEO_IE)

    def _real_extract(self, url):
        """Return a lazily paged playlist for the season addressed by ``url``."""
        season_id = self._match_id(url)
        # URLs without an explicit 'season:N' segment are treated as season 1.
        season_num = self._match_valid_url(url).group('season') or 1
        # Human-readable title derived from the URL slug, e.g. 'ghost-files' -> 'Ghost Files'.
        season_title = season_id.replace('-', ' ').title()

        return self.playlist_result(
            OnDemandPagedList(functools.partial(self._fetch_page, url, season_id), self._PAGE_SIZE),
            f'{season_id}-season-{season_num}', f'{season_title} - Season {season_num}')


class DropoutSeasonIE(DropoutSeasonBaseIE):
_VALID_URL = r'https?://(?:www\.)?dropout\.tv/(?P<id>[^\/$&?#]+)(?:/?$|/season:(?P<season>[0-9]+)/?$)'
_VIDEO_IE = DropoutIE
_TESTS = [
{
'url': 'https://www.dropout.tv/dimension-20-fantasy-high/season:1',
Expand Down Expand Up @@ -206,19 +230,3 @@ class DropoutSeasonIE(InfoExtractor):
}
}
]

def _fetch_page(self, url, season_id, page):
page += 1
webpage = self._download_webpage(
f'{url}?page={page}', season_id, note=f'Downloading page {page}', expected_status={400})
yield from [self.url_result(item_url, DropoutIE) for item_url in traverse_obj(
get_elements_html_by_class('browse-item-link', webpage), (..., {extract_attributes}, 'href'))]

def _real_extract(self, url):
season_id = self._match_id(url)
season_num = self._match_valid_url(url).group('season') or 1
season_title = season_id.replace('-', ' ').title()

return self.playlist_result(
OnDemandPagedList(functools.partial(self._fetch_page, url, season_id), self._PAGE_SIZE),
f'{season_id}-season-{season_num}', f'{season_title} - Season {season_num}')
110 changes: 110 additions & 0 deletions yt_dlp/extractor/watchertv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
from .dropout import DropoutBaseIE, DropoutSeasonBaseIE


class WatcherTVIE(DropoutBaseIE):
    # Single-video extractor for watchertv.com. Only site-specific constants
    # and test fixtures are declared here; the class defines no methods itself
    # and relies on what it inherits from DropoutBaseIE.

    # Site root passed to the inherited logic via the _HOST attribute.
    _HOST = 'https://www.watchertv.com'
    _NETRC_MACHINE = 'watchertv'

    # Accepts any number of leading path segments before 'videos/<id>'.
    _VALID_URL = r'https?://(?:www\.)?watchertv\.com/(?:[^/]+/)*videos/(?P<id>[^/]+)/?$'

    _TESTS = [{
        'url': 'https://www.watchertv.com/ghost-files/season:2/videos/gf-201',
        'note': 'Episode in a series',
        'md5': '99c9aab2cb62157467b7ef5e37266e4e',
        'info_dict': {
            'id': '3129338',
            'display_id': 'gf-201',
            'ext': 'mp4',
            'title': 'The Death Row Poltergeists of Missouri State Penitentiary',
            'description': 'Where Curiosity Meets Comedy',
            'release_date': '20230825',
            'thumbnail': 'https://vhx.imgix.net/watcherentertainment/assets/92c02f39-2ed6-4b51-9e63-1a907b82e2bc.png',
            'series': 'Ghost Files',
            'season_number': 2,
            'season': 'Season 2',
            'episode_number': 1,
            'episode': 'The Death Row Poltergeists of Missouri State Penitentiary',
            'duration': 3853,
            'uploader_id': 'user80538407',
            'uploader_url': 'https://vimeo.com/user80538407',
            'uploader': 'OTT Videos',
        },
        'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'],
    }, {
        'url': 'https://www.watchertv.com/road-files/season:1/videos/rf101',
        'note': 'Episode in a series (missing release_date)',
        'md5': '02f9aaafc8ad9bd1be366cf6a61a68d8',
        'info_dict': {
            'id': '3187312',
            'display_id': 'rf101',
            'ext': 'mp4',
            'title': 'Road Files: Haunted Hill House',
            'description': 'Where Curiosity Meets Comedy',
            'thumbnail': 'https://vhx.imgix.net/watcherentertainment/assets/7445f23c-a3e7-47fb-835a-d288273e2698.png',
            'series': 'Road Files',
            'season_number': 1,
            'season': 'Season 1',
            'episode_number': 1,
            'episode': 'Road Files: Haunted Hill House',
            'duration': 516,
            'uploader_id': 'user80538407',
            'uploader_url': 'https://vimeo.com/user80538407',
            'uploader': 'OTT Videos',
        },
        'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'],
    }, {
        'url': 'https://www.watchertv.com/videos/welcome-beta-users',
        'note': 'Episode not in a series',
        'md5': 'fd1db805f9adc442c38d706bba21ad03',
        'info_dict': {
            'id': '3187107',
            'display_id': 'welcome-beta-users',
            'ext': 'mp4',
            'title': 'Welcome to Watcher!',
            'description': 'Where Curiosity Meets Comedy',
            'release_date': '20240419',
            'thumbnail': 'https://vhx.imgix.net/watcherentertainment/assets/fbb90dc8-ebb0-4597-9a83-95729e234030.jpg',
            'duration': 92,
            'uploader_id': 'user80538407',
            'uploader_url': 'https://vimeo.com/user80538407',
            'uploader': 'OTT Videos',
        },
        'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'],
    }]


class WatcherTVSeasonIE(DropoutSeasonBaseIE):
    # Season/series playlist extractor for watchertv.com. Declares only the
    # URL pattern, the per-episode extractor and test fixtures; no methods
    # are defined in this class.

    # Matches either a bare series slug or '<slug>/season:<N>'; the 'season'
    # group is absent when the URL names no season.
    _VALID_URL = r'https?://(?:www\.)?watchertv\.com/(?P<id>[^\/$&?#]+)(?:/?$|/season:(?P<season>[0-9]+)/?$)'

    # Extractor class for the individual episode links of a season page.
    _VIDEO_IE = WatcherTVIE

    _TESTS = [{
        'url': 'https://www.watchertv.com/ghost-files/season:1',
        'note': 'Multi-season series with the season in the url',
        'playlist_count': 8,
        'info_dict': {
            'id': 'ghost-files-season-1',
            'title': 'Ghost Files - Season 1',
        },
    }, {
        'url': 'https://www.watchertv.com/are-you-scared',
        'note': 'Multi-season series with the season not in the url',
        'playlist_count': 3,
        'info_dict': {
            'id': 'are-you-scared-season-1',
            'title': 'Are You Scared - Season 1',
        },
    }, {
        'url': 'https://www.watchertv.com/watcher-one-offs',
        'note': 'Single-season series',
        'playlist_count': 16,
        'info_dict': {
            'id': 'watcher-one-offs-season-1',
            'title': 'Watcher One Offs - Season 1',
        },
    }]