[cleanup] Misc (yt-dlp#10075)

Closes yt-dlp#10303 Authored by: bashonly, seproDev, jucor, c-basalt Co-authored-by: sepro <[email protected]> Co-authored-by: Julien Cornebise <[email protected]> Co-authored-by: c-basalt <[email protected]>
Nirrti-Raja · Jul 1, 2024 · 6aaf96a · 6aaf96a
1 parent d4b99a2
commit 6aaf96a
Show file tree

Hide file tree

Showing 42 changed files with 132 additions and 106 deletions.
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
@@ -525,6 +525,10 @@ jobs:
           # make sure SHA sums are also printed to stdout
           sha256sum -- * | tee ../SHA2-256SUMS
           sha512sum -- * | tee ../SHA2-512SUMS
+          # also print as permanent annotations to the summary page
+          while read -r shasum; do
+            echo "::notice title=${shasum##* }::sha256: ${shasum% *}"
+          done < ../SHA2-256SUMS
 
       - name: Make Update spec
         run: |

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -127,7 +127,7 @@ While these steps won't necessarily ensure that no misuse of the account takes p
 
 ### Is the website primarily used for piracy?
 
-We follow [youtube-dl's policy](https://github.com/ytdl-org/youtube-dl#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free) to not support services that is primarily used for infringing copyright. Additionally, it has been decided to not to support porn sites that specialize in fakes. We also cannot support any service that serves only [DRM protected content](https://en.wikipedia.org/wiki/Digital_rights_management). 
+We follow [youtube-dl's policy](https://github.com/ytdl-org/youtube-dl#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free) to not support services that are primarily used for infringing copyright. Additionally, it has been decided not to support porn sites that specialize in fakes. We also cannot support any service that serves only [DRM protected content](https://en.wikipedia.org/wiki/Digital_rights_management).
 
 
 
@@ -215,8 +215,8 @@ After you have ensured this site is distributing its content legally, you can fo
 
     ```python
     from .common import InfoExtractor
-    
-    
+
+
     class YourExtractorIE(InfoExtractor):
         _VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P<id>[0-9]+)'
         _TESTS = [{
@@ -244,7 +244,7 @@ After you have ensured this site is distributing its content legally, you can fo
         def _real_extract(self, url):
             video_id = self._match_id(url)
             webpage = self._download_webpage(url, video_id)
-    
+
             # TODO more code goes here, for example ...
             title = self._html_search_regex(r'<h1>(.+?)</h1>', webpage, 'title')
 
@@ -320,7 +320,7 @@ Say you have some source dictionary `meta` that you've fetched as JSON with HTTP
 ```python
 meta = self._download_json(url, video_id)
 ```
-    
+
 Assume at this point `meta`'s layout is:
 
 ```python
@@ -750,7 +750,7 @@ Use `url_or_none` for safe URL processing.
 
 Use `traverse_obj` and `try_call` (which supersede `dict_get` and `try_get`) for safe metadata extraction from parsed JSON.
 
-Use `unified_strdate` for uniform `upload_date` or any `YYYYMMDD` meta field extraction, `unified_timestamp` for uniform `timestamp` extraction, `parse_filesize` for `filesize` extraction, `parse_count` for count meta fields extraction, `parse_resolution`, `parse_duration` for `duration` extraction, `parse_age_limit` for `age_limit` extraction. 
+Use `unified_strdate` for uniform `upload_date` or any `YYYYMMDD` meta field extraction, `unified_timestamp` for uniform `timestamp` extraction, `parse_filesize` for `filesize` extraction, `parse_count` for count meta fields extraction, `parse_resolution`, `parse_duration` for `duration` extraction, `parse_age_limit` for `age_limit` extraction.
 
 Explore [`yt_dlp/utils/`](yt_dlp/utils/) for more useful convenience functions.
 

diff --git a/Collaborators.md b/Collaborators.md
@@ -61,3 +61,10 @@ You can also find lists of all [contributors of yt-dlp](CONTRIBUTORS) and [autho
 * Reworked internals like `traverse_obj`, various core refactors and bug fixes
 * Implemented proper progress reporting for parallel downloads
 * Improved/fixed/added Bundestag, crunchyroll, pr0gramm, Twitter, WrestleUniverse etc
+
+
+## [sepro](https://github.com/seproDev)
+
+* UX improvements: Warn when ffmpeg is missing, warn when double-clicking exe
+* Code cleanup: Remove dead extractors, mark extractors as broken, enable/apply ruff rules
+* Improved/fixed/added ArdMediathek, DRTV, Floatplane, MagentaMusik, Naver, Nebula, OnDemandKorea, Vbox7 etc
diff --git a/README.md b/README.md
diff --git a/devscripts/changelog_override.json b/devscripts/changelog_override.json
@@ -169,5 +169,11 @@
         "when": "5c019f6328ad40d66561eac3c4de0b3cd070d0f6",
         "short": "[cleanup] Misc (#9765)",
         "authors": ["bashonly", "Grub4K", "seproDev"]
+    },
+    {
+        "action": "change",
+        "when": "e6a22834df1776ec4e486526f6df2bf53cb7e06f",
+        "short": "[ie/orf:on] Add `prefer_segments_playlist` extractor-arg (#10314)",
+        "authors": ["seproDev"]
     }
 ]
diff --git a/devscripts/cli_to_api.py b/devscripts/cli_to_api.py
@@ -1,3 +1,5 @@
+#!/usr/bin/env python3
+
 # Allow direct execution
 import os
 import sys

diff --git a/pyproject.toml b/pyproject.toml
@@ -299,7 +299,7 @@ banned-from = [
     "string",
     "sys",
     "time",
-    "urllib",
+    "urllib.parse",
     "uuid",
     "xml",
 ]

diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py
@@ -92,6 +92,7 @@ def test_operators(self):
         self._test('function f(){return 0 && 1 || 2;}', 2)
         self._test('function f(){return 0 ?? 42;}', 0)
         self._test('function f(){return "life, the universe and everything" < 42;}', False)
+        self._test('function f(){return 0  - 7 * - 6;}', 42)
 
     def test_array_access(self):
         self._test('function f(){var x = [1,2,3]; x[0] = 4; x[0] = 5; x[2.0] = 7; return x;}', [5, 2, 7])

diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py
@@ -163,6 +163,10 @@
         'https://www.youtube.com/s/player/b7910ca8/player_ias.vflset/en_US/base.js',
         '_hXMCwMt9qE310D', 'LoZMgkkofRMCZQ',
     ),
+    (
+        'https://www.youtube.com/s/player/590f65a6/player_ias.vflset/en_US/base.js',
+        '1tm7-g_A9zsI8_Lay_', 'xI4Vem4Put_rOg',
+    ),
 ]
 
 

diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
@@ -4,6 +4,7 @@
 import datetime as dt
 import errno
 import fileinput
+import functools
 import http.cookiejar
 import io
 import itertools
@@ -24,7 +25,7 @@
 import unicodedata
 
 from .cache import Cache
-from .compat import functools, urllib  # isort: split
+from .compat import urllib  # isort: split
 from .compat import compat_os_name, urllib_req_to_req
 from .cookies import LenientSimpleCookie, load_cookies
 from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name

diff --git a/yt_dlp/cookies.py b/yt_dlp/cookies.py
@@ -2,7 +2,9 @@
 import collections
 import contextlib
 import datetime as dt
+import functools
 import glob
+import hashlib
 import http.cookiejar
 import http.cookies
 import io
@@ -17,14 +19,12 @@
 import time
 import urllib.request
 from enum import Enum, auto
-from hashlib import pbkdf2_hmac
 
 from .aes import (
     aes_cbc_decrypt_bytes,
     aes_gcm_decrypt_and_verify_bytes,
     unpad_pkcs7,
 )
-from .compat import functools  # isort: split
 from .compat import compat_os_name
 from .dependencies import (
     _SECRETSTORAGE_UNAVAILABLE_REASON,
@@ -999,7 +999,7 @@ def _get_windows_v10_key(browser_root, logger):
 
 
 def pbkdf2_sha1(password, salt, iterations, key_length):
-    return pbkdf2_hmac('sha1', password, salt, iterations, key_length)
+    return hashlib.pbkdf2_hmac('sha1', password, salt, iterations, key_length)
 
 
 def _decrypt_aes_cbc_multi(ciphertext, keys, logger, initialization_vector=b' ' * 16):

diff --git a/yt_dlp/downloader/external.py b/yt_dlp/downloader/external.py
@@ -1,4 +1,5 @@
 import enum
+import functools
 import json
 import os
 import re
@@ -9,7 +10,6 @@
 import uuid
 
 from .fragment import FragmentFD
-from ..compat import functools
 from ..networking import Request
 from ..postprocessor.ffmpeg import EXT_TO_OUT_FORMATS, FFmpegPostProcessor
 from ..utils import (

diff --git a/yt_dlp/extractor/arcpublishing.py b/yt_dlp/extractor/arcpublishing.py
@@ -4,6 +4,7 @@
 from ..utils import (
     extract_attributes,
     int_or_none,
+    join_nonempty,
     parse_iso8601,
     try_get,
 )
@@ -136,7 +137,7 @@ def _real_extract(self, url):
             else:
                 vbr = int_or_none(s.get('bitrate'))
                 formats.append({
-                    'format_id': f'{stream_type}-{vbr}' if vbr else stream_type,
+                    'format_id': join_nonempty(stream_type, vbr),
                     'vbr': vbr,
                     'width': int_or_none(s.get('width')),
                     'height': int_or_none(s.get('height')),

diff --git a/yt_dlp/extractor/arkena.py b/yt_dlp/extractor/arkena.py
@@ -131,8 +131,8 @@ def _real_extract(self, url):
                             formats.extend(self._extract_f4m_formats(
                                 href, video_id, f4m_id='hds', fatal=False))
                         elif mime_type == 'application/dash+xml':
-                            formats.extend(self._extract_f4m_formats(
-                                href, video_id, f4m_id='hds', fatal=False))
+                            formats.extend(self._extract_mpd_formats(
+                                href, video_id, mpd_id='dash', fatal=False))
                         elif mime_type == 'application/vnd.ms-sstr+xml':
                             formats.extend(self._extract_ism_formats(
                                 href, video_id, ism_id='mss', fatal=False))

diff --git a/yt_dlp/extractor/bandcamp.py b/yt_dlp/extractor/bandcamp.py
@@ -41,7 +41,7 @@ class BandcampIE(InfoExtractor):
             'uploader_id': 'youtube-dl',
             'thumbnail': 'https://f4.bcbits.com/img/a3216802731_5.jpg',
         },
-        '_skip': 'There is a limit of 200 free downloads / month for the test song',
+        'skip': 'There is a limit of 200 free downloads / month for the test song',
     }, {
         # free download
         'url': 'http://benprunty.bandcamp.com/track/lanius-battle',

diff --git a/yt_dlp/extractor/brightcove.py b/yt_dlp/extractor/brightcove.py
@@ -18,6 +18,7 @@
     fix_xml_ampersands,
     float_or_none,
     int_or_none,
+    join_nonempty,
     js_to_json,
     mimetype2ext,
     parse_iso8601,
@@ -538,12 +539,7 @@ def _parse_brightcove_metadata(self, json_data, video_id, headers={}):
                     })
 
                 def build_format_id(kind):
-                    format_id = kind
-                    if tbr:
-                        format_id += f'-{int(tbr)}k'
-                    if height:
-                        format_id += f'-{height}p'
-                    return format_id
+                    return join_nonempty(kind, tbr and f'{int(tbr)}k', height and f'{height}p')
 
                 if src or streaming_src:
                     f.update({

diff --git a/yt_dlp/extractor/cbsnews.py b/yt_dlp/extractor/cbsnews.py
@@ -1,6 +1,5 @@
 import base64
 import re
-import urllib.error
 import urllib.parse
 import zlib
 

diff --git a/yt_dlp/extractor/dailymail.py b/yt_dlp/extractor/dailymail.py
@@ -2,6 +2,7 @@
 from ..utils import (
     determine_protocol,
     int_or_none,
+    join_nonempty,
     try_get,
     unescapeHTML,
 )
@@ -52,7 +53,7 @@ def _real_extract(self, url):
             is_hls = container == 'M2TS'
             protocol = 'm3u8_native' if is_hls else determine_protocol({'url': rendition_url})
             formats.append({
-                'format_id': ('hls' if is_hls else protocol) + (f'-{tbr}' if tbr else ''),
+                'format_id': join_nonempty('hls' if is_hls else protocol, tbr),
                 'url': rendition_url,
                 'width': int_or_none(rendition.get('frameWidth')),
                 'height': int_or_none(rendition.get('frameHeight')),

diff --git a/yt_dlp/extractor/eitb.py b/yt_dlp/extractor/eitb.py
@@ -1,6 +1,11 @@
 from .common import InfoExtractor
 from ..networking import Request
-from ..utils import float_or_none, int_or_none, parse_iso8601
+from ..utils import (
+    float_or_none,
+    int_or_none,
+    join_nonempty,
+    parse_iso8601,
+)
 
 
 class EitbIE(InfoExtractor):
@@ -37,12 +42,9 @@ def _real_extract(self, url):
             if not video_url:
                 continue
             tbr = float_or_none(rendition.get('ENCODING_RATE'), 1000)
-            format_id = 'http'
-            if tbr:
-                format_id += f'-{int(tbr)}'
             formats.append({
                 'url': rendition['PMD_URL'],
-                'format_id': format_id,
+                'format_id': join_nonempty('http', int_or_none(tbr)),
                 'width': int_or_none(rendition.get('FRAME_WIDTH')),
                 'height': int_or_none(rendition.get('FRAME_HEIGHT')),
                 'tbr': tbr,

diff --git a/yt_dlp/extractor/eporner.py b/yt_dlp/extractor/eporner.py
@@ -29,9 +29,6 @@ class EpornerIE(InfoExtractor):
             'view_count': int,
             'age_limit': 18,
         },
-        'params': {
-            'proxy': '127.0.0.1:8118',
-        },
     }, {
         # New (May 2016) URL layout
         'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0/Star-Wars-XXX-Parody/',

diff --git a/yt_dlp/extractor/go.py b/yt_dlp/extractor/go.py
@@ -5,6 +5,7 @@
     ExtractorError,
     determine_ext,
     int_or_none,
+    join_nonempty,
     parse_age_limit,
     remove_end,
     remove_start,
@@ -287,7 +288,7 @@ def _real_extract(self, url):
                     if mobj:
                         height = int(mobj.group(2))
                         f.update({
-                            'format_id': (f'{format_id}-' if format_id else '') + f'{height}P',
+                            'format_id': join_nonempty(format_id, f'{height}P'),
                             'width': int(mobj.group(1)),
                             'height': height,
                         })

diff --git a/yt_dlp/extractor/hbo.py b/yt_dlp/extractor/hbo.py
@@ -3,6 +3,7 @@
 from .common import InfoExtractor
 from ..utils import (
     int_or_none,
+    join_nonempty,
     parse_duration,
     urljoin,
     xpath_element,
@@ -69,7 +70,7 @@ def _extract_info(self, url, display_id):
                 height = format_info.get('height')
                 fmt = {
                     'url': path,
-                    'format_id': 'http{}'.format(f'-{height}p' if height else ''),
+                    'format_id': join_nonempty('http', height and f'{height}p'),
                     'width': format_info.get('width'),
                     'height': height,
                 }

diff --git a/yt_dlp/extractor/hketv.py b/yt_dlp/extractor/hketv.py
@@ -44,9 +44,6 @@ class HKETVIE(InfoExtractor):
             'duration': 907,
             'subtitles': {},
         },
-        'params': {
-            'geo_verification_proxy': '<HK proxy here>',
-        },
         'skip': 'Geo restricted to HK',
     }]
 

diff --git a/yt_dlp/extractor/instagram.py b/yt_dlp/extractor/instagram.py
@@ -453,7 +453,7 @@ def _real_extract(self, url):
             else:
                 self.report_warning('Main webpage is locked behind the login page. Retrying with embed webpage (some metadata might be missing).')
                 webpage = self._download_webpage(
-                    f'{url}/embed/', video_id, note='Downloading embed webpage', fatal=False)
+                    f'{url}/embed/', video_id, note='Downloading embed webpage', fatal=False) or ''
                 additional_data = self._search_json(
                     r'window\.__additionalDataLoaded\s*\(\s*[^,]+,', webpage, 'additional data', video_id, fatal=False)
                 if not additional_data and not media:

diff --git a/yt_dlp/extractor/iwara.py b/yt_dlp/extractor/iwara.py
@@ -2,7 +2,6 @@
 import hashlib
 import json
 import time
-import urllib.error
 import urllib.parse
 
 from .common import InfoExtractor

diff --git a/yt_dlp/extractor/nbc.py b/yt_dlp/extractor/nbc.py
@@ -16,6 +16,7 @@
     determine_ext,
     float_or_none,
     int_or_none,
+    join_nonempty,
     mimetype2ext,
     parse_age_limit,
     parse_duration,
@@ -498,10 +499,8 @@ def _real_extract(self, url):
                     m3u8_id=format_id, fatal=False))
                 continue
             tbr = int_or_none(va.get('bitrate'), 1000)
-            if tbr:
-                format_id += f'-{tbr}'
             formats.append({
-                'format_id': format_id,
+                'format_id': join_nonempty(format_id, tbr),
                 'url': public_url,
                 'width': int_or_none(va.get('width')),
                 'height': int_or_none(va.get('height')),

diff --git a/yt_dlp/extractor/nhl.py b/yt_dlp/extractor/nhl.py
@@ -2,6 +2,7 @@
 from ..utils import (
     determine_ext,
     int_or_none,
+    join_nonempty,
     parse_duration,
     parse_iso8601,
 )
@@ -41,7 +42,7 @@ def _real_extract(self, url):
             else:
                 height = int_or_none(playback.get('height'))
                 formats.append({
-                    'format_id': playback.get('name', 'http' + (f'-{height}p' if height else '')),
+                    'format_id': playback.get('name') or join_nonempty('http', height and f'{height}p'),
                     'url': playback_url,
                     'width': int_or_none(playback.get('width')),
                     'height': height,
-Original file line number
+Diff line change
@@ Expand Up / @@ -299,7 +299,7 @@ banned-from = [ @@
         "string",
         "sys",
         "time",
-        "urllib",
+        "urllib.parse",
         "uuid",
         "xml",
     ]
@@ Expand Down @@