added black formatter
This commit is contained in:
parent
0b6cc5980f
commit
5f96588ada
|
@ -1,5 +1,51 @@
|
||||||
# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
|
# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "black"
|
||||||
|
version = "24.8.0"
|
||||||
|
description = "The uncompromising code formatter."
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.8"
|
||||||
|
files = [
|
||||||
|
{file = "black-24.8.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:09cdeb74d494ec023ded657f7092ba518e8cf78fa8386155e4a03fdcc44679e6"},
|
||||||
|
{file = "black-24.8.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:81c6742da39f33b08e791da38410f32e27d632260e599df7245cccee2064afeb"},
|
||||||
|
{file = "black-24.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:707a1ca89221bc8a1a64fb5e15ef39cd755633daa672a9db7498d1c19de66a42"},
|
||||||
|
{file = "black-24.8.0-cp310-cp310-win_amd64.whl", hash = "sha256:d6417535d99c37cee4091a2f24eb2b6d5ec42b144d50f1f2e436d9fe1916fe1a"},
|
||||||
|
{file = "black-24.8.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:fb6e2c0b86bbd43dee042e48059c9ad7830abd5c94b0bc518c0eeec57c3eddc1"},
|
||||||
|
{file = "black-24.8.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:837fd281f1908d0076844bc2b801ad2d369c78c45cf800cad7b61686051041af"},
|
||||||
|
{file = "black-24.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:62e8730977f0b77998029da7971fa896ceefa2c4c4933fcd593fa599ecbf97a4"},
|
||||||
|
{file = "black-24.8.0-cp311-cp311-win_amd64.whl", hash = "sha256:72901b4913cbac8972ad911dc4098d5753704d1f3c56e44ae8dce99eecb0e3af"},
|
||||||
|
{file = "black-24.8.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:7c046c1d1eeb7aea9335da62472481d3bbf3fd986e093cffd35f4385c94ae368"},
|
||||||
|
{file = "black-24.8.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:649f6d84ccbae73ab767e206772cc2d7a393a001070a4c814a546afd0d423aed"},
|
||||||
|
{file = "black-24.8.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2b59b250fdba5f9a9cd9d0ece6e6d993d91ce877d121d161e4698af3eb9c1018"},
|
||||||
|
{file = "black-24.8.0-cp312-cp312-win_amd64.whl", hash = "sha256:6e55d30d44bed36593c3163b9bc63bf58b3b30e4611e4d88a0c3c239930ed5b2"},
|
||||||
|
{file = "black-24.8.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:505289f17ceda596658ae81b61ebbe2d9b25aa78067035184ed0a9d855d18afd"},
|
||||||
|
{file = "black-24.8.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:b19c9ad992c7883ad84c9b22aaa73562a16b819c1d8db7a1a1a49fb7ec13c7d2"},
|
||||||
|
{file = "black-24.8.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1f13f7f386f86f8121d76599114bb8c17b69d962137fc70efe56137727c7047e"},
|
||||||
|
{file = "black-24.8.0-cp38-cp38-win_amd64.whl", hash = "sha256:f490dbd59680d809ca31efdae20e634f3fae27fba3ce0ba3208333b713bc3920"},
|
||||||
|
{file = "black-24.8.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:eab4dd44ce80dea27dc69db40dab62d4ca96112f87996bca68cd75639aeb2e4c"},
|
||||||
|
{file = "black-24.8.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3c4285573d4897a7610054af5a890bde7c65cb466040c5f0c8b732812d7f0e5e"},
|
||||||
|
{file = "black-24.8.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9e84e33b37be070ba135176c123ae52a51f82306def9f7d063ee302ecab2cf47"},
|
||||||
|
{file = "black-24.8.0-cp39-cp39-win_amd64.whl", hash = "sha256:73bbf84ed136e45d451a260c6b73ed674652f90a2b3211d6a35e78054563a9bb"},
|
||||||
|
{file = "black-24.8.0-py3-none-any.whl", hash = "sha256:972085c618ee94f402da1af548a4f218c754ea7e5dc70acb168bfaca4c2542ed"},
|
||||||
|
{file = "black-24.8.0.tar.gz", hash = "sha256:2500945420b6784c38b9ee885af039f5e7471ef284ab03fa35ecdde4688cd83f"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
click = ">=8.0.0"
|
||||||
|
mypy-extensions = ">=0.4.3"
|
||||||
|
packaging = ">=22.0"
|
||||||
|
pathspec = ">=0.9.0"
|
||||||
|
platformdirs = ">=2"
|
||||||
|
tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""}
|
||||||
|
typing-extensions = {version = ">=4.0.1", markers = "python_version < \"3.11\""}
|
||||||
|
|
||||||
|
[package.extras]
|
||||||
|
colorama = ["colorama (>=0.4.3)"]
|
||||||
|
d = ["aiohttp (>=3.7.4)", "aiohttp (>=3.7.4,!=3.9.0)"]
|
||||||
|
jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"]
|
||||||
|
uvloop = ["uvloop (>=0.15.2)"]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "certifi"
|
name = "certifi"
|
||||||
version = "2024.8.30"
|
version = "2024.8.30"
|
||||||
|
@ -110,6 +156,20 @@ files = [
|
||||||
{file = "charset_normalizer-3.3.2-py3-none-any.whl", hash = "sha256:3e4d1f6587322d2788836a99c69062fbb091331ec940e02d12d179c1d53e25fc"},
|
{file = "charset_normalizer-3.3.2-py3-none-any.whl", hash = "sha256:3e4d1f6587322d2788836a99c69062fbb091331ec940e02d12d179c1d53e25fc"},
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "click"
|
||||||
|
version = "8.1.7"
|
||||||
|
description = "Composable command line interface toolkit"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.7"
|
||||||
|
files = [
|
||||||
|
{file = "click-8.1.7-py3-none-any.whl", hash = "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28"},
|
||||||
|
{file = "click-8.1.7.tar.gz", hash = "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
colorama = {version = "*", markers = "platform_system == \"Windows\""}
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "colorama"
|
name = "colorama"
|
||||||
version = "0.4.6"
|
version = "0.4.6"
|
||||||
|
@ -302,6 +362,17 @@ build = ["blurb", "twine", "wheel"]
|
||||||
docs = ["sphinx"]
|
docs = ["sphinx"]
|
||||||
test = ["pytest", "pytest-cov"]
|
test = ["pytest", "pytest-cov"]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "mypy-extensions"
|
||||||
|
version = "1.0.0"
|
||||||
|
description = "Type system extensions for programs checked with the mypy type checker."
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.5"
|
||||||
|
files = [
|
||||||
|
{file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"},
|
||||||
|
{file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"},
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "packaging"
|
name = "packaging"
|
||||||
version = "24.1"
|
version = "24.1"
|
||||||
|
@ -313,6 +384,33 @@ files = [
|
||||||
{file = "packaging-24.1.tar.gz", hash = "sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002"},
|
{file = "packaging-24.1.tar.gz", hash = "sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002"},
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pathspec"
|
||||||
|
version = "0.12.1"
|
||||||
|
description = "Utility library for gitignore style pattern matching of file paths."
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.8"
|
||||||
|
files = [
|
||||||
|
{file = "pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08"},
|
||||||
|
{file = "pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "platformdirs"
|
||||||
|
version = "4.3.6"
|
||||||
|
description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`."
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.8"
|
||||||
|
files = [
|
||||||
|
{file = "platformdirs-4.3.6-py3-none-any.whl", hash = "sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb"},
|
||||||
|
{file = "platformdirs-4.3.6.tar.gz", hash = "sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[package.extras]
|
||||||
|
docs = ["furo (>=2024.8.6)", "proselint (>=0.14)", "sphinx (>=8.0.2)", "sphinx-autodoc-typehints (>=2.4)"]
|
||||||
|
test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=8.3.2)", "pytest-cov (>=5)", "pytest-mock (>=3.14)"]
|
||||||
|
type = ["mypy (>=1.11.2)"]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "pluggy"
|
name = "pluggy"
|
||||||
version = "1.5.0"
|
version = "1.5.0"
|
||||||
|
@ -382,6 +480,17 @@ files = [
|
||||||
{file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"},
|
{file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"},
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "typing-extensions"
|
||||||
|
version = "4.12.2"
|
||||||
|
description = "Backported and Experimental Type Hints for Python 3.8+"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.8"
|
||||||
|
files = [
|
||||||
|
{file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"},
|
||||||
|
{file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"},
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "urllib3"
|
name = "urllib3"
|
||||||
version = "2.2.3"
|
version = "2.2.3"
|
||||||
|
@ -402,4 +511,4 @@ zstd = ["zstandard (>=0.18.0)"]
|
||||||
[metadata]
|
[metadata]
|
||||||
lock-version = "2.0"
|
lock-version = "2.0"
|
||||||
python-versions = ">=3.8,<3.13"
|
python-versions = ">=3.8,<3.13"
|
||||||
content-hash = "ae3ea36431a2a24e1d07e7c6e251fe7490b86edd928c22eda084e3cb974aaa99"
|
content-hash = "4c2e7d294773ea148b69f961053a9469630c48b88248903ead43e41a2838ff94"
|
||||||
|
|
|
@ -40,6 +40,7 @@ youtube_transcript_api = "youtube_transcript_api.__main__:main"
|
||||||
[tool.poe.tasks]
|
[tool.poe.tasks]
|
||||||
test = "pytest youtube_transcript_api"
|
test = "pytest youtube_transcript_api"
|
||||||
coverage.shell = "pytest youtube_transcript_api && coverage report -m"
|
coverage.shell = "pytest youtube_transcript_api && coverage report -m"
|
||||||
|
format = "black youtube_transcript_api"
|
||||||
|
|
||||||
[tool.poetry.dependencies]
|
[tool.poetry.dependencies]
|
||||||
python = ">=3.8,<3.13"
|
python = ">=3.8,<3.13"
|
||||||
|
@ -51,6 +52,7 @@ coverage = "^7.6.1"
|
||||||
mock = "^5.1.0"
|
mock = "^5.1.0"
|
||||||
httpretty = "^1.1.4"
|
httpretty = "^1.1.4"
|
||||||
coveralls = "^4.0.1"
|
coveralls = "^4.0.1"
|
||||||
|
black = "^24.8.0"
|
||||||
|
|
||||||
[tool.coverage.run]
|
[tool.coverage.run]
|
||||||
source = ["youtube_transcript_api"]
|
source = ["youtube_transcript_api"]
|
||||||
|
|
|
@ -11,5 +11,5 @@ def main():
|
||||||
print(YouTubeTranscriptCli(sys.argv[1:]).run())
|
print(YouTubeTranscriptCli(sys.argv[1:]).run())
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
|
|
|
@ -1,17 +1,17 @@
|
||||||
import requests
|
import requests
|
||||||
try: # pragma: no cover
|
|
||||||
|
try: # pragma: no cover
|
||||||
import http.cookiejar as cookiejar
|
import http.cookiejar as cookiejar
|
||||||
|
|
||||||
CookieLoadError = (FileNotFoundError, cookiejar.LoadError)
|
CookieLoadError = (FileNotFoundError, cookiejar.LoadError)
|
||||||
except ImportError: # pragma: no cover
|
except ImportError: # pragma: no cover
|
||||||
import cookielib as cookiejar
|
import cookielib as cookiejar
|
||||||
|
|
||||||
CookieLoadError = IOError
|
CookieLoadError = IOError
|
||||||
|
|
||||||
from ._transcripts import TranscriptListFetcher
|
from ._transcripts import TranscriptListFetcher
|
||||||
|
|
||||||
from ._errors import (
|
from ._errors import CookiePathInvalid, CookiesInvalid
|
||||||
CookiePathInvalid,
|
|
||||||
CookiesInvalid
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class YouTubeTranscriptApi(object):
|
class YouTubeTranscriptApi(object):
|
||||||
|
@ -71,8 +71,15 @@ class YouTubeTranscriptApi(object):
|
||||||
return TranscriptListFetcher(http_client).fetch(video_id)
|
return TranscriptListFetcher(http_client).fetch(video_id)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_transcripts(cls, video_ids, languages=('en',), continue_after_error=False, proxies=None,
|
def get_transcripts(
|
||||||
cookies=None, preserve_formatting=False):
|
cls,
|
||||||
|
video_ids,
|
||||||
|
languages=("en",),
|
||||||
|
continue_after_error=False,
|
||||||
|
proxies=None,
|
||||||
|
cookies=None,
|
||||||
|
preserve_formatting=False,
|
||||||
|
):
|
||||||
"""
|
"""
|
||||||
Retrieves the transcripts for a list of videos.
|
Retrieves the transcripts for a list of videos.
|
||||||
|
|
||||||
|
@ -102,7 +109,9 @@ class YouTubeTranscriptApi(object):
|
||||||
|
|
||||||
for video_id in video_ids:
|
for video_id in video_ids:
|
||||||
try:
|
try:
|
||||||
data[video_id] = cls.get_transcript(video_id, languages, proxies, cookies, preserve_formatting)
|
data[video_id] = cls.get_transcript(
|
||||||
|
video_id, languages, proxies, cookies, preserve_formatting
|
||||||
|
)
|
||||||
except Exception as exception:
|
except Exception as exception:
|
||||||
if not continue_after_error:
|
if not continue_after_error:
|
||||||
raise exception
|
raise exception
|
||||||
|
@ -112,7 +121,14 @@ class YouTubeTranscriptApi(object):
|
||||||
return data, unretrievable_videos
|
return data, unretrievable_videos
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_transcript(cls, video_id, languages=('en',), proxies=None, cookies=None, preserve_formatting=False):
|
def get_transcript(
|
||||||
|
cls,
|
||||||
|
video_id,
|
||||||
|
languages=("en",),
|
||||||
|
proxies=None,
|
||||||
|
cookies=None,
|
||||||
|
preserve_formatting=False,
|
||||||
|
):
|
||||||
"""
|
"""
|
||||||
Retrieves the transcript for a single video. This is just a shortcut for calling::
|
Retrieves the transcript for a single video. This is just a shortcut for calling::
|
||||||
|
|
||||||
|
@ -134,7 +150,11 @@ class YouTubeTranscriptApi(object):
|
||||||
:rtype [{'text': str, 'start': float, 'end': float}]:
|
:rtype [{'text': str, 'start': float, 'end': float}]:
|
||||||
"""
|
"""
|
||||||
assert isinstance(video_id, str), "`video_id` must be a string"
|
assert isinstance(video_id, str), "`video_id` must be a string"
|
||||||
return cls.list_transcripts(video_id, proxies, cookies).find_transcript(languages).fetch(preserve_formatting=preserve_formatting)
|
return (
|
||||||
|
cls.list_transcripts(video_id, proxies, cookies)
|
||||||
|
.find_transcript(languages)
|
||||||
|
.fetch(preserve_formatting=preserve_formatting)
|
||||||
|
)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _load_cookies(cls, cookies, video_id):
|
def _load_cookies(cls, cookies, video_id):
|
||||||
|
|
|
@ -13,10 +13,10 @@ class YouTubeTranscriptCli(object):
|
||||||
parsed_args = self._parse_args()
|
parsed_args = self._parse_args()
|
||||||
|
|
||||||
if parsed_args.exclude_manually_created and parsed_args.exclude_generated:
|
if parsed_args.exclude_manually_created and parsed_args.exclude_generated:
|
||||||
return ''
|
return ""
|
||||||
|
|
||||||
proxies = None
|
proxies = None
|
||||||
if parsed_args.http_proxy != '' or parsed_args.https_proxy != '':
|
if parsed_args.http_proxy != "" or parsed_args.https_proxy != "":
|
||||||
proxies = {"http": parsed_args.http_proxy, "https": parsed_args.https_proxy}
|
proxies = {"http": parsed_args.http_proxy, "https": parsed_args.https_proxy}
|
||||||
|
|
||||||
cookies = parsed_args.cookies
|
cookies = parsed_args.cookies
|
||||||
|
@ -26,25 +26,41 @@ class YouTubeTranscriptCli(object):
|
||||||
|
|
||||||
for video_id in parsed_args.video_ids:
|
for video_id in parsed_args.video_ids:
|
||||||
try:
|
try:
|
||||||
transcripts.append(self._fetch_transcript(parsed_args, proxies, cookies, video_id))
|
transcripts.append(
|
||||||
|
self._fetch_transcript(parsed_args, proxies, cookies, video_id)
|
||||||
|
)
|
||||||
except Exception as exception:
|
except Exception as exception:
|
||||||
exceptions.append(exception)
|
exceptions.append(exception)
|
||||||
|
|
||||||
return '\n\n'.join(
|
return "\n\n".join(
|
||||||
[str(exception) for exception in exceptions]
|
[str(exception) for exception in exceptions]
|
||||||
+ ([FormatterLoader().load(parsed_args.format).format_transcripts(transcripts)] if transcripts else [])
|
+ (
|
||||||
|
[
|
||||||
|
FormatterLoader()
|
||||||
|
.load(parsed_args.format)
|
||||||
|
.format_transcripts(transcripts)
|
||||||
|
]
|
||||||
|
if transcripts
|
||||||
|
else []
|
||||||
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
def _fetch_transcript(self, parsed_args, proxies, cookies, video_id):
|
def _fetch_transcript(self, parsed_args, proxies, cookies, video_id):
|
||||||
transcript_list = YouTubeTranscriptApi.list_transcripts(video_id, proxies=proxies, cookies=cookies)
|
transcript_list = YouTubeTranscriptApi.list_transcripts(
|
||||||
|
video_id, proxies=proxies, cookies=cookies
|
||||||
|
)
|
||||||
|
|
||||||
if parsed_args.list_transcripts:
|
if parsed_args.list_transcripts:
|
||||||
return str(transcript_list)
|
return str(transcript_list)
|
||||||
|
|
||||||
if parsed_args.exclude_manually_created:
|
if parsed_args.exclude_manually_created:
|
||||||
transcript = transcript_list.find_generated_transcript(parsed_args.languages)
|
transcript = transcript_list.find_generated_transcript(
|
||||||
|
parsed_args.languages
|
||||||
|
)
|
||||||
elif parsed_args.exclude_generated:
|
elif parsed_args.exclude_generated:
|
||||||
transcript = transcript_list.find_manually_created_transcript(parsed_args.languages)
|
transcript = transcript_list.find_manually_created_transcript(
|
||||||
|
parsed_args.languages
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
transcript = transcript_list.find_transcript(parsed_args.languages)
|
transcript = transcript_list.find_transcript(parsed_args.languages)
|
||||||
|
|
||||||
|
@ -56,80 +72,84 @@ class YouTubeTranscriptCli(object):
|
||||||
def _parse_args(self):
|
def _parse_args(self):
|
||||||
parser = argparse.ArgumentParser(
|
parser = argparse.ArgumentParser(
|
||||||
description=(
|
description=(
|
||||||
'This is an python API which allows you to get the transcripts/subtitles for a given YouTube video. '
|
"This is an python API which allows you to get the transcripts/subtitles for a given YouTube video. "
|
||||||
'It also works for automatically generated subtitles and it does not require a headless browser, like '
|
"It also works for automatically generated subtitles and it does not require a headless browser, like "
|
||||||
'other selenium based solutions do!'
|
"other selenium based solutions do!"
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
'--list-transcripts',
|
"--list-transcripts",
|
||||||
action='store_const',
|
action="store_const",
|
||||||
const=True,
|
const=True,
|
||||||
default=False,
|
default=False,
|
||||||
help='This will list the languages in which the given videos are available in.',
|
help="This will list the languages in which the given videos are available in.",
|
||||||
)
|
)
|
||||||
parser.add_argument('video_ids', nargs='+', type=str, help='List of YouTube video IDs.')
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
'--languages',
|
"video_ids", nargs="+", type=str, help="List of YouTube video IDs."
|
||||||
nargs='*',
|
)
|
||||||
default=['en',],
|
parser.add_argument(
|
||||||
|
"--languages",
|
||||||
|
nargs="*",
|
||||||
|
default=[
|
||||||
|
"en",
|
||||||
|
],
|
||||||
type=str,
|
type=str,
|
||||||
help=(
|
help=(
|
||||||
'A list of language codes in a descending priority. For example, if this is set to "de en" it will '
|
'A list of language codes in a descending priority. For example, if this is set to "de en" it will '
|
||||||
'first try to fetch the german transcript (de) and then fetch the english transcript (en) if it fails '
|
"first try to fetch the german transcript (de) and then fetch the english transcript (en) if it fails "
|
||||||
'to do so. As I can\'t provide a complete list of all working language codes with full certainty, you '
|
"to do so. As I can't provide a complete list of all working language codes with full certainty, you "
|
||||||
'may have to play around with the language codes a bit, to find the one which is working for you!'
|
"may have to play around with the language codes a bit, to find the one which is working for you!"
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
'--exclude-generated',
|
"--exclude-generated",
|
||||||
action='store_const',
|
action="store_const",
|
||||||
const=True,
|
const=True,
|
||||||
default=False,
|
default=False,
|
||||||
help='If this flag is set transcripts which have been generated by YouTube will not be retrieved.',
|
help="If this flag is set transcripts which have been generated by YouTube will not be retrieved.",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
'--exclude-manually-created',
|
"--exclude-manually-created",
|
||||||
action='store_const',
|
action="store_const",
|
||||||
const=True,
|
const=True,
|
||||||
default=False,
|
default=False,
|
||||||
help='If this flag is set transcripts which have been manually created will not be retrieved.',
|
help="If this flag is set transcripts which have been manually created will not be retrieved.",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
'--format',
|
"--format",
|
||||||
type=str,
|
type=str,
|
||||||
default='pretty',
|
default="pretty",
|
||||||
choices=tuple(FormatterLoader.TYPES.keys()),
|
choices=tuple(FormatterLoader.TYPES.keys()),
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
'--translate',
|
"--translate",
|
||||||
default='',
|
default="",
|
||||||
help=(
|
help=(
|
||||||
'The language code for the language you want this transcript to be translated to. Use the '
|
"The language code for the language you want this transcript to be translated to. Use the "
|
||||||
'--list-transcripts feature to find out which languages are translatable and which translation '
|
"--list-transcripts feature to find out which languages are translatable and which translation "
|
||||||
'languages are available.'
|
"languages are available."
|
||||||
)
|
),
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
'--http-proxy',
|
"--http-proxy",
|
||||||
default='',
|
default="",
|
||||||
metavar='URL',
|
metavar="URL",
|
||||||
help='Use the specified HTTP proxy.'
|
help="Use the specified HTTP proxy.",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
'--https-proxy',
|
"--https-proxy",
|
||||||
default='',
|
default="",
|
||||||
metavar='URL',
|
metavar="URL",
|
||||||
help='Use the specified HTTPS proxy.'
|
help="Use the specified HTTPS proxy.",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
'--cookies',
|
"--cookies",
|
||||||
default=None,
|
default=None,
|
||||||
help='The cookie file that will be used for authorization with youtube.'
|
help="The cookie file that will be used for authorization with youtube.",
|
||||||
)
|
)
|
||||||
|
|
||||||
return self._sanitize_video_ids(parser.parse_args(self._args))
|
return self._sanitize_video_ids(parser.parse_args(self._args))
|
||||||
|
|
||||||
def _sanitize_video_ids(self, args):
|
def _sanitize_video_ids(self, args):
|
||||||
args.video_ids = [video_id.replace('\\', '') for video_id in args.video_ids]
|
args.video_ids = [video_id.replace("\\", "") for video_id in args.video_ids]
|
||||||
return args
|
return args
|
||||||
|
|
|
@ -5,16 +5,17 @@ class CouldNotRetrieveTranscript(Exception):
|
||||||
"""
|
"""
|
||||||
Raised if a transcript could not be retrieved.
|
Raised if a transcript could not be retrieved.
|
||||||
"""
|
"""
|
||||||
ERROR_MESSAGE = '\nCould not retrieve a transcript for the video {video_url}!'
|
|
||||||
CAUSE_MESSAGE_INTRO = ' This is most likely caused by:\n\n{cause}'
|
ERROR_MESSAGE = "\nCould not retrieve a transcript for the video {video_url}!"
|
||||||
CAUSE_MESSAGE = ''
|
CAUSE_MESSAGE_INTRO = " This is most likely caused by:\n\n{cause}"
|
||||||
|
CAUSE_MESSAGE = ""
|
||||||
GITHUB_REFERRAL = (
|
GITHUB_REFERRAL = (
|
||||||
'\n\nIf you are sure that the described cause is not responsible for this error '
|
"\n\nIf you are sure that the described cause is not responsible for this error "
|
||||||
'and that a transcript should be retrievable, please create an issue at '
|
"and that a transcript should be retrievable, please create an issue at "
|
||||||
'https://github.com/jdepoix/youtube-transcript-api/issues. '
|
"https://github.com/jdepoix/youtube-transcript-api/issues. "
|
||||||
'Please add which version of youtube_transcript_api you are using '
|
"Please add which version of youtube_transcript_api you are using "
|
||||||
'and provide the information needed to replicate the error. '
|
"and provide the information needed to replicate the error. "
|
||||||
'Also make sure that there are no open issues which already describe your problem!'
|
"Also make sure that there are no open issues which already describe your problem!"
|
||||||
)
|
)
|
||||||
|
|
||||||
def __init__(self, video_id):
|
def __init__(self, video_id):
|
||||||
|
@ -23,10 +24,14 @@ class CouldNotRetrieveTranscript(Exception):
|
||||||
|
|
||||||
def _build_error_message(self):
|
def _build_error_message(self):
|
||||||
cause = self.cause
|
cause = self.cause
|
||||||
error_message = self.ERROR_MESSAGE.format(video_url=WATCH_URL.format(video_id=self.video_id))
|
error_message = self.ERROR_MESSAGE.format(
|
||||||
|
video_url=WATCH_URL.format(video_id=self.video_id)
|
||||||
|
)
|
||||||
|
|
||||||
if cause:
|
if cause:
|
||||||
error_message += self.CAUSE_MESSAGE_INTRO.format(cause=cause) + self.GITHUB_REFERRAL
|
error_message += (
|
||||||
|
self.CAUSE_MESSAGE_INTRO.format(cause=cause) + self.GITHUB_REFERRAL
|
||||||
|
)
|
||||||
|
|
||||||
return error_message
|
return error_message
|
||||||
|
|
||||||
|
@ -36,7 +41,7 @@ class CouldNotRetrieveTranscript(Exception):
|
||||||
|
|
||||||
|
|
||||||
class YouTubeRequestFailed(CouldNotRetrieveTranscript):
|
class YouTubeRequestFailed(CouldNotRetrieveTranscript):
|
||||||
CAUSE_MESSAGE = 'Request to YouTube failed: {reason}'
|
CAUSE_MESSAGE = "Request to YouTube failed: {reason}"
|
||||||
|
|
||||||
def __init__(self, video_id, http_error):
|
def __init__(self, video_id, http_error):
|
||||||
self.reason = str(http_error)
|
self.reason = str(http_error)
|
||||||
|
@ -50,12 +55,12 @@ class YouTubeRequestFailed(CouldNotRetrieveTranscript):
|
||||||
|
|
||||||
|
|
||||||
class VideoUnavailable(CouldNotRetrieveTranscript):
|
class VideoUnavailable(CouldNotRetrieveTranscript):
|
||||||
CAUSE_MESSAGE = 'The video is no longer available'
|
CAUSE_MESSAGE = "The video is no longer available"
|
||||||
|
|
||||||
|
|
||||||
class InvalidVideoId(CouldNotRetrieveTranscript):
|
class InvalidVideoId(CouldNotRetrieveTranscript):
|
||||||
CAUSE_MESSAGE = (
|
CAUSE_MESSAGE = (
|
||||||
'You provided an invalid video id. Make sure you are using the video id and NOT the url!\n\n'
|
"You provided an invalid video id. Make sure you are using the video id and NOT the url!\n\n"
|
||||||
'Do NOT run: `YouTubeTranscriptApi.get_transcript("https://www.youtube.com/watch?v=1234")`\n'
|
'Do NOT run: `YouTubeTranscriptApi.get_transcript("https://www.youtube.com/watch?v=1234")`\n'
|
||||||
'Instead run: `YouTubeTranscriptApi.get_transcript("1234")`'
|
'Instead run: `YouTubeTranscriptApi.get_transcript("1234")`'
|
||||||
)
|
)
|
||||||
|
@ -63,48 +68,48 @@ class InvalidVideoId(CouldNotRetrieveTranscript):
|
||||||
|
|
||||||
class TooManyRequests(CouldNotRetrieveTranscript):
|
class TooManyRequests(CouldNotRetrieveTranscript):
|
||||||
CAUSE_MESSAGE = (
|
CAUSE_MESSAGE = (
|
||||||
'YouTube is receiving too many requests from this IP and now requires solving a captcha to continue. '
|
"YouTube is receiving too many requests from this IP and now requires solving a captcha to continue. "
|
||||||
'One of the following things can be done to work around this:\n\
|
"One of the following things can be done to work around this:\n\
|
||||||
- Manually solve the captcha in a browser and export the cookie. '
|
- Manually solve the captcha in a browser and export the cookie. "
|
||||||
'Read here how to use that cookie with '
|
"Read here how to use that cookie with "
|
||||||
'youtube-transcript-api: https://github.com/jdepoix/youtube-transcript-api#cookies\n\
|
"youtube-transcript-api: https://github.com/jdepoix/youtube-transcript-api#cookies\n\
|
||||||
- Use a different IP address\n\
|
- Use a different IP address\n\
|
||||||
- Wait until the ban on your IP has been lifted'
|
- Wait until the ban on your IP has been lifted"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TranscriptsDisabled(CouldNotRetrieveTranscript):
|
class TranscriptsDisabled(CouldNotRetrieveTranscript):
|
||||||
CAUSE_MESSAGE = 'Subtitles are disabled for this video'
|
CAUSE_MESSAGE = "Subtitles are disabled for this video"
|
||||||
|
|
||||||
|
|
||||||
class NoTranscriptAvailable(CouldNotRetrieveTranscript):
|
class NoTranscriptAvailable(CouldNotRetrieveTranscript):
|
||||||
CAUSE_MESSAGE = 'No transcripts are available for this video'
|
CAUSE_MESSAGE = "No transcripts are available for this video"
|
||||||
|
|
||||||
|
|
||||||
class NotTranslatable(CouldNotRetrieveTranscript):
|
class NotTranslatable(CouldNotRetrieveTranscript):
|
||||||
CAUSE_MESSAGE = 'The requested language is not translatable'
|
CAUSE_MESSAGE = "The requested language is not translatable"
|
||||||
|
|
||||||
|
|
||||||
class TranslationLanguageNotAvailable(CouldNotRetrieveTranscript):
|
class TranslationLanguageNotAvailable(CouldNotRetrieveTranscript):
|
||||||
CAUSE_MESSAGE = 'The requested translation language is not available'
|
CAUSE_MESSAGE = "The requested translation language is not available"
|
||||||
|
|
||||||
|
|
||||||
class CookiePathInvalid(CouldNotRetrieveTranscript):
|
class CookiePathInvalid(CouldNotRetrieveTranscript):
|
||||||
CAUSE_MESSAGE = 'The provided cookie file was unable to be loaded'
|
CAUSE_MESSAGE = "The provided cookie file was unable to be loaded"
|
||||||
|
|
||||||
|
|
||||||
class CookiesInvalid(CouldNotRetrieveTranscript):
|
class CookiesInvalid(CouldNotRetrieveTranscript):
|
||||||
CAUSE_MESSAGE = 'The cookies provided are not valid (may have expired)'
|
CAUSE_MESSAGE = "The cookies provided are not valid (may have expired)"
|
||||||
|
|
||||||
|
|
||||||
class FailedToCreateConsentCookie(CouldNotRetrieveTranscript):
|
class FailedToCreateConsentCookie(CouldNotRetrieveTranscript):
|
||||||
CAUSE_MESSAGE = 'Failed to automatically give consent to saving cookies'
|
CAUSE_MESSAGE = "Failed to automatically give consent to saving cookies"
|
||||||
|
|
||||||
|
|
||||||
class NoTranscriptFound(CouldNotRetrieveTranscript):
|
class NoTranscriptFound(CouldNotRetrieveTranscript):
|
||||||
CAUSE_MESSAGE = (
|
CAUSE_MESSAGE = (
|
||||||
'No transcripts were found for any of the requested language codes: {requested_language_codes}\n\n'
|
"No transcripts were found for any of the requested language codes: {requested_language_codes}\n\n"
|
||||||
'{transcript_data}'
|
"{transcript_data}"
|
||||||
)
|
)
|
||||||
|
|
||||||
def __init__(self, video_id, requested_language_codes, transcript_data):
|
def __init__(self, video_id, requested_language_codes, transcript_data):
|
||||||
|
|
|
@ -2,10 +2,10 @@ import sys
|
||||||
|
|
||||||
|
|
||||||
# This can only be tested by using different python versions, therefore it is not covered by coverage.py
|
# This can only be tested by using different python versions, therefore it is not covered by coverage.py
|
||||||
if sys.version_info.major == 3 and sys.version_info.minor >= 4: # pragma: no cover
|
if sys.version_info.major == 3 and sys.version_info.minor >= 4: # pragma: no cover
|
||||||
# Python 3.4+
|
# Python 3.4+
|
||||||
from html import unescape
|
from html import unescape
|
||||||
else: # pragma: no cover
|
else: # pragma: no cover
|
||||||
if sys.version_info.major <= 2:
|
if sys.version_info.major <= 2:
|
||||||
# Python 2
|
# Python 2
|
||||||
import HTMLParser
|
import HTMLParser
|
||||||
|
|
|
@ -1 +1 @@
|
||||||
WATCH_URL = 'https://www.youtube.com/watch?v={video_id}'
|
WATCH_URL = "https://www.youtube.com/watch?v={video_id}"
|
||||||
|
|
|
@ -3,7 +3,7 @@ import sys
|
||||||
# This can only be tested by using different python versions, therefore it is not covered by coverage.py
|
# This can only be tested by using different python versions, therefore it is not covered by coverage.py
|
||||||
if sys.version_info.major == 2: # pragma: no cover
|
if sys.version_info.major == 2: # pragma: no cover
|
||||||
reload(sys)
|
reload(sys)
|
||||||
sys.setdefaultencoding('utf-8')
|
sys.setdefaultencoding("utf-8")
|
||||||
|
|
||||||
import json
|
import json
|
||||||
|
|
||||||
|
@ -52,7 +52,7 @@ class TranscriptListFetcher(object):
|
||||||
splitted_html = html.split('"captions":')
|
splitted_html = html.split('"captions":')
|
||||||
|
|
||||||
if len(splitted_html) <= 1:
|
if len(splitted_html) <= 1:
|
||||||
if video_id.startswith('http://') or video_id.startswith('https://'):
|
if video_id.startswith("http://") or video_id.startswith("https://"):
|
||||||
raise InvalidVideoId(video_id)
|
raise InvalidVideoId(video_id)
|
||||||
if 'class="g-recaptcha"' in html:
|
if 'class="g-recaptcha"' in html:
|
||||||
raise TooManyRequests(video_id)
|
raise TooManyRequests(video_id)
|
||||||
|
@ -62,12 +62,12 @@ class TranscriptListFetcher(object):
|
||||||
raise TranscriptsDisabled(video_id)
|
raise TranscriptsDisabled(video_id)
|
||||||
|
|
||||||
captions_json = json.loads(
|
captions_json = json.loads(
|
||||||
splitted_html[1].split(',"videoDetails')[0].replace('\n', '')
|
splitted_html[1].split(',"videoDetails')[0].replace("\n", "")
|
||||||
).get('playerCaptionsTracklistRenderer')
|
).get("playerCaptionsTracklistRenderer")
|
||||||
if captions_json is None:
|
if captions_json is None:
|
||||||
raise TranscriptsDisabled(video_id)
|
raise TranscriptsDisabled(video_id)
|
||||||
|
|
||||||
if 'captionTracks' not in captions_json:
|
if "captionTracks" not in captions_json:
|
||||||
raise NoTranscriptAvailable(video_id)
|
raise NoTranscriptAvailable(video_id)
|
||||||
|
|
||||||
return captions_json
|
return captions_json
|
||||||
|
@ -76,7 +76,9 @@ class TranscriptListFetcher(object):
|
||||||
match = re.search('name="v" value="(.*?)"', html)
|
match = re.search('name="v" value="(.*?)"', html)
|
||||||
if match is None:
|
if match is None:
|
||||||
raise FailedToCreateConsentCookie(video_id)
|
raise FailedToCreateConsentCookie(video_id)
|
||||||
self._http_client.cookies.set('CONSENT', 'YES+' + match.group(1), domain='.youtube.com')
|
self._http_client.cookies.set(
|
||||||
|
"CONSENT", "YES+" + match.group(1), domain=".youtube.com"
|
||||||
|
)
|
||||||
|
|
||||||
def _fetch_video_html(self, video_id):
|
def _fetch_video_html(self, video_id):
|
||||||
html = self._fetch_html(video_id)
|
html = self._fetch_html(video_id)
|
||||||
|
@ -88,7 +90,9 @@ class TranscriptListFetcher(object):
|
||||||
return html
|
return html
|
||||||
|
|
||||||
def _fetch_html(self, video_id):
|
def _fetch_html(self, video_id):
|
||||||
response = self._http_client.get(WATCH_URL.format(video_id=video_id), headers={'Accept-Language': 'en-US'})
|
response = self._http_client.get(
|
||||||
|
WATCH_URL.format(video_id=video_id), headers={"Accept-Language": "en-US"}
|
||||||
|
)
|
||||||
return unescape(_raise_http_errors(response, video_id).text)
|
return unescape(_raise_http_errors(response, video_id).text)
|
||||||
|
|
||||||
|
|
||||||
|
@ -98,7 +102,13 @@ class TranscriptList(object):
|
||||||
for a given YouTube video. Also it provides functionality to search for a transcript in a given language.
|
for a given YouTube video. Also it provides functionality to search for a transcript in a given language.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, video_id, manually_created_transcripts, generated_transcripts, translation_languages):
|
def __init__(
|
||||||
|
self,
|
||||||
|
video_id,
|
||||||
|
manually_created_transcripts,
|
||||||
|
generated_transcripts,
|
||||||
|
translation_languages,
|
||||||
|
):
|
||||||
"""
|
"""
|
||||||
The constructor is only for internal use. Use the static build method instead.
|
The constructor is only for internal use. Use the static build method instead.
|
||||||
|
|
||||||
|
@ -132,28 +142,29 @@ class TranscriptList(object):
|
||||||
"""
|
"""
|
||||||
translation_languages = [
|
translation_languages = [
|
||||||
{
|
{
|
||||||
'language': translation_language['languageName']['simpleText'],
|
"language": translation_language["languageName"]["simpleText"],
|
||||||
'language_code': translation_language['languageCode'],
|
"language_code": translation_language["languageCode"],
|
||||||
} for translation_language in captions_json.get('translationLanguages', [])
|
}
|
||||||
|
for translation_language in captions_json.get("translationLanguages", [])
|
||||||
]
|
]
|
||||||
|
|
||||||
manually_created_transcripts = {}
|
manually_created_transcripts = {}
|
||||||
generated_transcripts = {}
|
generated_transcripts = {}
|
||||||
|
|
||||||
for caption in captions_json['captionTracks']:
|
for caption in captions_json["captionTracks"]:
|
||||||
if caption.get('kind', '') == 'asr':
|
if caption.get("kind", "") == "asr":
|
||||||
transcript_dict = generated_transcripts
|
transcript_dict = generated_transcripts
|
||||||
else:
|
else:
|
||||||
transcript_dict = manually_created_transcripts
|
transcript_dict = manually_created_transcripts
|
||||||
|
|
||||||
transcript_dict[caption['languageCode']] = Transcript(
|
transcript_dict[caption["languageCode"]] = Transcript(
|
||||||
http_client,
|
http_client,
|
||||||
video_id,
|
video_id,
|
||||||
caption['baseUrl'],
|
caption["baseUrl"],
|
||||||
caption['name']['simpleText'],
|
caption["name"]["simpleText"],
|
||||||
caption['languageCode'],
|
caption["languageCode"],
|
||||||
caption.get('kind', '') == 'asr',
|
caption.get("kind", "") == "asr",
|
||||||
translation_languages if caption.get('isTranslatable', False) else [],
|
translation_languages if caption.get("isTranslatable", False) else [],
|
||||||
)
|
)
|
||||||
|
|
||||||
return TranscriptList(
|
return TranscriptList(
|
||||||
|
@ -164,7 +175,10 @@ class TranscriptList(object):
|
||||||
)
|
)
|
||||||
|
|
||||||
def __iter__(self):
|
def __iter__(self):
|
||||||
return iter(list(self._manually_created_transcripts.values()) + list(self._generated_transcripts.values()))
|
return iter(
|
||||||
|
list(self._manually_created_transcripts.values())
|
||||||
|
+ list(self._generated_transcripts.values())
|
||||||
|
)
|
||||||
|
|
||||||
def find_transcript(self, language_codes):
|
def find_transcript(self, language_codes):
|
||||||
"""
|
"""
|
||||||
|
@ -180,7 +194,10 @@ class TranscriptList(object):
|
||||||
:rtype Transcript:
|
:rtype Transcript:
|
||||||
:raises: NoTranscriptFound
|
:raises: NoTranscriptFound
|
||||||
"""
|
"""
|
||||||
return self._find_transcript(language_codes, [self._manually_created_transcripts, self._generated_transcripts])
|
return self._find_transcript(
|
||||||
|
language_codes,
|
||||||
|
[self._manually_created_transcripts, self._generated_transcripts],
|
||||||
|
)
|
||||||
|
|
||||||
def find_generated_transcript(self, language_codes):
|
def find_generated_transcript(self, language_codes):
|
||||||
"""
|
"""
|
||||||
|
@ -208,7 +225,9 @@ class TranscriptList(object):
|
||||||
:rtype Transcript:
|
:rtype Transcript:
|
||||||
:raises: NoTranscriptFound
|
:raises: NoTranscriptFound
|
||||||
"""
|
"""
|
||||||
return self._find_transcript(language_codes, [self._manually_created_transcripts])
|
return self._find_transcript(
|
||||||
|
language_codes, [self._manually_created_transcripts]
|
||||||
|
)
|
||||||
|
|
||||||
def _find_transcript(self, language_codes, transcript_dicts):
|
def _find_transcript(self, language_codes, transcript_dicts):
|
||||||
for language_code in language_codes:
|
for language_code in language_codes:
|
||||||
|
@ -216,44 +235,54 @@ class TranscriptList(object):
|
||||||
if language_code in transcript_dict:
|
if language_code in transcript_dict:
|
||||||
return transcript_dict[language_code]
|
return transcript_dict[language_code]
|
||||||
|
|
||||||
raise NoTranscriptFound(
|
raise NoTranscriptFound(self.video_id, language_codes, self)
|
||||||
self.video_id,
|
|
||||||
language_codes,
|
|
||||||
self
|
|
||||||
)
|
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return (
|
return (
|
||||||
'For this video ({video_id}) transcripts are available in the following languages:\n\n'
|
"For this video ({video_id}) transcripts are available in the following languages:\n\n"
|
||||||
'(MANUALLY CREATED)\n'
|
"(MANUALLY CREATED)\n"
|
||||||
'{available_manually_created_transcript_languages}\n\n'
|
"{available_manually_created_transcript_languages}\n\n"
|
||||||
'(GENERATED)\n'
|
"(GENERATED)\n"
|
||||||
'{available_generated_transcripts}\n\n'
|
"{available_generated_transcripts}\n\n"
|
||||||
'(TRANSLATION LANGUAGES)\n'
|
"(TRANSLATION LANGUAGES)\n"
|
||||||
'{available_translation_languages}'
|
"{available_translation_languages}"
|
||||||
).format(
|
).format(
|
||||||
video_id=self.video_id,
|
video_id=self.video_id,
|
||||||
available_manually_created_transcript_languages=self._get_language_description(
|
available_manually_created_transcript_languages=self._get_language_description(
|
||||||
str(transcript) for transcript in self._manually_created_transcripts.values()
|
str(transcript)
|
||||||
|
for transcript in self._manually_created_transcripts.values()
|
||||||
),
|
),
|
||||||
available_generated_transcripts=self._get_language_description(
|
available_generated_transcripts=self._get_language_description(
|
||||||
str(transcript) for transcript in self._generated_transcripts.values()
|
str(transcript) for transcript in self._generated_transcripts.values()
|
||||||
),
|
),
|
||||||
available_translation_languages=self._get_language_description(
|
available_translation_languages=self._get_language_description(
|
||||||
'{language_code} ("{language}")'.format(
|
'{language_code} ("{language}")'.format(
|
||||||
language=translation_language['language'],
|
language=translation_language["language"],
|
||||||
language_code=translation_language['language_code'],
|
language_code=translation_language["language_code"],
|
||||||
) for translation_language in self._translation_languages
|
)
|
||||||
)
|
for translation_language in self._translation_languages
|
||||||
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
def _get_language_description(self, transcript_strings):
|
def _get_language_description(self, transcript_strings):
|
||||||
description = '\n'.join(' - {transcript}'.format(transcript=transcript) for transcript in transcript_strings)
|
description = "\n".join(
|
||||||
return description if description else 'None'
|
" - {transcript}".format(transcript=transcript)
|
||||||
|
for transcript in transcript_strings
|
||||||
|
)
|
||||||
|
return description if description else "None"
|
||||||
|
|
||||||
|
|
||||||
class Transcript(object):
|
class Transcript(object):
|
||||||
def __init__(self, http_client, video_id, url, language, language_code, is_generated, translation_languages):
|
def __init__(
|
||||||
|
self,
|
||||||
|
http_client,
|
||||||
|
video_id,
|
||||||
|
url,
|
||||||
|
language,
|
||||||
|
language_code,
|
||||||
|
is_generated,
|
||||||
|
translation_languages,
|
||||||
|
):
|
||||||
"""
|
"""
|
||||||
You probably don't want to initialize this directly. Usually you'll access Transcript objects using a
|
You probably don't want to initialize this directly. Usually you'll access Transcript objects using a
|
||||||
TranscriptList.
|
TranscriptList.
|
||||||
|
@ -276,7 +305,7 @@ class Transcript(object):
|
||||||
self.is_generated = is_generated
|
self.is_generated = is_generated
|
||||||
self.translation_languages = translation_languages
|
self.translation_languages = translation_languages
|
||||||
self._translation_languages_dict = {
|
self._translation_languages_dict = {
|
||||||
translation_language['language_code']: translation_language['language']
|
translation_language["language_code"]: translation_language["language"]
|
||||||
for translation_language in translation_languages
|
for translation_language in translation_languages
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -288,7 +317,9 @@ class Transcript(object):
|
||||||
:return: a list of dictionaries containing the 'text', 'start' and 'duration' keys
|
:return: a list of dictionaries containing the 'text', 'start' and 'duration' keys
|
||||||
:rtype [{'text': str, 'start': float, 'end': float}]:
|
:rtype [{'text': str, 'start': float, 'end': float}]:
|
||||||
"""
|
"""
|
||||||
response = self._http_client.get(self._url, headers={'Accept-Language': 'en-US'})
|
response = self._http_client.get(
|
||||||
|
self._url, headers={"Accept-Language": "en-US"}
|
||||||
|
)
|
||||||
return _TranscriptParser(preserve_formatting=preserve_formatting).parse(
|
return _TranscriptParser(preserve_formatting=preserve_formatting).parse(
|
||||||
_raise_http_errors(response, self.video_id).text,
|
_raise_http_errors(response, self.video_id).text,
|
||||||
)
|
)
|
||||||
|
@ -297,7 +328,7 @@ class Transcript(object):
|
||||||
return '{language_code} ("{language}"){translation_description}'.format(
|
return '{language_code} ("{language}"){translation_description}'.format(
|
||||||
language=self.language,
|
language=self.language,
|
||||||
language_code=self.language_code,
|
language_code=self.language_code,
|
||||||
translation_description='[TRANSLATABLE]' if self.is_translatable else ''
|
translation_description="[TRANSLATABLE]" if self.is_translatable else "",
|
||||||
)
|
)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
|
@ -314,7 +345,9 @@ class Transcript(object):
|
||||||
return Transcript(
|
return Transcript(
|
||||||
self._http_client,
|
self._http_client,
|
||||||
self.video_id,
|
self.video_id,
|
||||||
'{url}&tlang={language_code}'.format(url=self._url, language_code=language_code),
|
"{url}&tlang={language_code}".format(
|
||||||
|
url=self._url, language_code=language_code
|
||||||
|
),
|
||||||
self._translation_languages_dict[language_code],
|
self._translation_languages_dict[language_code],
|
||||||
language_code,
|
language_code,
|
||||||
True,
|
True,
|
||||||
|
@ -324,16 +357,16 @@ class Transcript(object):
|
||||||
|
|
||||||
class _TranscriptParser(object):
|
class _TranscriptParser(object):
|
||||||
_FORMATTING_TAGS = [
|
_FORMATTING_TAGS = [
|
||||||
'strong', # important
|
"strong", # important
|
||||||
'em', # emphasized
|
"em", # emphasized
|
||||||
'b', # bold
|
"b", # bold
|
||||||
'i', # italic
|
"i", # italic
|
||||||
'mark', # marked
|
"mark", # marked
|
||||||
'small', # smaller
|
"small", # smaller
|
||||||
'del', # deleted
|
"del", # deleted
|
||||||
'ins', # inserted
|
"ins", # inserted
|
||||||
'sub', # subscript
|
"sub", # subscript
|
||||||
'sup', # superscript
|
"sup", # superscript
|
||||||
]
|
]
|
||||||
|
|
||||||
def __init__(self, preserve_formatting=False):
|
def __init__(self, preserve_formatting=False):
|
||||||
|
@ -341,19 +374,19 @@ class _TranscriptParser(object):
|
||||||
|
|
||||||
def _get_html_regex(self, preserve_formatting):
|
def _get_html_regex(self, preserve_formatting):
|
||||||
if preserve_formatting:
|
if preserve_formatting:
|
||||||
formats_regex = '|'.join(self._FORMATTING_TAGS)
|
formats_regex = "|".join(self._FORMATTING_TAGS)
|
||||||
formats_regex = r'<\/?(?!\/?(' + formats_regex + r')\b).*?\b>'
|
formats_regex = r"<\/?(?!\/?(" + formats_regex + r")\b).*?\b>"
|
||||||
html_regex = re.compile(formats_regex, re.IGNORECASE)
|
html_regex = re.compile(formats_regex, re.IGNORECASE)
|
||||||
else:
|
else:
|
||||||
html_regex = re.compile(r'<[^>]*>', re.IGNORECASE)
|
html_regex = re.compile(r"<[^>]*>", re.IGNORECASE)
|
||||||
return html_regex
|
return html_regex
|
||||||
|
|
||||||
def parse(self, plain_data):
|
def parse(self, plain_data):
|
||||||
return [
|
return [
|
||||||
{
|
{
|
||||||
'text': re.sub(self._html_regex, '', unescape(xml_element.text)),
|
"text": re.sub(self._html_regex, "", unescape(xml_element.text)),
|
||||||
'start': float(xml_element.attrib['start']),
|
"start": float(xml_element.attrib["start"]),
|
||||||
'duration': float(xml_element.attrib.get('dur', '0.0')),
|
"duration": float(xml_element.attrib.get("dur", "0.0")),
|
||||||
}
|
}
|
||||||
for xml_element in ElementTree.fromstring(plain_data)
|
for xml_element in ElementTree.fromstring(plain_data)
|
||||||
if xml_element.text is not None
|
if xml_element.text is not None
|
||||||
|
|
|
@ -12,12 +12,16 @@ class Formatter(object):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def format_transcript(self, transcript, **kwargs):
|
def format_transcript(self, transcript, **kwargs):
|
||||||
raise NotImplementedError('A subclass of Formatter must implement ' \
|
raise NotImplementedError(
|
||||||
'their own .format_transcript() method.')
|
"A subclass of Formatter must implement "
|
||||||
|
"their own .format_transcript() method."
|
||||||
|
)
|
||||||
|
|
||||||
def format_transcripts(self, transcripts, **kwargs):
|
def format_transcripts(self, transcripts, **kwargs):
|
||||||
raise NotImplementedError('A subclass of Formatter must implement ' \
|
raise NotImplementedError(
|
||||||
'their own .format_transcripts() method.')
|
"A subclass of Formatter must implement "
|
||||||
|
"their own .format_transcripts() method."
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class PrettyPrintFormatter(Formatter):
|
class PrettyPrintFormatter(Formatter):
|
||||||
|
@ -68,7 +72,7 @@ class TextFormatter(Formatter):
|
||||||
:return: all transcript text lines separated by newline breaks.'
|
:return: all transcript text lines separated by newline breaks.'
|
||||||
:rtype str
|
:rtype str
|
||||||
"""
|
"""
|
||||||
return '\n'.join(line['text'] for line in transcript)
|
return "\n".join(line["text"] for line in transcript)
|
||||||
|
|
||||||
def format_transcripts(self, transcripts, **kwargs):
|
def format_transcripts(self, transcripts, **kwargs):
|
||||||
"""Converts a list of transcripts into plain text with no timestamps.
|
"""Converts a list of transcripts into plain text with no timestamps.
|
||||||
|
@ -77,20 +81,29 @@ class TextFormatter(Formatter):
|
||||||
:return: all transcript text lines separated by newline breaks.'
|
:return: all transcript text lines separated by newline breaks.'
|
||||||
:rtype str
|
:rtype str
|
||||||
"""
|
"""
|
||||||
return '\n\n\n'.join([self.format_transcript(transcript, **kwargs) for transcript in transcripts])
|
return "\n\n\n".join(
|
||||||
|
[self.format_transcript(transcript, **kwargs) for transcript in transcripts]
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class _TextBasedFormatter(TextFormatter):
|
class _TextBasedFormatter(TextFormatter):
|
||||||
def _format_timestamp(self, hours, mins, secs, ms):
|
def _format_timestamp(self, hours, mins, secs, ms):
|
||||||
raise NotImplementedError('A subclass of _TextBasedFormatter must implement ' \
|
raise NotImplementedError(
|
||||||
'their own .format_timestamp() method.')
|
"A subclass of _TextBasedFormatter must implement "
|
||||||
|
"their own .format_timestamp() method."
|
||||||
|
)
|
||||||
|
|
||||||
def _format_transcript_header(self, lines):
|
def _format_transcript_header(self, lines):
|
||||||
raise NotImplementedError('A subclass of _TextBasedFormatter must implement ' \
|
raise NotImplementedError(
|
||||||
'their own _format_transcript_header method.')
|
"A subclass of _TextBasedFormatter must implement "
|
||||||
|
"their own _format_transcript_header method."
|
||||||
|
)
|
||||||
|
|
||||||
def _format_transcript_helper(self, i, time_text, line):
|
def _format_transcript_helper(self, i, time_text, line):
|
||||||
raise NotImplementedError('A subclass of _TextBasedFormatter must implement ' \
|
raise NotImplementedError(
|
||||||
'their own _format_transcript_helper method.')
|
"A subclass of _TextBasedFormatter must implement "
|
||||||
|
"their own _format_transcript_helper method."
|
||||||
|
)
|
||||||
|
|
||||||
def _seconds_to_timestamp(self, time):
|
def _seconds_to_timestamp(self, time):
|
||||||
"""Helper that converts `time` into a transcript cue timestamp.
|
"""Helper that converts `time` into a transcript cue timestamp.
|
||||||
|
@ -109,7 +122,7 @@ class _TextBasedFormatter(TextFormatter):
|
||||||
hours_float, remainder = divmod(time, 3600)
|
hours_float, remainder = divmod(time, 3600)
|
||||||
mins_float, secs_float = divmod(remainder, 60)
|
mins_float, secs_float = divmod(remainder, 60)
|
||||||
hours, mins, secs = int(hours_float), int(mins_float), int(secs_float)
|
hours, mins, secs = int(hours_float), int(mins_float), int(secs_float)
|
||||||
ms = int(round((time - int(time))*1000, 2))
|
ms = int(round((time - int(time)) * 1000, 2))
|
||||||
return self._format_timestamp(hours, mins, secs, ms)
|
return self._format_timestamp(hours, mins, secs, ms)
|
||||||
|
|
||||||
def format_transcript(self, transcript, **kwargs):
|
def format_transcript(self, transcript, **kwargs):
|
||||||
|
@ -122,13 +135,14 @@ class _TextBasedFormatter(TextFormatter):
|
||||||
"""
|
"""
|
||||||
lines = []
|
lines = []
|
||||||
for i, line in enumerate(transcript):
|
for i, line in enumerate(transcript):
|
||||||
end = line['start'] + line['duration']
|
end = line["start"] + line["duration"]
|
||||||
time_text = "{} --> {}".format(
|
time_text = "{} --> {}".format(
|
||||||
self._seconds_to_timestamp(line['start']),
|
self._seconds_to_timestamp(line["start"]),
|
||||||
self._seconds_to_timestamp(
|
self._seconds_to_timestamp(
|
||||||
transcript[i + 1]['start']
|
transcript[i + 1]["start"]
|
||||||
if i < len(transcript) - 1 and transcript[i + 1]['start'] < end else end
|
if i < len(transcript) - 1 and transcript[i + 1]["start"] < end
|
||||||
)
|
else end
|
||||||
|
),
|
||||||
)
|
)
|
||||||
lines.append(self._format_transcript_helper(i, time_text, line))
|
lines.append(self._format_transcript_helper(i, time_text, line))
|
||||||
|
|
||||||
|
@ -143,7 +157,7 @@ class SRTFormatter(_TextBasedFormatter):
|
||||||
return "\n\n".join(lines) + "\n"
|
return "\n\n".join(lines) + "\n"
|
||||||
|
|
||||||
def _format_transcript_helper(self, i, time_text, line):
|
def _format_transcript_helper(self, i, time_text, line):
|
||||||
return "{}\n{}\n{}".format(i + 1, time_text, line['text'])
|
return "{}\n{}\n{}".format(i + 1, time_text, line["text"])
|
||||||
|
|
||||||
|
|
||||||
class WebVTTFormatter(_TextBasedFormatter):
|
class WebVTTFormatter(_TextBasedFormatter):
|
||||||
|
@ -154,29 +168,29 @@ class WebVTTFormatter(_TextBasedFormatter):
|
||||||
return "WEBVTT\n\n" + "\n\n".join(lines) + "\n"
|
return "WEBVTT\n\n" + "\n\n".join(lines) + "\n"
|
||||||
|
|
||||||
def _format_transcript_helper(self, i, time_text, line):
|
def _format_transcript_helper(self, i, time_text, line):
|
||||||
return "{}\n{}".format(time_text, line['text'])
|
return "{}\n{}".format(time_text, line["text"])
|
||||||
|
|
||||||
|
|
||||||
class FormatterLoader(object):
|
class FormatterLoader(object):
|
||||||
TYPES = {
|
TYPES = {
|
||||||
'json': JSONFormatter,
|
"json": JSONFormatter,
|
||||||
'pretty': PrettyPrintFormatter,
|
"pretty": PrettyPrintFormatter,
|
||||||
'text': TextFormatter,
|
"text": TextFormatter,
|
||||||
'webvtt': WebVTTFormatter,
|
"webvtt": WebVTTFormatter,
|
||||||
'srt' : SRTFormatter,
|
"srt": SRTFormatter,
|
||||||
}
|
}
|
||||||
|
|
||||||
class UnknownFormatterType(Exception):
|
class UnknownFormatterType(Exception):
|
||||||
def __init__(self, formatter_type):
|
def __init__(self, formatter_type):
|
||||||
super(FormatterLoader.UnknownFormatterType, self).__init__(
|
super(FormatterLoader.UnknownFormatterType, self).__init__(
|
||||||
'The format \'{formatter_type}\' is not supported. '
|
"The format '{formatter_type}' is not supported. "
|
||||||
'Choose one of the following formats: {supported_formatter_types}'.format(
|
"Choose one of the following formats: {supported_formatter_types}".format(
|
||||||
formatter_type=formatter_type,
|
formatter_type=formatter_type,
|
||||||
supported_formatter_types=', '.join(FormatterLoader.TYPES.keys()),
|
supported_formatter_types=", ".join(FormatterLoader.TYPES.keys()),
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
def load(self, formatter_type='pretty'):
|
def load(self, formatter_type="pretty"):
|
||||||
"""
|
"""
|
||||||
Loads the Formatter for the given formatter type.
|
Loads the Formatter for the given formatter type.
|
||||||
|
|
||||||
|
|
|
@ -25,8 +25,9 @@ from youtube_transcript_api import (
|
||||||
|
|
||||||
|
|
||||||
def load_asset(filename):
|
def load_asset(filename):
|
||||||
filepath = '{dirname}/assets/{filename}'.format(
|
filepath = "{dirname}/assets/{filename}".format(
|
||||||
dirname=os.path.dirname(__file__), filename=filename)
|
dirname=os.path.dirname(__file__), filename=filename
|
||||||
|
)
|
||||||
|
|
||||||
with open(filepath, mode="rb") as file:
|
with open(filepath, mode="rb") as file:
|
||||||
return file.read()
|
return file.read()
|
||||||
|
@ -37,13 +38,13 @@ class TestYouTubeTranscriptApi(TestCase):
|
||||||
httpretty.enable()
|
httpretty.enable()
|
||||||
httpretty.register_uri(
|
httpretty.register_uri(
|
||||||
httpretty.GET,
|
httpretty.GET,
|
||||||
'https://www.youtube.com/watch',
|
"https://www.youtube.com/watch",
|
||||||
body=load_asset('youtube.html.static')
|
body=load_asset("youtube.html.static"),
|
||||||
)
|
)
|
||||||
httpretty.register_uri(
|
httpretty.register_uri(
|
||||||
httpretty.GET,
|
httpretty.GET,
|
||||||
'https://www.youtube.com/api/timedtext',
|
"https://www.youtube.com/api/timedtext",
|
||||||
body=load_asset('transcript.xml.static')
|
body=load_asset("transcript.xml.static"),
|
||||||
)
|
)
|
||||||
|
|
||||||
def tearDown(self):
|
def tearDown(self):
|
||||||
|
@ -51,306 +52,362 @@ class TestYouTubeTranscriptApi(TestCase):
|
||||||
httpretty.disable()
|
httpretty.disable()
|
||||||
|
|
||||||
def test_get_transcript(self):
|
def test_get_transcript(self):
|
||||||
transcript = YouTubeTranscriptApi.get_transcript('GJLlxj_dtq8')
|
transcript = YouTubeTranscriptApi.get_transcript("GJLlxj_dtq8")
|
||||||
|
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
transcript,
|
transcript,
|
||||||
[
|
[
|
||||||
{'text': 'Hey, this is just a test', 'start': 0.0, 'duration': 1.54},
|
{"text": "Hey, this is just a test", "start": 0.0, "duration": 1.54},
|
||||||
{'text': 'this is not the original transcript', 'start': 1.54, 'duration': 4.16},
|
{
|
||||||
{'text': 'just something shorter, I made up for testing', 'start': 5.7, 'duration': 3.239}
|
"text": "this is not the original transcript",
|
||||||
]
|
"start": 1.54,
|
||||||
|
"duration": 4.16,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"text": "just something shorter, I made up for testing",
|
||||||
|
"start": 5.7,
|
||||||
|
"duration": 3.239,
|
||||||
|
},
|
||||||
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_get_transcript_formatted(self):
|
def test_get_transcript_formatted(self):
|
||||||
transcript = YouTubeTranscriptApi.get_transcript('GJLlxj_dtq8', preserve_formatting=True)
|
transcript = YouTubeTranscriptApi.get_transcript(
|
||||||
|
"GJLlxj_dtq8", preserve_formatting=True
|
||||||
|
)
|
||||||
|
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
transcript,
|
transcript,
|
||||||
[
|
[
|
||||||
{'text': 'Hey, this is just a test', 'start': 0.0, 'duration': 1.54},
|
{"text": "Hey, this is just a test", "start": 0.0, "duration": 1.54},
|
||||||
{'text': 'this is <i>not</i> the original transcript', 'start': 1.54, 'duration': 4.16},
|
{
|
||||||
{'text': 'just something shorter, I made up for testing', 'start': 5.7, 'duration': 3.239}
|
"text": "this is <i>not</i> the original transcript",
|
||||||
]
|
"start": 1.54,
|
||||||
|
"duration": 4.16,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"text": "just something shorter, I made up for testing",
|
||||||
|
"start": 5.7,
|
||||||
|
"duration": 3.239,
|
||||||
|
},
|
||||||
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_list_transcripts(self):
|
def test_list_transcripts(self):
|
||||||
transcript_list = YouTubeTranscriptApi.list_transcripts('GJLlxj_dtq8')
|
transcript_list = YouTubeTranscriptApi.list_transcripts("GJLlxj_dtq8")
|
||||||
|
|
||||||
language_codes = {transcript.language_code for transcript in transcript_list}
|
language_codes = {transcript.language_code for transcript in transcript_list}
|
||||||
|
|
||||||
self.assertEqual(language_codes, {'zh', 'de', 'en', 'hi', 'ja', 'ko', 'es', 'cs', 'en'})
|
self.assertEqual(
|
||||||
|
language_codes, {"zh", "de", "en", "hi", "ja", "ko", "es", "cs", "en"}
|
||||||
|
)
|
||||||
|
|
||||||
def test_list_transcripts__find_manually_created(self):
|
def test_list_transcripts__find_manually_created(self):
|
||||||
transcript_list = YouTubeTranscriptApi.list_transcripts('GJLlxj_dtq8')
|
transcript_list = YouTubeTranscriptApi.list_transcripts("GJLlxj_dtq8")
|
||||||
transcript = transcript_list.find_manually_created_transcript(['cs'])
|
transcript = transcript_list.find_manually_created_transcript(["cs"])
|
||||||
|
|
||||||
self.assertFalse(transcript.is_generated)
|
self.assertFalse(transcript.is_generated)
|
||||||
|
|
||||||
|
|
||||||
def test_list_transcripts__find_generated(self):
|
def test_list_transcripts__find_generated(self):
|
||||||
transcript_list = YouTubeTranscriptApi.list_transcripts('GJLlxj_dtq8')
|
transcript_list = YouTubeTranscriptApi.list_transcripts("GJLlxj_dtq8")
|
||||||
|
|
||||||
with self.assertRaises(NoTranscriptFound):
|
with self.assertRaises(NoTranscriptFound):
|
||||||
transcript_list.find_generated_transcript(['cs'])
|
transcript_list.find_generated_transcript(["cs"])
|
||||||
|
|
||||||
transcript = transcript_list.find_generated_transcript(['en'])
|
transcript = transcript_list.find_generated_transcript(["en"])
|
||||||
|
|
||||||
self.assertTrue(transcript.is_generated)
|
self.assertTrue(transcript.is_generated)
|
||||||
|
|
||||||
def test_list_transcripts__url_as_video_id(self):
|
def test_list_transcripts__url_as_video_id(self):
|
||||||
httpretty.register_uri(
|
httpretty.register_uri(
|
||||||
httpretty.GET,
|
httpretty.GET,
|
||||||
'https://www.youtube.com/watch',
|
"https://www.youtube.com/watch",
|
||||||
body=load_asset('youtube_transcripts_disabled.html.static')
|
body=load_asset("youtube_transcripts_disabled.html.static"),
|
||||||
)
|
)
|
||||||
|
|
||||||
with self.assertRaises(InvalidVideoId):
|
with self.assertRaises(InvalidVideoId):
|
||||||
YouTubeTranscriptApi.list_transcripts('https://www.youtube.com/watch?v=GJLlxj_dtq8')
|
YouTubeTranscriptApi.list_transcripts(
|
||||||
|
"https://www.youtube.com/watch?v=GJLlxj_dtq8"
|
||||||
|
)
|
||||||
|
|
||||||
def test_list_transcripts__no_translation_languages_provided(self):
|
def test_list_transcripts__no_translation_languages_provided(self):
|
||||||
httpretty.register_uri(
|
httpretty.register_uri(
|
||||||
httpretty.GET,
|
httpretty.GET,
|
||||||
'https://www.youtube.com/watch',
|
"https://www.youtube.com/watch",
|
||||||
body=load_asset('youtube_no_translation_languages.html.static')
|
body=load_asset("youtube_no_translation_languages.html.static"),
|
||||||
)
|
)
|
||||||
|
|
||||||
transcript_list = YouTubeTranscriptApi.list_transcripts('GJLlxj_dtq8')
|
transcript_list = YouTubeTranscriptApi.list_transcripts("GJLlxj_dtq8")
|
||||||
for transcript in transcript_list:
|
for transcript in transcript_list:
|
||||||
self.assertEqual(len(transcript.translation_languages), 0)
|
self.assertEqual(len(transcript.translation_languages), 0)
|
||||||
|
|
||||||
|
|
||||||
def test_translate_transcript(self):
|
def test_translate_transcript(self):
|
||||||
transcript = YouTubeTranscriptApi.list_transcripts('GJLlxj_dtq8').find_transcript(['en'])
|
transcript = YouTubeTranscriptApi.list_transcripts(
|
||||||
|
"GJLlxj_dtq8"
|
||||||
|
).find_transcript(["en"])
|
||||||
|
|
||||||
translated_transcript = transcript.translate('af')
|
translated_transcript = transcript.translate("af")
|
||||||
|
|
||||||
self.assertEqual(translated_transcript.language_code, 'af')
|
self.assertEqual(translated_transcript.language_code, "af")
|
||||||
self.assertIn('&tlang=af', translated_transcript._url)
|
self.assertIn("&tlang=af", translated_transcript._url)
|
||||||
|
|
||||||
def test_translate_transcript__translation_language_not_available(self):
|
def test_translate_transcript__translation_language_not_available(self):
|
||||||
transcript = YouTubeTranscriptApi.list_transcripts('GJLlxj_dtq8').find_transcript(['en'])
|
transcript = YouTubeTranscriptApi.list_transcripts(
|
||||||
|
"GJLlxj_dtq8"
|
||||||
|
).find_transcript(["en"])
|
||||||
|
|
||||||
with self.assertRaises(TranslationLanguageNotAvailable):
|
with self.assertRaises(TranslationLanguageNotAvailable):
|
||||||
transcript.translate('xyz')
|
transcript.translate("xyz")
|
||||||
|
|
||||||
def test_translate_transcript__not_translatable(self):
|
def test_translate_transcript__not_translatable(self):
|
||||||
transcript = YouTubeTranscriptApi.list_transcripts('GJLlxj_dtq8').find_transcript(['en'])
|
transcript = YouTubeTranscriptApi.list_transcripts(
|
||||||
|
"GJLlxj_dtq8"
|
||||||
|
).find_transcript(["en"])
|
||||||
transcript.translation_languages = []
|
transcript.translation_languages = []
|
||||||
|
|
||||||
with self.assertRaises(NotTranslatable):
|
with self.assertRaises(NotTranslatable):
|
||||||
transcript.translate('af')
|
transcript.translate("af")
|
||||||
|
|
||||||
def test_get_transcript__correct_language_is_used(self):
|
def test_get_transcript__correct_language_is_used(self):
|
||||||
YouTubeTranscriptApi.get_transcript('GJLlxj_dtq8', ['de', 'en'])
|
YouTubeTranscriptApi.get_transcript("GJLlxj_dtq8", ["de", "en"])
|
||||||
query_string = httpretty.last_request().querystring
|
query_string = httpretty.last_request().querystring
|
||||||
|
|
||||||
self.assertIn('lang', query_string)
|
self.assertIn("lang", query_string)
|
||||||
self.assertEqual(len(query_string['lang']), 1)
|
self.assertEqual(len(query_string["lang"]), 1)
|
||||||
self.assertEqual(query_string['lang'][0], 'de')
|
self.assertEqual(query_string["lang"][0], "de")
|
||||||
|
|
||||||
def test_get_transcript__fallback_language_is_used(self):
|
def test_get_transcript__fallback_language_is_used(self):
|
||||||
httpretty.register_uri(
|
httpretty.register_uri(
|
||||||
httpretty.GET,
|
httpretty.GET,
|
||||||
'https://www.youtube.com/watch',
|
"https://www.youtube.com/watch",
|
||||||
body=load_asset('youtube_ww1_nl_en.html.static')
|
body=load_asset("youtube_ww1_nl_en.html.static"),
|
||||||
)
|
)
|
||||||
|
|
||||||
YouTubeTranscriptApi.get_transcript('F1xioXWb8CY', ['de', 'en'])
|
YouTubeTranscriptApi.get_transcript("F1xioXWb8CY", ["de", "en"])
|
||||||
query_string = httpretty.last_request().querystring
|
query_string = httpretty.last_request().querystring
|
||||||
|
|
||||||
self.assertIn('lang', query_string)
|
self.assertIn("lang", query_string)
|
||||||
self.assertEqual(len(query_string['lang']), 1)
|
self.assertEqual(len(query_string["lang"]), 1)
|
||||||
self.assertEqual(query_string['lang'][0], 'en')
|
self.assertEqual(query_string["lang"][0], "en")
|
||||||
|
|
||||||
def test_get_transcript__create_consent_cookie_if_needed(self):
|
def test_get_transcript__create_consent_cookie_if_needed(self):
|
||||||
httpretty.register_uri(
|
httpretty.register_uri(
|
||||||
httpretty.GET,
|
httpretty.GET,
|
||||||
'https://www.youtube.com/watch',
|
"https://www.youtube.com/watch",
|
||||||
body=load_asset('youtube_consent_page.html.static')
|
body=load_asset("youtube_consent_page.html.static"),
|
||||||
)
|
)
|
||||||
|
|
||||||
YouTubeTranscriptApi.get_transcript('F1xioXWb8CY')
|
YouTubeTranscriptApi.get_transcript("F1xioXWb8CY")
|
||||||
self.assertEqual(len(httpretty.latest_requests()), 3)
|
self.assertEqual(len(httpretty.latest_requests()), 3)
|
||||||
for request in httpretty.latest_requests()[1:]:
|
for request in httpretty.latest_requests()[1:]:
|
||||||
self.assertEqual(request.headers['cookie'], 'CONSENT=YES+cb.20210328-17-p0.de+FX+119')
|
self.assertEqual(
|
||||||
|
request.headers["cookie"], "CONSENT=YES+cb.20210328-17-p0.de+FX+119"
|
||||||
|
)
|
||||||
|
|
||||||
def test_get_transcript__exception_if_create_consent_cookie_failed(self):
|
def test_get_transcript__exception_if_create_consent_cookie_failed(self):
|
||||||
httpretty.register_uri(
|
httpretty.register_uri(
|
||||||
httpretty.GET,
|
httpretty.GET,
|
||||||
'https://www.youtube.com/watch',
|
"https://www.youtube.com/watch",
|
||||||
body=load_asset('youtube_consent_page.html.static')
|
body=load_asset("youtube_consent_page.html.static"),
|
||||||
)
|
)
|
||||||
httpretty.register_uri(
|
httpretty.register_uri(
|
||||||
httpretty.GET,
|
httpretty.GET,
|
||||||
'https://www.youtube.com/watch',
|
"https://www.youtube.com/watch",
|
||||||
body=load_asset('youtube_consent_page.html.static')
|
body=load_asset("youtube_consent_page.html.static"),
|
||||||
)
|
)
|
||||||
|
|
||||||
with self.assertRaises(FailedToCreateConsentCookie):
|
with self.assertRaises(FailedToCreateConsentCookie):
|
||||||
YouTubeTranscriptApi.get_transcript('F1xioXWb8CY')
|
YouTubeTranscriptApi.get_transcript("F1xioXWb8CY")
|
||||||
|
|
||||||
def test_get_transcript__exception_if_consent_cookie_age_invalid(self):
|
def test_get_transcript__exception_if_consent_cookie_age_invalid(self):
|
||||||
httpretty.register_uri(
|
httpretty.register_uri(
|
||||||
httpretty.GET,
|
httpretty.GET,
|
||||||
'https://www.youtube.com/watch',
|
"https://www.youtube.com/watch",
|
||||||
body=load_asset('youtube_consent_page_invalid.html.static')
|
body=load_asset("youtube_consent_page_invalid.html.static"),
|
||||||
)
|
)
|
||||||
|
|
||||||
with self.assertRaises(FailedToCreateConsentCookie):
|
with self.assertRaises(FailedToCreateConsentCookie):
|
||||||
YouTubeTranscriptApi.get_transcript('F1xioXWb8CY')
|
YouTubeTranscriptApi.get_transcript("F1xioXWb8CY")
|
||||||
|
|
||||||
def test_get_transcript__exception_if_video_unavailable(self):
|
def test_get_transcript__exception_if_video_unavailable(self):
|
||||||
httpretty.register_uri(
|
httpretty.register_uri(
|
||||||
httpretty.GET,
|
httpretty.GET,
|
||||||
'https://www.youtube.com/watch',
|
"https://www.youtube.com/watch",
|
||||||
body=load_asset('youtube_video_unavailable.html.static')
|
body=load_asset("youtube_video_unavailable.html.static"),
|
||||||
)
|
)
|
||||||
|
|
||||||
with self.assertRaises(VideoUnavailable):
|
with self.assertRaises(VideoUnavailable):
|
||||||
YouTubeTranscriptApi.get_transcript('abc')
|
YouTubeTranscriptApi.get_transcript("abc")
|
||||||
|
|
||||||
def test_get_transcript__exception_if_youtube_request_fails(self):
|
def test_get_transcript__exception_if_youtube_request_fails(self):
|
||||||
httpretty.register_uri(
|
httpretty.register_uri(
|
||||||
httpretty.GET,
|
httpretty.GET, "https://www.youtube.com/watch", status=500
|
||||||
'https://www.youtube.com/watch',
|
|
||||||
status=500
|
|
||||||
)
|
)
|
||||||
|
|
||||||
with self.assertRaises(YouTubeRequestFailed):
|
with self.assertRaises(YouTubeRequestFailed):
|
||||||
YouTubeTranscriptApi.get_transcript('abc')
|
YouTubeTranscriptApi.get_transcript("abc")
|
||||||
|
|
||||||
def test_get_transcript__exception_if_youtube_request_limit_reached(self):
|
def test_get_transcript__exception_if_youtube_request_limit_reached(self):
|
||||||
httpretty.register_uri(
|
httpretty.register_uri(
|
||||||
httpretty.GET,
|
httpretty.GET,
|
||||||
'https://www.youtube.com/watch',
|
"https://www.youtube.com/watch",
|
||||||
body=load_asset('youtube_too_many_requests.html.static')
|
body=load_asset("youtube_too_many_requests.html.static"),
|
||||||
)
|
)
|
||||||
|
|
||||||
with self.assertRaises(TooManyRequests):
|
with self.assertRaises(TooManyRequests):
|
||||||
YouTubeTranscriptApi.get_transcript('abc')
|
YouTubeTranscriptApi.get_transcript("abc")
|
||||||
|
|
||||||
def test_get_transcript__exception_if_transcripts_disabled(self):
|
def test_get_transcript__exception_if_transcripts_disabled(self):
|
||||||
httpretty.register_uri(
|
httpretty.register_uri(
|
||||||
httpretty.GET,
|
httpretty.GET,
|
||||||
'https://www.youtube.com/watch',
|
"https://www.youtube.com/watch",
|
||||||
body=load_asset('youtube_transcripts_disabled.html.static')
|
body=load_asset("youtube_transcripts_disabled.html.static"),
|
||||||
)
|
)
|
||||||
|
|
||||||
with self.assertRaises(TranscriptsDisabled):
|
with self.assertRaises(TranscriptsDisabled):
|
||||||
YouTubeTranscriptApi.get_transcript('dsMFmonKDD4')
|
YouTubeTranscriptApi.get_transcript("dsMFmonKDD4")
|
||||||
|
|
||||||
httpretty.register_uri(
|
httpretty.register_uri(
|
||||||
httpretty.GET,
|
httpretty.GET,
|
||||||
'https://www.youtube.com/watch',
|
"https://www.youtube.com/watch",
|
||||||
body=load_asset('youtube_transcripts_disabled2.html.static')
|
body=load_asset("youtube_transcripts_disabled2.html.static"),
|
||||||
)
|
)
|
||||||
with self.assertRaises(TranscriptsDisabled):
|
with self.assertRaises(TranscriptsDisabled):
|
||||||
YouTubeTranscriptApi.get_transcript('Fjg5lYqvzUs')
|
YouTubeTranscriptApi.get_transcript("Fjg5lYqvzUs")
|
||||||
|
|
||||||
def test_get_transcript__exception_if_language_unavailable(self):
|
def test_get_transcript__exception_if_language_unavailable(self):
|
||||||
with self.assertRaises(NoTranscriptFound):
|
with self.assertRaises(NoTranscriptFound):
|
||||||
YouTubeTranscriptApi.get_transcript('GJLlxj_dtq8', languages=['cz'])
|
YouTubeTranscriptApi.get_transcript("GJLlxj_dtq8", languages=["cz"])
|
||||||
|
|
||||||
def test_get_transcript__exception_if_no_transcript_available(self):
|
def test_get_transcript__exception_if_no_transcript_available(self):
|
||||||
httpretty.register_uri(
|
httpretty.register_uri(
|
||||||
httpretty.GET,
|
httpretty.GET,
|
||||||
'https://www.youtube.com/watch',
|
"https://www.youtube.com/watch",
|
||||||
body=load_asset('youtube_no_transcript_available.html.static')
|
body=load_asset("youtube_no_transcript_available.html.static"),
|
||||||
)
|
)
|
||||||
|
|
||||||
with self.assertRaises(NoTranscriptAvailable):
|
with self.assertRaises(NoTranscriptAvailable):
|
||||||
YouTubeTranscriptApi.get_transcript('MwBPvcYFY2E')
|
YouTubeTranscriptApi.get_transcript("MwBPvcYFY2E")
|
||||||
|
|
||||||
def test_get_transcript__with_proxy(self):
|
def test_get_transcript__with_proxy(self):
|
||||||
proxies = {'http': '', 'https:': ''}
|
proxies = {"http": "", "https:": ""}
|
||||||
transcript = YouTubeTranscriptApi.get_transcript(
|
transcript = YouTubeTranscriptApi.get_transcript("GJLlxj_dtq8", proxies=proxies)
|
||||||
'GJLlxj_dtq8', proxies=proxies
|
|
||||||
)
|
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
transcript,
|
transcript,
|
||||||
[
|
[
|
||||||
{'text': 'Hey, this is just a test', 'start': 0.0, 'duration': 1.54},
|
{"text": "Hey, this is just a test", "start": 0.0, "duration": 1.54},
|
||||||
{'text': 'this is not the original transcript', 'start': 1.54, 'duration': 4.16},
|
{
|
||||||
{'text': 'just something shorter, I made up for testing', 'start': 5.7, 'duration': 3.239}
|
"text": "this is not the original transcript",
|
||||||
]
|
"start": 1.54,
|
||||||
|
"duration": 4.16,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"text": "just something shorter, I made up for testing",
|
||||||
|
"start": 5.7,
|
||||||
|
"duration": 3.239,
|
||||||
|
},
|
||||||
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_get_transcript__with_cookies(self):
|
def test_get_transcript__with_cookies(self):
|
||||||
dirname, filename = os.path.split(os.path.abspath(__file__))
|
dirname, filename = os.path.split(os.path.abspath(__file__))
|
||||||
cookies = dirname + '/example_cookies.txt'
|
cookies = dirname + "/example_cookies.txt"
|
||||||
transcript = YouTubeTranscriptApi.get_transcript('GJLlxj_dtq8', cookies=cookies)
|
transcript = YouTubeTranscriptApi.get_transcript("GJLlxj_dtq8", cookies=cookies)
|
||||||
|
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
transcript,
|
transcript,
|
||||||
[
|
[
|
||||||
{'text': 'Hey, this is just a test', 'start': 0.0, 'duration': 1.54},
|
{"text": "Hey, this is just a test", "start": 0.0, "duration": 1.54},
|
||||||
{'text': 'this is not the original transcript', 'start': 1.54, 'duration': 4.16},
|
{
|
||||||
{'text': 'just something shorter, I made up for testing', 'start': 5.7, 'duration': 3.239}
|
"text": "this is not the original transcript",
|
||||||
]
|
"start": 1.54,
|
||||||
|
"duration": 4.16,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"text": "just something shorter, I made up for testing",
|
||||||
|
"start": 5.7,
|
||||||
|
"duration": 3.239,
|
||||||
|
},
|
||||||
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_get_transcript__assertionerror_if_input_not_string(self):
|
def test_get_transcript__assertionerror_if_input_not_string(self):
|
||||||
with self.assertRaises(AssertionError):
|
with self.assertRaises(AssertionError):
|
||||||
YouTubeTranscriptApi.get_transcript(['video_id_1', 'video_id_2'])
|
YouTubeTranscriptApi.get_transcript(["video_id_1", "video_id_2"])
|
||||||
|
|
||||||
def test_get_transcripts__assertionerror_if_input_not_list(self):
|
def test_get_transcripts__assertionerror_if_input_not_list(self):
|
||||||
with self.assertRaises(AssertionError):
|
with self.assertRaises(AssertionError):
|
||||||
YouTubeTranscriptApi.get_transcripts('video_id_1')
|
YouTubeTranscriptApi.get_transcripts("video_id_1")
|
||||||
|
|
||||||
@patch('youtube_transcript_api.YouTubeTranscriptApi.get_transcript')
|
@patch("youtube_transcript_api.YouTubeTranscriptApi.get_transcript")
|
||||||
def test_get_transcripts(self, mock_get_transcript):
|
def test_get_transcripts(self, mock_get_transcript):
|
||||||
video_id_1 = 'video_id_1'
|
video_id_1 = "video_id_1"
|
||||||
video_id_2 = 'video_id_2'
|
video_id_2 = "video_id_2"
|
||||||
languages = ['de', 'en']
|
languages = ["de", "en"]
|
||||||
|
|
||||||
YouTubeTranscriptApi.get_transcripts([video_id_1, video_id_2], languages=languages)
|
YouTubeTranscriptApi.get_transcripts(
|
||||||
|
[video_id_1, video_id_2], languages=languages
|
||||||
|
)
|
||||||
|
|
||||||
mock_get_transcript.assert_any_call(video_id_1, languages, None, None, False)
|
mock_get_transcript.assert_any_call(video_id_1, languages, None, None, False)
|
||||||
mock_get_transcript.assert_any_call(video_id_2, languages, None, None, False)
|
mock_get_transcript.assert_any_call(video_id_2, languages, None, None, False)
|
||||||
self.assertEqual(mock_get_transcript.call_count, 2)
|
self.assertEqual(mock_get_transcript.call_count, 2)
|
||||||
|
|
||||||
@patch('youtube_transcript_api.YouTubeTranscriptApi.get_transcript', side_effect=Exception('Error'))
|
@patch(
|
||||||
|
"youtube_transcript_api.YouTubeTranscriptApi.get_transcript",
|
||||||
|
side_effect=Exception("Error"),
|
||||||
|
)
|
||||||
def test_get_transcripts__stop_on_error(self, mock_get_transcript):
|
def test_get_transcripts__stop_on_error(self, mock_get_transcript):
|
||||||
with self.assertRaises(Exception):
|
with self.assertRaises(Exception):
|
||||||
YouTubeTranscriptApi.get_transcripts(['video_id_1', 'video_id_2'])
|
YouTubeTranscriptApi.get_transcripts(["video_id_1", "video_id_2"])
|
||||||
|
|
||||||
@patch('youtube_transcript_api.YouTubeTranscriptApi.get_transcript', side_effect=Exception('Error'))
|
@patch(
|
||||||
|
"youtube_transcript_api.YouTubeTranscriptApi.get_transcript",
|
||||||
|
side_effect=Exception("Error"),
|
||||||
|
)
|
||||||
def test_get_transcripts__continue_on_error(self, mock_get_transcript):
|
def test_get_transcripts__continue_on_error(self, mock_get_transcript):
|
||||||
video_id_1 = 'video_id_1'
|
video_id_1 = "video_id_1"
|
||||||
video_id_2 = 'video_id_2'
|
video_id_2 = "video_id_2"
|
||||||
|
|
||||||
YouTubeTranscriptApi.get_transcripts(['video_id_1', 'video_id_2'], continue_after_error=True)
|
YouTubeTranscriptApi.get_transcripts(
|
||||||
|
["video_id_1", "video_id_2"], continue_after_error=True
|
||||||
|
)
|
||||||
|
|
||||||
mock_get_transcript.assert_any_call(video_id_1, ('en',), None, None, False)
|
mock_get_transcript.assert_any_call(video_id_1, ("en",), None, None, False)
|
||||||
mock_get_transcript.assert_any_call(video_id_2, ('en',), None, None, False)
|
mock_get_transcript.assert_any_call(video_id_2, ("en",), None, None, False)
|
||||||
|
|
||||||
@patch('youtube_transcript_api.YouTubeTranscriptApi.get_transcript')
|
@patch("youtube_transcript_api.YouTubeTranscriptApi.get_transcript")
|
||||||
def test_get_transcripts__with_cookies(self, mock_get_transcript):
|
def test_get_transcripts__with_cookies(self, mock_get_transcript):
|
||||||
cookies = '/example_cookies.txt'
|
cookies = "/example_cookies.txt"
|
||||||
YouTubeTranscriptApi.get_transcripts(['GJLlxj_dtq8'], cookies=cookies)
|
YouTubeTranscriptApi.get_transcripts(["GJLlxj_dtq8"], cookies=cookies)
|
||||||
mock_get_transcript.assert_any_call('GJLlxj_dtq8', ('en',), None, cookies, False)
|
mock_get_transcript.assert_any_call(
|
||||||
|
"GJLlxj_dtq8", ("en",), None, cookies, False
|
||||||
|
)
|
||||||
|
|
||||||
@patch('youtube_transcript_api.YouTubeTranscriptApi.get_transcript')
|
@patch("youtube_transcript_api.YouTubeTranscriptApi.get_transcript")
|
||||||
def test_get_transcripts__with_proxies(self, mock_get_transcript):
|
def test_get_transcripts__with_proxies(self, mock_get_transcript):
|
||||||
proxies = {'http': '', 'https:': ''}
|
proxies = {"http": "", "https:": ""}
|
||||||
YouTubeTranscriptApi.get_transcripts(['GJLlxj_dtq8'], proxies=proxies)
|
YouTubeTranscriptApi.get_transcripts(["GJLlxj_dtq8"], proxies=proxies)
|
||||||
mock_get_transcript.assert_any_call('GJLlxj_dtq8', ('en',), proxies, None, False)
|
mock_get_transcript.assert_any_call(
|
||||||
|
"GJLlxj_dtq8", ("en",), proxies, None, False
|
||||||
|
)
|
||||||
|
|
||||||
def test_load_cookies(self):
|
def test_load_cookies(self):
|
||||||
dirname, filename = os.path.split(os.path.abspath(__file__))
|
dirname, filename = os.path.split(os.path.abspath(__file__))
|
||||||
cookies = dirname + '/example_cookies.txt'
|
cookies = dirname + "/example_cookies.txt"
|
||||||
session_cookies = YouTubeTranscriptApi._load_cookies(cookies, 'GJLlxj_dtq8')
|
session_cookies = YouTubeTranscriptApi._load_cookies(cookies, "GJLlxj_dtq8")
|
||||||
self.assertEqual({'TEST_FIELD': 'TEST_VALUE'}, requests.utils.dict_from_cookiejar(session_cookies))
|
self.assertEqual(
|
||||||
|
{"TEST_FIELD": "TEST_VALUE"},
|
||||||
|
requests.utils.dict_from_cookiejar(session_cookies),
|
||||||
|
)
|
||||||
|
|
||||||
def test_load_cookies__bad_file_path(self):
|
def test_load_cookies__bad_file_path(self):
|
||||||
bad_cookies = 'nonexistent_cookies.txt'
|
bad_cookies = "nonexistent_cookies.txt"
|
||||||
with self.assertRaises(CookiePathInvalid):
|
with self.assertRaises(CookiePathInvalid):
|
||||||
YouTubeTranscriptApi._load_cookies(bad_cookies, 'GJLlxj_dtq8')
|
YouTubeTranscriptApi._load_cookies(bad_cookies, "GJLlxj_dtq8")
|
||||||
|
|
||||||
def test_load_cookies__no_valid_cookies(self):
|
def test_load_cookies__no_valid_cookies(self):
|
||||||
dirname, filename = os.path.split(os.path.abspath(__file__))
|
dirname, filename = os.path.split(os.path.abspath(__file__))
|
||||||
expired_cookies = dirname + '/expired_example_cookies.txt'
|
expired_cookies = dirname + "/expired_example_cookies.txt"
|
||||||
with self.assertRaises(CookiesInvalid):
|
with self.assertRaises(CookiesInvalid):
|
||||||
YouTubeTranscriptApi._load_cookies(expired_cookies, 'GJLlxj_dtq8')
|
YouTubeTranscriptApi._load_cookies(expired_cookies, "GJLlxj_dtq8")
|
||||||
|
|
|
@ -10,211 +10,269 @@ from youtube_transcript_api._cli import YouTubeTranscriptCli
|
||||||
class TestYouTubeTranscriptCli(TestCase):
|
class TestYouTubeTranscriptCli(TestCase):
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
self.transcript_mock = MagicMock()
|
self.transcript_mock = MagicMock()
|
||||||
self.transcript_mock.fetch = MagicMock(return_value=[
|
self.transcript_mock.fetch = MagicMock(
|
||||||
{'text': 'Hey, this is just a test', 'start': 0.0, 'duration': 1.54},
|
return_value=[
|
||||||
{'text': 'this is <i>not</i> the original transcript', 'start': 1.54, 'duration': 4.16},
|
{"text": "Hey, this is just a test", "start": 0.0, "duration": 1.54},
|
||||||
{'text': 'just something shorter, I made up for testing', 'start': 5.7, 'duration': 3.239}
|
{
|
||||||
])
|
"text": "this is <i>not</i> the original transcript",
|
||||||
|
"start": 1.54,
|
||||||
|
"duration": 4.16,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"text": "just something shorter, I made up for testing",
|
||||||
|
"start": 5.7,
|
||||||
|
"duration": 3.239,
|
||||||
|
},
|
||||||
|
]
|
||||||
|
)
|
||||||
self.transcript_mock.translate = MagicMock(return_value=self.transcript_mock)
|
self.transcript_mock.translate = MagicMock(return_value=self.transcript_mock)
|
||||||
|
|
||||||
self.transcript_list_mock = MagicMock()
|
self.transcript_list_mock = MagicMock()
|
||||||
self.transcript_list_mock.find_generated_transcript = MagicMock(return_value=self.transcript_mock)
|
self.transcript_list_mock.find_generated_transcript = MagicMock(
|
||||||
self.transcript_list_mock.find_manually_created_transcript = MagicMock(return_value=self.transcript_mock)
|
return_value=self.transcript_mock
|
||||||
self.transcript_list_mock.find_transcript = MagicMock(return_value=self.transcript_mock)
|
)
|
||||||
|
self.transcript_list_mock.find_manually_created_transcript = MagicMock(
|
||||||
|
return_value=self.transcript_mock
|
||||||
|
)
|
||||||
|
self.transcript_list_mock.find_transcript = MagicMock(
|
||||||
|
return_value=self.transcript_mock
|
||||||
|
)
|
||||||
|
|
||||||
YouTubeTranscriptApi.list_transcripts = MagicMock(return_value=self.transcript_list_mock)
|
YouTubeTranscriptApi.list_transcripts = MagicMock(
|
||||||
|
return_value=self.transcript_list_mock
|
||||||
|
)
|
||||||
|
|
||||||
def test_argument_parsing(self):
|
def test_argument_parsing(self):
|
||||||
parsed_args = YouTubeTranscriptCli('v1 v2 --format json --languages de en'.split())._parse_args()
|
parsed_args = YouTubeTranscriptCli(
|
||||||
self.assertEqual(parsed_args.video_ids, ['v1', 'v2'])
|
"v1 v2 --format json --languages de en".split()
|
||||||
self.assertEqual(parsed_args.format, 'json')
|
)._parse_args()
|
||||||
self.assertEqual(parsed_args.languages, ['de', 'en'])
|
self.assertEqual(parsed_args.video_ids, ["v1", "v2"])
|
||||||
self.assertEqual(parsed_args.http_proxy, '')
|
self.assertEqual(parsed_args.format, "json")
|
||||||
self.assertEqual(parsed_args.https_proxy, '')
|
self.assertEqual(parsed_args.languages, ["de", "en"])
|
||||||
|
self.assertEqual(parsed_args.http_proxy, "")
|
||||||
parsed_args = YouTubeTranscriptCli('v1 v2 --languages de en --format json'.split())._parse_args()
|
self.assertEqual(parsed_args.https_proxy, "")
|
||||||
self.assertEqual(parsed_args.video_ids, ['v1', 'v2'])
|
|
||||||
self.assertEqual(parsed_args.format, 'json')
|
|
||||||
self.assertEqual(parsed_args.languages, ['de', 'en'])
|
|
||||||
self.assertEqual(parsed_args.http_proxy, '')
|
|
||||||
self.assertEqual(parsed_args.https_proxy, '')
|
|
||||||
|
|
||||||
parsed_args = YouTubeTranscriptCli(' --format json v1 v2 --languages de en'.split())._parse_args()
|
|
||||||
self.assertEqual(parsed_args.video_ids, ['v1', 'v2'])
|
|
||||||
self.assertEqual(parsed_args.format, 'json')
|
|
||||||
self.assertEqual(parsed_args.languages, ['de', 'en'])
|
|
||||||
self.assertEqual(parsed_args.http_proxy, '')
|
|
||||||
self.assertEqual(parsed_args.https_proxy, '')
|
|
||||||
|
|
||||||
parsed_args = YouTubeTranscriptCli(
|
parsed_args = YouTubeTranscriptCli(
|
||||||
'v1 v2 --languages de en --format json '
|
"v1 v2 --languages de en --format json".split()
|
||||||
'--http-proxy http://user:pass@domain:port '
|
|
||||||
'--https-proxy https://user:pass@domain:port'.split()
|
|
||||||
)._parse_args()
|
)._parse_args()
|
||||||
self.assertEqual(parsed_args.video_ids, ['v1', 'v2'])
|
self.assertEqual(parsed_args.video_ids, ["v1", "v2"])
|
||||||
self.assertEqual(parsed_args.format, 'json')
|
self.assertEqual(parsed_args.format, "json")
|
||||||
self.assertEqual(parsed_args.languages, ['de', 'en'])
|
self.assertEqual(parsed_args.languages, ["de", "en"])
|
||||||
self.assertEqual(parsed_args.http_proxy, 'http://user:pass@domain:port')
|
self.assertEqual(parsed_args.http_proxy, "")
|
||||||
self.assertEqual(parsed_args.https_proxy, 'https://user:pass@domain:port')
|
self.assertEqual(parsed_args.https_proxy, "")
|
||||||
|
|
||||||
parsed_args = YouTubeTranscriptCli(
|
parsed_args = YouTubeTranscriptCli(
|
||||||
'v1 v2 --languages de en --format json --http-proxy http://user:pass@domain:port'.split()
|
" --format json v1 v2 --languages de en".split()
|
||||||
)._parse_args()
|
)._parse_args()
|
||||||
self.assertEqual(parsed_args.video_ids, ['v1', 'v2'])
|
self.assertEqual(parsed_args.video_ids, ["v1", "v2"])
|
||||||
self.assertEqual(parsed_args.format, 'json')
|
self.assertEqual(parsed_args.format, "json")
|
||||||
self.assertEqual(parsed_args.languages, ['de', 'en'])
|
self.assertEqual(parsed_args.languages, ["de", "en"])
|
||||||
self.assertEqual(parsed_args.http_proxy, 'http://user:pass@domain:port')
|
self.assertEqual(parsed_args.http_proxy, "")
|
||||||
self.assertEqual(parsed_args.https_proxy, '')
|
self.assertEqual(parsed_args.https_proxy, "")
|
||||||
|
|
||||||
parsed_args = YouTubeTranscriptCli(
|
parsed_args = YouTubeTranscriptCli(
|
||||||
'v1 v2 --languages de en --format json --https-proxy https://user:pass@domain:port'.split()
|
"v1 v2 --languages de en --format json "
|
||||||
|
"--http-proxy http://user:pass@domain:port "
|
||||||
|
"--https-proxy https://user:pass@domain:port".split()
|
||||||
)._parse_args()
|
)._parse_args()
|
||||||
self.assertEqual(parsed_args.video_ids, ['v1', 'v2'])
|
self.assertEqual(parsed_args.video_ids, ["v1", "v2"])
|
||||||
self.assertEqual(parsed_args.format, 'json')
|
self.assertEqual(parsed_args.format, "json")
|
||||||
self.assertEqual(parsed_args.languages, ['de', 'en'])
|
self.assertEqual(parsed_args.languages, ["de", "en"])
|
||||||
self.assertEqual(parsed_args.https_proxy, 'https://user:pass@domain:port')
|
self.assertEqual(parsed_args.http_proxy, "http://user:pass@domain:port")
|
||||||
self.assertEqual(parsed_args.http_proxy, '')
|
self.assertEqual(parsed_args.https_proxy, "https://user:pass@domain:port")
|
||||||
|
|
||||||
|
parsed_args = YouTubeTranscriptCli(
|
||||||
|
"v1 v2 --languages de en --format json --http-proxy http://user:pass@domain:port".split()
|
||||||
|
)._parse_args()
|
||||||
|
self.assertEqual(parsed_args.video_ids, ["v1", "v2"])
|
||||||
|
self.assertEqual(parsed_args.format, "json")
|
||||||
|
self.assertEqual(parsed_args.languages, ["de", "en"])
|
||||||
|
self.assertEqual(parsed_args.http_proxy, "http://user:pass@domain:port")
|
||||||
|
self.assertEqual(parsed_args.https_proxy, "")
|
||||||
|
|
||||||
|
parsed_args = YouTubeTranscriptCli(
|
||||||
|
"v1 v2 --languages de en --format json --https-proxy https://user:pass@domain:port".split()
|
||||||
|
)._parse_args()
|
||||||
|
self.assertEqual(parsed_args.video_ids, ["v1", "v2"])
|
||||||
|
self.assertEqual(parsed_args.format, "json")
|
||||||
|
self.assertEqual(parsed_args.languages, ["de", "en"])
|
||||||
|
self.assertEqual(parsed_args.https_proxy, "https://user:pass@domain:port")
|
||||||
|
self.assertEqual(parsed_args.http_proxy, "")
|
||||||
|
|
||||||
def test_argument_parsing__only_video_ids(self):
|
def test_argument_parsing__only_video_ids(self):
|
||||||
parsed_args = YouTubeTranscriptCli('v1 v2'.split())._parse_args()
|
parsed_args = YouTubeTranscriptCli("v1 v2".split())._parse_args()
|
||||||
self.assertEqual(parsed_args.video_ids, ['v1', 'v2'])
|
self.assertEqual(parsed_args.video_ids, ["v1", "v2"])
|
||||||
self.assertEqual(parsed_args.format, 'pretty')
|
self.assertEqual(parsed_args.format, "pretty")
|
||||||
self.assertEqual(parsed_args.languages, ['en'])
|
self.assertEqual(parsed_args.languages, ["en"])
|
||||||
|
|
||||||
def test_argument_parsing__video_ids_starting_with_dash(self):
|
def test_argument_parsing__video_ids_starting_with_dash(self):
|
||||||
parsed_args = YouTubeTranscriptCli('\-v1 \-\-v2 \--v3'.split())._parse_args()
|
parsed_args = YouTubeTranscriptCli("\-v1 \-\-v2 \--v3".split())._parse_args()
|
||||||
self.assertEqual(parsed_args.video_ids, ['-v1', '--v2', '--v3'])
|
self.assertEqual(parsed_args.video_ids, ["-v1", "--v2", "--v3"])
|
||||||
self.assertEqual(parsed_args.format, 'pretty')
|
self.assertEqual(parsed_args.format, "pretty")
|
||||||
self.assertEqual(parsed_args.languages, ['en'])
|
self.assertEqual(parsed_args.languages, ["en"])
|
||||||
|
|
||||||
def test_argument_parsing__fail_without_video_ids(self):
|
def test_argument_parsing__fail_without_video_ids(self):
|
||||||
with self.assertRaises(SystemExit):
|
with self.assertRaises(SystemExit):
|
||||||
YouTubeTranscriptCli('--format json'.split())._parse_args()
|
YouTubeTranscriptCli("--format json".split())._parse_args()
|
||||||
|
|
||||||
def test_argument_parsing__json(self):
|
def test_argument_parsing__json(self):
|
||||||
parsed_args = YouTubeTranscriptCli('v1 v2 --format json'.split())._parse_args()
|
parsed_args = YouTubeTranscriptCli("v1 v2 --format json".split())._parse_args()
|
||||||
self.assertEqual(parsed_args.video_ids, ['v1', 'v2'])
|
self.assertEqual(parsed_args.video_ids, ["v1", "v2"])
|
||||||
self.assertEqual(parsed_args.format, 'json')
|
self.assertEqual(parsed_args.format, "json")
|
||||||
self.assertEqual(parsed_args.languages, ['en'])
|
self.assertEqual(parsed_args.languages, ["en"])
|
||||||
|
|
||||||
parsed_args = YouTubeTranscriptCli('--format json v1 v2'.split())._parse_args()
|
parsed_args = YouTubeTranscriptCli("--format json v1 v2".split())._parse_args()
|
||||||
self.assertEqual(parsed_args.video_ids, ['v1', 'v2'])
|
self.assertEqual(parsed_args.video_ids, ["v1", "v2"])
|
||||||
self.assertEqual(parsed_args.format, 'json')
|
self.assertEqual(parsed_args.format, "json")
|
||||||
self.assertEqual(parsed_args.languages, ['en'])
|
self.assertEqual(parsed_args.languages, ["en"])
|
||||||
|
|
||||||
def test_argument_parsing__languages(self):
|
def test_argument_parsing__languages(self):
|
||||||
parsed_args = YouTubeTranscriptCli('v1 v2 --languages de en'.split())._parse_args()
|
parsed_args = YouTubeTranscriptCli(
|
||||||
self.assertEqual(parsed_args.video_ids, ['v1', 'v2'])
|
"v1 v2 --languages de en".split()
|
||||||
self.assertEqual(parsed_args.format, 'pretty')
|
)._parse_args()
|
||||||
self.assertEqual(parsed_args.languages, ['de', 'en'])
|
self.assertEqual(parsed_args.video_ids, ["v1", "v2"])
|
||||||
|
self.assertEqual(parsed_args.format, "pretty")
|
||||||
|
self.assertEqual(parsed_args.languages, ["de", "en"])
|
||||||
|
|
||||||
def test_argument_parsing__proxies(self):
|
def test_argument_parsing__proxies(self):
|
||||||
parsed_args = YouTubeTranscriptCli(
|
parsed_args = YouTubeTranscriptCli(
|
||||||
'v1 v2 --http-proxy http://user:pass@domain:port'.split()
|
"v1 v2 --http-proxy http://user:pass@domain:port".split()
|
||||||
)._parse_args()
|
)._parse_args()
|
||||||
self.assertEqual(parsed_args.http_proxy, 'http://user:pass@domain:port')
|
self.assertEqual(parsed_args.http_proxy, "http://user:pass@domain:port")
|
||||||
|
|
||||||
parsed_args = YouTubeTranscriptCli(
|
parsed_args = YouTubeTranscriptCli(
|
||||||
'v1 v2 --https-proxy https://user:pass@domain:port'.split()
|
"v1 v2 --https-proxy https://user:pass@domain:port".split()
|
||||||
)._parse_args()
|
)._parse_args()
|
||||||
self.assertEqual(parsed_args.https_proxy, 'https://user:pass@domain:port')
|
self.assertEqual(parsed_args.https_proxy, "https://user:pass@domain:port")
|
||||||
|
|
||||||
parsed_args = YouTubeTranscriptCli(
|
parsed_args = YouTubeTranscriptCli(
|
||||||
'v1 v2 --http-proxy http://user:pass@domain:port --https-proxy https://user:pass@domain:port'.split()
|
"v1 v2 --http-proxy http://user:pass@domain:port --https-proxy https://user:pass@domain:port".split()
|
||||||
)._parse_args()
|
)._parse_args()
|
||||||
self.assertEqual(parsed_args.http_proxy, 'http://user:pass@domain:port')
|
self.assertEqual(parsed_args.http_proxy, "http://user:pass@domain:port")
|
||||||
self.assertEqual(parsed_args.https_proxy, 'https://user:pass@domain:port')
|
self.assertEqual(parsed_args.https_proxy, "https://user:pass@domain:port")
|
||||||
|
|
||||||
parsed_args = YouTubeTranscriptCli(
|
parsed_args = YouTubeTranscriptCli("v1 v2".split())._parse_args()
|
||||||
'v1 v2'.split()
|
self.assertEqual(parsed_args.http_proxy, "")
|
||||||
)._parse_args()
|
self.assertEqual(parsed_args.https_proxy, "")
|
||||||
self.assertEqual(parsed_args.http_proxy, '')
|
|
||||||
self.assertEqual(parsed_args.https_proxy, '')
|
|
||||||
|
|
||||||
def test_argument_parsing__list_transcripts(self):
|
def test_argument_parsing__list_transcripts(self):
|
||||||
parsed_args = YouTubeTranscriptCli('--list-transcripts v1 v2'.split())._parse_args()
|
parsed_args = YouTubeTranscriptCli(
|
||||||
self.assertEqual(parsed_args.video_ids, ['v1', 'v2'])
|
"--list-transcripts v1 v2".split()
|
||||||
|
)._parse_args()
|
||||||
|
self.assertEqual(parsed_args.video_ids, ["v1", "v2"])
|
||||||
self.assertTrue(parsed_args.list_transcripts)
|
self.assertTrue(parsed_args.list_transcripts)
|
||||||
|
|
||||||
parsed_args = YouTubeTranscriptCli('v1 v2 --list-transcripts'.split())._parse_args()
|
parsed_args = YouTubeTranscriptCli(
|
||||||
self.assertEqual(parsed_args.video_ids, ['v1', 'v2'])
|
"v1 v2 --list-transcripts".split()
|
||||||
|
)._parse_args()
|
||||||
|
self.assertEqual(parsed_args.video_ids, ["v1", "v2"])
|
||||||
self.assertTrue(parsed_args.list_transcripts)
|
self.assertTrue(parsed_args.list_transcripts)
|
||||||
|
|
||||||
def test_argument_parsing__translate(self):
|
def test_argument_parsing__translate(self):
|
||||||
parsed_args = YouTubeTranscriptCli('v1 v2 --languages de en --translate cz'.split())._parse_args()
|
parsed_args = YouTubeTranscriptCli(
|
||||||
self.assertEqual(parsed_args.video_ids, ['v1', 'v2'])
|
"v1 v2 --languages de en --translate cz".split()
|
||||||
self.assertEqual(parsed_args.format, 'pretty')
|
)._parse_args()
|
||||||
self.assertEqual(parsed_args.languages, ['de', 'en'])
|
self.assertEqual(parsed_args.video_ids, ["v1", "v2"])
|
||||||
self.assertEqual(parsed_args.translate, 'cz')
|
self.assertEqual(parsed_args.format, "pretty")
|
||||||
|
self.assertEqual(parsed_args.languages, ["de", "en"])
|
||||||
|
self.assertEqual(parsed_args.translate, "cz")
|
||||||
|
|
||||||
parsed_args = YouTubeTranscriptCli('v1 v2 --translate cz --languages de en'.split())._parse_args()
|
parsed_args = YouTubeTranscriptCli(
|
||||||
self.assertEqual(parsed_args.video_ids, ['v1', 'v2'])
|
"v1 v2 --translate cz --languages de en".split()
|
||||||
self.assertEqual(parsed_args.format, 'pretty')
|
)._parse_args()
|
||||||
self.assertEqual(parsed_args.languages, ['de', 'en'])
|
self.assertEqual(parsed_args.video_ids, ["v1", "v2"])
|
||||||
self.assertEqual(parsed_args.translate, 'cz')
|
self.assertEqual(parsed_args.format, "pretty")
|
||||||
|
self.assertEqual(parsed_args.languages, ["de", "en"])
|
||||||
|
self.assertEqual(parsed_args.translate, "cz")
|
||||||
|
|
||||||
def test_argument_parsing__manually_or_generated(self):
|
def test_argument_parsing__manually_or_generated(self):
|
||||||
parsed_args = YouTubeTranscriptCli('v1 v2 --exclude-manually-created'.split())._parse_args()
|
parsed_args = YouTubeTranscriptCli(
|
||||||
self.assertEqual(parsed_args.video_ids, ['v1', 'v2'])
|
"v1 v2 --exclude-manually-created".split()
|
||||||
|
)._parse_args()
|
||||||
|
self.assertEqual(parsed_args.video_ids, ["v1", "v2"])
|
||||||
self.assertTrue(parsed_args.exclude_manually_created)
|
self.assertTrue(parsed_args.exclude_manually_created)
|
||||||
self.assertFalse(parsed_args.exclude_generated)
|
self.assertFalse(parsed_args.exclude_generated)
|
||||||
|
|
||||||
parsed_args = YouTubeTranscriptCli('v1 v2 --exclude-generated'.split())._parse_args()
|
parsed_args = YouTubeTranscriptCli(
|
||||||
self.assertEqual(parsed_args.video_ids, ['v1', 'v2'])
|
"v1 v2 --exclude-generated".split()
|
||||||
|
)._parse_args()
|
||||||
|
self.assertEqual(parsed_args.video_ids, ["v1", "v2"])
|
||||||
self.assertFalse(parsed_args.exclude_manually_created)
|
self.assertFalse(parsed_args.exclude_manually_created)
|
||||||
self.assertTrue(parsed_args.exclude_generated)
|
self.assertTrue(parsed_args.exclude_generated)
|
||||||
|
|
||||||
parsed_args = YouTubeTranscriptCli('v1 v2 --exclude-manually-created --exclude-generated'.split())._parse_args()
|
parsed_args = YouTubeTranscriptCli(
|
||||||
self.assertEqual(parsed_args.video_ids, ['v1', 'v2'])
|
"v1 v2 --exclude-manually-created --exclude-generated".split()
|
||||||
|
)._parse_args()
|
||||||
|
self.assertEqual(parsed_args.video_ids, ["v1", "v2"])
|
||||||
self.assertTrue(parsed_args.exclude_manually_created)
|
self.assertTrue(parsed_args.exclude_manually_created)
|
||||||
self.assertTrue(parsed_args.exclude_generated)
|
self.assertTrue(parsed_args.exclude_generated)
|
||||||
|
|
||||||
def test_run(self):
|
def test_run(self):
|
||||||
YouTubeTranscriptCli('v1 v2 --languages de en'.split()).run()
|
YouTubeTranscriptCli("v1 v2 --languages de en".split()).run()
|
||||||
|
|
||||||
YouTubeTranscriptApi.list_transcripts.assert_any_call('v1', proxies=None, cookies=None)
|
YouTubeTranscriptApi.list_transcripts.assert_any_call(
|
||||||
YouTubeTranscriptApi.list_transcripts.assert_any_call('v2', proxies=None, cookies=None)
|
"v1", proxies=None, cookies=None
|
||||||
|
)
|
||||||
|
YouTubeTranscriptApi.list_transcripts.assert_any_call(
|
||||||
|
"v2", proxies=None, cookies=None
|
||||||
|
)
|
||||||
|
|
||||||
self.transcript_list_mock.find_transcript.assert_any_call(['de', 'en'])
|
self.transcript_list_mock.find_transcript.assert_any_call(["de", "en"])
|
||||||
|
|
||||||
def test_run__failing_transcripts(self):
|
def test_run__failing_transcripts(self):
|
||||||
YouTubeTranscriptApi.list_transcripts = MagicMock(side_effect=VideoUnavailable('video_id'))
|
YouTubeTranscriptApi.list_transcripts = MagicMock(
|
||||||
|
side_effect=VideoUnavailable("video_id")
|
||||||
|
)
|
||||||
|
|
||||||
output = YouTubeTranscriptCli('v1 --languages de en'.split()).run()
|
output = YouTubeTranscriptCli("v1 --languages de en".split()).run()
|
||||||
|
|
||||||
self.assertEqual(output, str(VideoUnavailable('video_id')))
|
self.assertEqual(output, str(VideoUnavailable("video_id")))
|
||||||
|
|
||||||
def test_run__exclude_generated(self):
|
def test_run__exclude_generated(self):
|
||||||
YouTubeTranscriptCli('v1 v2 --languages de en --exclude-generated'.split()).run()
|
YouTubeTranscriptCli(
|
||||||
|
"v1 v2 --languages de en --exclude-generated".split()
|
||||||
|
).run()
|
||||||
|
|
||||||
self.transcript_list_mock.find_manually_created_transcript.assert_any_call(['de', 'en'])
|
self.transcript_list_mock.find_manually_created_transcript.assert_any_call(
|
||||||
|
["de", "en"]
|
||||||
|
)
|
||||||
|
|
||||||
def test_run__exclude_manually_created(self):
|
def test_run__exclude_manually_created(self):
|
||||||
YouTubeTranscriptCli('v1 v2 --languages de en --exclude-manually-created'.split()).run()
|
YouTubeTranscriptCli(
|
||||||
|
"v1 v2 --languages de en --exclude-manually-created".split()
|
||||||
|
).run()
|
||||||
|
|
||||||
self.transcript_list_mock.find_generated_transcript.assert_any_call(['de', 'en'])
|
self.transcript_list_mock.find_generated_transcript.assert_any_call(
|
||||||
|
["de", "en"]
|
||||||
|
)
|
||||||
|
|
||||||
def test_run__exclude_manually_created_and_generated(self):
|
def test_run__exclude_manually_created_and_generated(self):
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
YouTubeTranscriptCli(
|
YouTubeTranscriptCli(
|
||||||
'v1 v2 --languages de en --exclude-manually-created --exclude-generated'.split()
|
"v1 v2 --languages de en --exclude-manually-created --exclude-generated".split()
|
||||||
).run(),
|
).run(),
|
||||||
''
|
"",
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_run__translate(self):
|
def test_run__translate(self):
|
||||||
YouTubeTranscriptCli('v1 v2 --languages de en --translate cz'.split()).run(),
|
YouTubeTranscriptCli("v1 v2 --languages de en --translate cz".split()).run(),
|
||||||
|
|
||||||
self.transcript_mock.translate.assert_any_call('cz')
|
self.transcript_mock.translate.assert_any_call("cz")
|
||||||
|
|
||||||
def test_run__list_transcripts(self):
|
def test_run__list_transcripts(self):
|
||||||
YouTubeTranscriptCli('--list-transcripts v1 v2'.split()).run()
|
YouTubeTranscriptCli("--list-transcripts v1 v2".split()).run()
|
||||||
|
|
||||||
YouTubeTranscriptApi.list_transcripts.assert_any_call('v1', proxies=None, cookies=None)
|
YouTubeTranscriptApi.list_transcripts.assert_any_call(
|
||||||
YouTubeTranscriptApi.list_transcripts.assert_any_call('v2', proxies=None, cookies=None)
|
"v1", proxies=None, cookies=None
|
||||||
|
)
|
||||||
|
YouTubeTranscriptApi.list_transcripts.assert_any_call(
|
||||||
|
"v2", proxies=None, cookies=None
|
||||||
|
)
|
||||||
|
|
||||||
def test_run__json_output(self):
|
def test_run__json_output(self):
|
||||||
output = YouTubeTranscriptCli('v1 v2 --languages de en --format json'.split()).run()
|
output = YouTubeTranscriptCli(
|
||||||
|
"v1 v2 --languages de en --format json".split()
|
||||||
|
).run()
|
||||||
|
|
||||||
# will fail if output is not valid json
|
# will fail if output is not valid json
|
||||||
json.loads(output)
|
json.loads(output)
|
||||||
|
@ -222,31 +280,37 @@ class TestYouTubeTranscriptCli(TestCase):
|
||||||
def test_run__proxies(self):
|
def test_run__proxies(self):
|
||||||
YouTubeTranscriptCli(
|
YouTubeTranscriptCli(
|
||||||
(
|
(
|
||||||
'v1 v2 --languages de en '
|
"v1 v2 --languages de en "
|
||||||
'--http-proxy http://user:pass@domain:port '
|
"--http-proxy http://user:pass@domain:port "
|
||||||
'--https-proxy https://user:pass@domain:port'
|
"--https-proxy https://user:pass@domain:port"
|
||||||
).split()
|
).split()
|
||||||
).run()
|
).run()
|
||||||
|
|
||||||
YouTubeTranscriptApi.list_transcripts.assert_any_call(
|
YouTubeTranscriptApi.list_transcripts.assert_any_call(
|
||||||
'v1',
|
"v1",
|
||||||
proxies={'http': 'http://user:pass@domain:port', 'https': 'https://user:pass@domain:port'},
|
proxies={
|
||||||
cookies= None
|
"http": "http://user:pass@domain:port",
|
||||||
|
"https": "https://user:pass@domain:port",
|
||||||
|
},
|
||||||
|
cookies=None,
|
||||||
)
|
)
|
||||||
|
|
||||||
YouTubeTranscriptApi.list_transcripts.assert_any_call(
|
YouTubeTranscriptApi.list_transcripts.assert_any_call(
|
||||||
'v2',
|
"v2",
|
||||||
proxies={'http': 'http://user:pass@domain:port', 'https': 'https://user:pass@domain:port'},
|
proxies={
|
||||||
cookies=None
|
"http": "http://user:pass@domain:port",
|
||||||
|
"https": "https://user:pass@domain:port",
|
||||||
|
},
|
||||||
|
cookies=None,
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_run__cookies(self):
|
def test_run__cookies(self):
|
||||||
YouTubeTranscriptCli(
|
YouTubeTranscriptCli(
|
||||||
(
|
("v1 v2 --languages de en " "--cookies blahblah.txt").split()
|
||||||
'v1 v2 --languages de en '
|
|
||||||
'--cookies blahblah.txt'
|
|
||||||
).split()
|
|
||||||
).run()
|
).run()
|
||||||
YouTubeTranscriptApi.list_transcripts.assert_any_call('v1', proxies=None, cookies='blahblah.txt')
|
YouTubeTranscriptApi.list_transcripts.assert_any_call(
|
||||||
YouTubeTranscriptApi.list_transcripts.assert_any_call('v2', proxies=None, cookies='blahblah.txt')
|
"v1", proxies=None, cookies="blahblah.txt"
|
||||||
|
)
|
||||||
|
YouTubeTranscriptApi.list_transcripts.assert_any_call(
|
||||||
|
"v2", proxies=None, cookies="blahblah.txt"
|
||||||
|
)
|
||||||
|
|
|
@ -10,16 +10,17 @@ from youtube_transcript_api.formatters import (
|
||||||
TextFormatter,
|
TextFormatter,
|
||||||
SRTFormatter,
|
SRTFormatter,
|
||||||
WebVTTFormatter,
|
WebVTTFormatter,
|
||||||
PrettyPrintFormatter, FormatterLoader
|
PrettyPrintFormatter,
|
||||||
|
FormatterLoader,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestFormatters(TestCase):
|
class TestFormatters(TestCase):
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
self.transcript = [
|
self.transcript = [
|
||||||
{'text': 'Test line 1', 'start': 0.0, 'duration': 1.50},
|
{"text": "Test line 1", "start": 0.0, "duration": 1.50},
|
||||||
{'text': 'line between', 'start': 1.5, 'duration': 2.0},
|
{"text": "line between", "start": 1.5, "duration": 2.0},
|
||||||
{'text': 'testing the end line', 'start': 2.5, 'duration': 3.25}
|
{"text": "testing the end line", "start": 2.5, "duration": 3.25},
|
||||||
]
|
]
|
||||||
self.transcripts = [self.transcript, self.transcript]
|
self.transcripts = [self.transcript, self.transcript]
|
||||||
|
|
||||||
|
@ -31,7 +32,7 @@ class TestFormatters(TestCase):
|
||||||
|
|
||||||
def test_srt_formatter_starting(self):
|
def test_srt_formatter_starting(self):
|
||||||
content = SRTFormatter().format_transcript(self.transcript)
|
content = SRTFormatter().format_transcript(self.transcript)
|
||||||
lines = content.split('\n')
|
lines = content.split("\n")
|
||||||
|
|
||||||
# test starting lines
|
# test starting lines
|
||||||
self.assertEqual(lines[0], "1")
|
self.assertEqual(lines[0], "1")
|
||||||
|
@ -39,19 +40,19 @@ class TestFormatters(TestCase):
|
||||||
|
|
||||||
def test_srt_formatter_middle(self):
|
def test_srt_formatter_middle(self):
|
||||||
content = SRTFormatter().format_transcript(self.transcript)
|
content = SRTFormatter().format_transcript(self.transcript)
|
||||||
lines = content.split('\n')
|
lines = content.split("\n")
|
||||||
|
|
||||||
# test middle lines
|
# test middle lines
|
||||||
self.assertEqual(lines[4], "2")
|
self.assertEqual(lines[4], "2")
|
||||||
self.assertEqual(lines[5], "00:00:01,500 --> 00:00:02,500")
|
self.assertEqual(lines[5], "00:00:01,500 --> 00:00:02,500")
|
||||||
self.assertEqual(lines[6], self.transcript[1]['text'])
|
self.assertEqual(lines[6], self.transcript[1]["text"])
|
||||||
|
|
||||||
def test_srt_formatter_ending(self):
|
def test_srt_formatter_ending(self):
|
||||||
content = SRTFormatter().format_transcript(self.transcript)
|
content = SRTFormatter().format_transcript(self.transcript)
|
||||||
lines = content.split('\n')
|
lines = content.split("\n")
|
||||||
|
|
||||||
# test ending lines
|
# test ending lines
|
||||||
self.assertEqual(lines[-2], self.transcript[-1]['text'])
|
self.assertEqual(lines[-2], self.transcript[-1]["text"])
|
||||||
self.assertEqual(lines[-1], "")
|
self.assertEqual(lines[-1], "")
|
||||||
|
|
||||||
def test_srt_formatter_many(self):
|
def test_srt_formatter_many(self):
|
||||||
|
@ -59,11 +60,14 @@ class TestFormatters(TestCase):
|
||||||
content = formatter.format_transcripts(self.transcripts)
|
content = formatter.format_transcripts(self.transcripts)
|
||||||
formatted_single_transcript = formatter.format_transcript(self.transcript)
|
formatted_single_transcript = formatter.format_transcript(self.transcript)
|
||||||
|
|
||||||
self.assertEqual(content, formatted_single_transcript + '\n\n\n' + formatted_single_transcript)
|
self.assertEqual(
|
||||||
|
content,
|
||||||
|
formatted_single_transcript + "\n\n\n" + formatted_single_transcript,
|
||||||
|
)
|
||||||
|
|
||||||
def test_webvtt_formatter_starting(self):
|
def test_webvtt_formatter_starting(self):
|
||||||
content = WebVTTFormatter().format_transcript(self.transcript)
|
content = WebVTTFormatter().format_transcript(self.transcript)
|
||||||
lines = content.split('\n')
|
lines = content.split("\n")
|
||||||
|
|
||||||
# test starting lines
|
# test starting lines
|
||||||
self.assertEqual(lines[0], "WEBVTT")
|
self.assertEqual(lines[0], "WEBVTT")
|
||||||
|
@ -71,10 +75,10 @@ class TestFormatters(TestCase):
|
||||||
|
|
||||||
def test_webvtt_formatter_ending(self):
|
def test_webvtt_formatter_ending(self):
|
||||||
content = WebVTTFormatter().format_transcript(self.transcript)
|
content = WebVTTFormatter().format_transcript(self.transcript)
|
||||||
lines = content.split('\n')
|
lines = content.split("\n")
|
||||||
|
|
||||||
# test ending lines
|
# test ending lines
|
||||||
self.assertEqual(lines[-2], self.transcript[-1]['text'])
|
self.assertEqual(lines[-2], self.transcript[-1]["text"])
|
||||||
self.assertEqual(lines[-1], "")
|
self.assertEqual(lines[-1], "")
|
||||||
|
|
||||||
def test_webvtt_formatter_many(self):
|
def test_webvtt_formatter_many(self):
|
||||||
|
@ -82,7 +86,10 @@ class TestFormatters(TestCase):
|
||||||
content = formatter.format_transcripts(self.transcripts)
|
content = formatter.format_transcripts(self.transcripts)
|
||||||
formatted_single_transcript = formatter.format_transcript(self.transcript)
|
formatted_single_transcript = formatter.format_transcript(self.transcript)
|
||||||
|
|
||||||
self.assertEqual(content, formatted_single_transcript + '\n\n\n' + formatted_single_transcript)
|
self.assertEqual(
|
||||||
|
content,
|
||||||
|
formatted_single_transcript + "\n\n\n" + formatted_single_transcript,
|
||||||
|
)
|
||||||
|
|
||||||
def test_pretty_print_formatter(self):
|
def test_pretty_print_formatter(self):
|
||||||
content = PrettyPrintFormatter().format_transcript(self.transcript)
|
content = PrettyPrintFormatter().format_transcript(self.transcript)
|
||||||
|
@ -106,7 +113,7 @@ class TestFormatters(TestCase):
|
||||||
|
|
||||||
def test_text_formatter(self):
|
def test_text_formatter(self):
|
||||||
content = TextFormatter().format_transcript(self.transcript)
|
content = TextFormatter().format_transcript(self.transcript)
|
||||||
lines = content.split('\n')
|
lines = content.split("\n")
|
||||||
|
|
||||||
self.assertEqual(lines[0], self.transcript[0]["text"])
|
self.assertEqual(lines[0], self.transcript[0]["text"])
|
||||||
self.assertEqual(lines[-1], self.transcript[-1]["text"])
|
self.assertEqual(lines[-1], self.transcript[-1]["text"])
|
||||||
|
@ -116,11 +123,14 @@ class TestFormatters(TestCase):
|
||||||
content = formatter.format_transcripts(self.transcripts)
|
content = formatter.format_transcripts(self.transcripts)
|
||||||
formatted_single_transcript = formatter.format_transcript(self.transcript)
|
formatted_single_transcript = formatter.format_transcript(self.transcript)
|
||||||
|
|
||||||
self.assertEqual(content, formatted_single_transcript + '\n\n\n' + formatted_single_transcript)
|
self.assertEqual(
|
||||||
|
content,
|
||||||
|
formatted_single_transcript + "\n\n\n" + formatted_single_transcript,
|
||||||
|
)
|
||||||
|
|
||||||
def test_formatter_loader(self):
|
def test_formatter_loader(self):
|
||||||
loader = FormatterLoader()
|
loader = FormatterLoader()
|
||||||
formatter = loader.load('json')
|
formatter = loader.load("json")
|
||||||
|
|
||||||
self.assertTrue(isinstance(formatter, JSONFormatter))
|
self.assertTrue(isinstance(formatter, JSONFormatter))
|
||||||
|
|
||||||
|
@ -132,4 +142,4 @@ class TestFormatters(TestCase):
|
||||||
|
|
||||||
def test_formatter_loader__unknown_format(self):
|
def test_formatter_loader__unknown_format(self):
|
||||||
with self.assertRaises(FormatterLoader.UnknownFormatterType):
|
with self.assertRaises(FormatterLoader.UnknownFormatterType):
|
||||||
FormatterLoader().load('png')
|
FormatterLoader().load("png")
|
||||||
|
|
Loading…
Reference in New Issue