Added an error that is raised if a URL is used as the video id

This commit is contained in:
Jonas Depoix 2023-04-17 15:34:46 +02:00
parent e0a9f0d3e5
commit a04a7010ed
4 changed files with 24 additions and 2 deletions

View File

@ -13,4 +13,5 @@ from ._errors import (
CookiesInvalid, CookiesInvalid,
FailedToCreateConsentCookie, FailedToCreateConsentCookie,
YouTubeRequestFailed, YouTubeRequestFailed,
InvalidVideoId,
) )

View File

@ -53,6 +53,14 @@ class VideoUnavailable(CouldNotRetrieveTranscript):
CAUSE_MESSAGE = 'The video is no longer available' CAUSE_MESSAGE = 'The video is no longer available'
class InvalidVideoId(CouldNotRetrieveTranscript):
    """Signal that the supplied video id is not a bare id (e.g. a full URL was passed).

    The class only contributes the explanatory ``CAUSE_MESSAGE``; everything
    else is inherited from ``CouldNotRetrieveTranscript``.
    """

    # The message shows a wrong call next to the corrected one so the user
    # can see that only the id part of the URL is expected.
    CAUSE_MESSAGE = (
        'You provided an invalid video id. Make sure you are using the video id and NOT the url!\n\n'
        'Do NOT run: `YouTubeTranscriptApi.get_transcript("https://www.youtube.com/watch?v=1234")`\n'
        'Instead run: `YouTubeTranscriptApi.get_transcript("1234")`'
    )
class TooManyRequests(CouldNotRetrieveTranscript): class TooManyRequests(CouldNotRetrieveTranscript):
CAUSE_MESSAGE = ( CAUSE_MESSAGE = (
'YouTube is receiving too many requests from this IP and now requires solving a captcha to continue. ' 'YouTube is receiving too many requests from this IP and now requires solving a captcha to continue. '

View File

@ -24,6 +24,7 @@ from ._errors import (
TranslationLanguageNotAvailable, TranslationLanguageNotAvailable,
NoTranscriptAvailable, NoTranscriptAvailable,
FailedToCreateConsentCookie, FailedToCreateConsentCookie,
InvalidVideoId,
) )
from ._settings import WATCH_URL from ._settings import WATCH_URL
@ -41,7 +42,6 @@ class TranscriptListFetcher(object):
self._http_client = http_client self._http_client = http_client
def fetch(self, video_id): def fetch(self, video_id):
return TranscriptList.build( return TranscriptList.build(
self._http_client, self._http_client,
video_id, video_id,
@ -52,6 +52,8 @@ class TranscriptListFetcher(object):
splitted_html = html.split('"captions":') splitted_html = html.split('"captions":')
if len(splitted_html) <= 1: if len(splitted_html) <= 1:
if video_id.startswith('http://') or video_id.startswith('https://'):
raise InvalidVideoId(video_id)
if 'class="g-recaptcha"' in html: if 'class="g-recaptcha"' in html:
raise TooManyRequests(video_id) raise TooManyRequests(video_id)
if '"playabilityStatus":' not in html: if '"playabilityStatus":' not in html:
@ -182,7 +184,7 @@ class TranscriptList(object):
def find_generated_transcript(self, language_codes): def find_generated_transcript(self, language_codes):
""" """
Finds a automatically generated transcript for a given language code. Finds an automatically generated transcript for a given language code.
:param language_codes: A list of language codes in a descending priority. For example, if this is set to :param language_codes: A list of language codes in a descending priority. For example, if this is set to
['de', 'en'] it will first try to fetch the german transcript (de) and then fetch the english transcript (en) if ['de', 'en'] it will first try to fetch the german transcript (de) and then fetch the english transcript (en) if

View File

@ -20,6 +20,7 @@ from youtube_transcript_api import (
CookiesInvalid, CookiesInvalid,
FailedToCreateConsentCookie, FailedToCreateConsentCookie,
YouTubeRequestFailed, YouTubeRequestFailed,
InvalidVideoId,
) )
@ -97,6 +98,16 @@ class TestYouTubeTranscriptApi(TestCase):
self.assertTrue(transcript.is_generated) self.assertTrue(transcript.is_generated)
def test_list_transcripts__url_as_video_id(self):
    """Passing a full watch URL as the video id must raise ``InvalidVideoId``."""
    # Serve the "transcripts disabled" fixture for any request to the watch page.
    watch_page_body = load_asset('youtube_transcripts_disabled.html.static')
    httpretty.register_uri(httpretty.GET, 'https://www.youtube.com/watch', body=watch_page_body)

    # A complete URL instead of the bare id 'GJLlxj_dtq8'.
    url_used_as_id = 'https://www.youtube.com/watch?v=GJLlxj_dtq8'
    with self.assertRaises(InvalidVideoId):
        YouTubeTranscriptApi.list_transcripts(url_used_as_id)
def test_translate_transcript(self): def test_translate_transcript(self):
transcript = YouTubeTranscriptApi.list_transcripts('GJLlxj_dtq8').find_transcript(['en']) transcript = YouTubeTranscriptApi.list_transcripts('GJLlxj_dtq8').find_transcript(['en'])