Added an error that is raised if a URL is passed as the video id
This commit is contained in:
		
							parent
							
								
									e0a9f0d3e5
								
							
						
					
					
						commit
						a04a7010ed
					
				|  | @ -13,4 +13,5 @@ from ._errors import ( | ||||||
|     CookiesInvalid, |     CookiesInvalid, | ||||||
|     FailedToCreateConsentCookie, |     FailedToCreateConsentCookie, | ||||||
|     YouTubeRequestFailed, |     YouTubeRequestFailed, | ||||||
|  |     InvalidVideoId, | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  | @ -53,6 +53,14 @@ class VideoUnavailable(CouldNotRetrieveTranscript): | ||||||
|     CAUSE_MESSAGE = 'The video is no longer available' |     CAUSE_MESSAGE = 'The video is no longer available' | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | class InvalidVideoId(CouldNotRetrieveTranscript): | ||||||
|  |     CAUSE_MESSAGE = ( | ||||||
|  |         'You provided an invalid video id. Make sure you are using the video id and NOT the url!\n\n' | ||||||
|  |         'Do NOT run: `YouTubeTranscriptApi.get_transcript("https://www.youtube.com/watch?v=1234")`\n' | ||||||
|  |         'Instead run: `YouTubeTranscriptApi.get_transcript("1234")`' | ||||||
|  |     ) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
| class TooManyRequests(CouldNotRetrieveTranscript): | class TooManyRequests(CouldNotRetrieveTranscript): | ||||||
|     CAUSE_MESSAGE = ( |     CAUSE_MESSAGE = ( | ||||||
|         'YouTube is receiving too many requests from this IP and now requires solving a captcha to continue. ' |         'YouTube is receiving too many requests from this IP and now requires solving a captcha to continue. ' | ||||||
|  |  | ||||||
|  | @ -24,6 +24,7 @@ from ._errors import ( | ||||||
|     TranslationLanguageNotAvailable, |     TranslationLanguageNotAvailable, | ||||||
|     NoTranscriptAvailable, |     NoTranscriptAvailable, | ||||||
|     FailedToCreateConsentCookie, |     FailedToCreateConsentCookie, | ||||||
|  |     InvalidVideoId, | ||||||
| ) | ) | ||||||
| from ._settings import WATCH_URL | from ._settings import WATCH_URL | ||||||
| 
 | 
 | ||||||
|  | @ -41,7 +42,6 @@ class TranscriptListFetcher(object): | ||||||
|         self._http_client = http_client |         self._http_client = http_client | ||||||
| 
 | 
 | ||||||
|     def fetch(self, video_id): |     def fetch(self, video_id): | ||||||
| 
 |  | ||||||
|         return TranscriptList.build( |         return TranscriptList.build( | ||||||
|             self._http_client, |             self._http_client, | ||||||
|             video_id, |             video_id, | ||||||
|  | @ -52,6 +52,8 @@ class TranscriptListFetcher(object): | ||||||
|         splitted_html = html.split('"captions":') |         splitted_html = html.split('"captions":') | ||||||
| 
 | 
 | ||||||
|         if len(splitted_html) <= 1: |         if len(splitted_html) <= 1: | ||||||
|  |             if video_id.startswith('http://') or video_id.startswith('https://'): | ||||||
|  |                 raise InvalidVideoId(video_id) | ||||||
|             if 'class="g-recaptcha"' in html: |             if 'class="g-recaptcha"' in html: | ||||||
|                 raise TooManyRequests(video_id) |                 raise TooManyRequests(video_id) | ||||||
|             if '"playabilityStatus":' not in html: |             if '"playabilityStatus":' not in html: | ||||||
|  | @ -182,7 +184,7 @@ class TranscriptList(object): | ||||||
| 
 | 
 | ||||||
|     def find_generated_transcript(self, language_codes): |     def find_generated_transcript(self, language_codes): | ||||||
|         """ |         """ | ||||||
|         Finds a automatically generated transcript for a given language code. |         Finds an automatically generated transcript for a given language code. | ||||||
| 
 | 
 | ||||||
|         :param language_codes: A list of language codes in a descending priority. For example, if this is set to |         :param language_codes: A list of language codes in a descending priority. For example, if this is set to | ||||||
|         ['de', 'en'] it will first try to fetch the german transcript (de) and then fetch the english transcript (en) if |         ['de', 'en'] it will first try to fetch the german transcript (de) and then fetch the english transcript (en) if | ||||||
|  |  | ||||||
|  | @ -20,6 +20,7 @@ from youtube_transcript_api import ( | ||||||
|     CookiesInvalid, |     CookiesInvalid, | ||||||
|     FailedToCreateConsentCookie, |     FailedToCreateConsentCookie, | ||||||
|     YouTubeRequestFailed, |     YouTubeRequestFailed, | ||||||
|  |     InvalidVideoId, | ||||||
| ) | ) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | @ -97,6 +98,16 @@ class TestYouTubeTranscriptApi(TestCase): | ||||||
| 
 | 
 | ||||||
|         self.assertTrue(transcript.is_generated) |         self.assertTrue(transcript.is_generated) | ||||||
| 
 | 
 | ||||||
|  |     def test_list_transcripts__url_as_video_id(self): | ||||||
|  |         httpretty.register_uri( | ||||||
|  |             httpretty.GET, | ||||||
|  |             'https://www.youtube.com/watch', | ||||||
|  |             body=load_asset('youtube_transcripts_disabled.html.static') | ||||||
|  |         ) | ||||||
|  | 
 | ||||||
|  |         with self.assertRaises(InvalidVideoId): | ||||||
|  |             YouTubeTranscriptApi.list_transcripts('https://www.youtube.com/watch?v=GJLlxj_dtq8') | ||||||
|  | 
 | ||||||
|     def test_translate_transcript(self): |     def test_translate_transcript(self): | ||||||
|         transcript = YouTubeTranscriptApi.list_transcripts('GJLlxj_dtq8').find_transcript(['en']) |         transcript = YouTubeTranscriptApi.list_transcripts('GJLlxj_dtq8').find_transcript(['en']) | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue