added ability to create consent cookie
This commit is contained in:
		
							parent
							
								
									c90cf16484
								
							
						
					
					
						commit
						9251be8462
					
				|  | @ -10,5 +10,6 @@ from ._errors import ( | |||
|     TranslationLanguageNotAvailable, | ||||
|     NoTranscriptAvailable, | ||||
|     CookiePathInvalid, | ||||
|     CookiesInvalid | ||||
|     CookiesInvalid, | ||||
|     FailedToCreateConsentCookie, | ||||
| ) | ||||
|  |  | |||
|  | @ -129,12 +129,11 @@ class YouTubeTranscriptApi(object): | |||
|      | ||||
|     @classmethod | ||||
|     def _load_cookies(cls, cookies, video_id): | ||||
|         cookie_jar = {} | ||||
|         try: | ||||
|             cookie_jar = cookiejar.MozillaCookieJar() | ||||
|             cookie_jar.load(cookies) | ||||
|             if not cookie_jar: | ||||
|                 raise CookiesInvalid(video_id) | ||||
|             return cookie_jar | ||||
|         except CookieLoadError: | ||||
|             raise CookiePathInvalid(video_id) | ||||
|         if not cookie_jar: | ||||
|             raise CookiesInvalid(video_id) | ||||
|         return cookie_jar  | ||||
|  |  | |||
|  | @ -40,10 +40,15 @@ class VideoUnavailable(CouldNotRetrieveTranscript): | |||
| 
 | ||||
| 
 | ||||
| class TooManyRequests(CouldNotRetrieveTranscript): | ||||
|     CAUSE_MESSAGE = ("YouTube is receiving too many requests from this IP and now requires solving a captcha to continue. One of the following things can be done to work around this:\n\ | ||||
|     - Manually solve the captcha in a browser and export the cookie. Read here how to use that cookie with youtube-transcript-api: https://github.com/jdepoix/youtube-transcript-api#cookies\n\ | ||||
|     - Use a different IP address\n\ | ||||
|     - Wait until the ban on your IP has been lifted") | ||||
|     CAUSE_MESSAGE = ( | ||||
|         'YouTube is receiving too many requests from this IP and now requires solving a captcha to continue. ' | ||||
|         'One of the following things can be done to work around this:\n\ | ||||
|         - Manually solve the captcha in a browser and export the cookie. ' | ||||
|         'Read here how to use that cookie with ' | ||||
|         'youtube-transcript-api: https://github.com/jdepoix/youtube-transcript-api#cookies\n\ | ||||
|         - Use a different IP address\n\ | ||||
|         - Wait until the ban on your IP has been lifted' | ||||
|     ) | ||||
| 
 | ||||
| 
 | ||||
| class TranscriptsDisabled(CouldNotRetrieveTranscript): | ||||
|  | @ -70,6 +75,10 @@ class CookiesInvalid(CouldNotRetrieveTranscript): | |||
|     CAUSE_MESSAGE = 'The cookies provided are not valid (may have expired)' | ||||
| 
 | ||||
| 
 | ||||
| class FailedToCreateConsentCookie(CouldNotRetrieveTranscript): | ||||
|     CAUSE_MESSAGE = 'Failed to automatically give consent to saving cookies' | ||||
| 
 | ||||
| 
 | ||||
| class NoTranscriptFound(CouldNotRetrieveTranscript): | ||||
|     CAUSE_MESSAGE = ( | ||||
|         'No transcripts were found for any of the requested language codes: {requested_language_codes}\n\n' | ||||
|  |  | |||
|  | @ -20,6 +20,7 @@ from ._errors import ( | |||
|     NotTranslatable, | ||||
|     TranslationLanguageNotAvailable, | ||||
|     NoTranscriptAvailable, | ||||
|     FailedToCreateConsentCookie, | ||||
| ) | ||||
| from ._settings import WATCH_URL | ||||
| 
 | ||||
|  | @ -32,7 +33,7 @@ class TranscriptListFetcher(object): | |||
|         return TranscriptList.build( | ||||
|             self._http_client, | ||||
|             video_id, | ||||
|             self._extract_captions_json(self._fetch_html(video_id), video_id) | ||||
|             self._extract_captions_json(self._fetch_video_html(video_id), video_id) | ||||
|         ) | ||||
| 
 | ||||
|     def _extract_captions_json(self, html, video_id): | ||||
|  | @ -55,6 +56,21 @@ class TranscriptListFetcher(object): | |||
| 
 | ||||
|         return captions_json | ||||
| 
 | ||||
|     def _create_consent_cookie(self, html, video_id): | ||||
|         match = re.search('name="v" value="(.*?)"', html) | ||||
|         if match is None: | ||||
|             raise FailedToCreateConsentCookie(video_id) | ||||
|         self._http_client.cookies.set('CONSENT', 'YES+' + match.group(1), domain='.youtube.com') | ||||
| 
 | ||||
|     def _fetch_video_html(self, video_id): | ||||
|         html = self._fetch_html(video_id) | ||||
|         if 'action="https://consent.youtube.com/s"' in html: | ||||
|             self._create_consent_cookie(html, video_id) | ||||
|             html = self._fetch_html(video_id) | ||||
|             if 'action="https://consent.youtube.com/s"' in html: | ||||
|                 raise FailedToCreateConsentCookie(video_id) | ||||
|         return html | ||||
| 
 | ||||
|     def _fetch_html(self, video_id): | ||||
|         return self._http_client.get(WATCH_URL.format(video_id=video_id)).text.replace( | ||||
|             '\\u0026', '&' | ||||
|  |  | |||
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							|  | @ -17,7 +17,8 @@ from youtube_transcript_api import ( | |||
|     NotTranslatable, | ||||
|     TranslationLanguageNotAvailable, | ||||
|     CookiePathInvalid, | ||||
|     CookiesInvalid | ||||
|     CookiesInvalid, | ||||
|     FailedToCreateConsentCookie, | ||||
| ) | ||||
| 
 | ||||
| 
 | ||||
|  | @ -44,6 +45,7 @@ class TestYouTubeTranscriptApi(TestCase): | |||
|         ) | ||||
| 
 | ||||
|     def tearDown(self): | ||||
|         httpretty.reset() | ||||
|         httpretty.disable() | ||||
| 
 | ||||
|     def test_get_transcript(self): | ||||
|  | @ -125,6 +127,43 @@ class TestYouTubeTranscriptApi(TestCase): | |||
|         self.assertEqual(len(query_string['lang']), 1) | ||||
|         self.assertEqual(query_string['lang'][0], 'en') | ||||
| 
 | ||||
|     def test_get_transcript__create_consent_cookie_if_needed(self): | ||||
|         httpretty.register_uri( | ||||
|             httpretty.GET, | ||||
|             'https://www.youtube.com/watch', | ||||
|             body=load_asset('youtube_consent_page.html.static') | ||||
|         ) | ||||
| 
 | ||||
|         YouTubeTranscriptApi.get_transcript('F1xioXWb8CY') | ||||
|         self.assertEqual(len(httpretty.latest_requests()), 3) | ||||
|         for request in httpretty.latest_requests()[1:]: | ||||
|             self.assertEqual(request.headers['cookie'], 'CONSENT=YES+cb.20210328-17-p0.de+FX+119') | ||||
| 
 | ||||
|     def test_get_transcript__exception_if_create_consent_cookie_failed(self): | ||||
|         httpretty.register_uri( | ||||
|             httpretty.GET, | ||||
|             'https://www.youtube.com/watch', | ||||
|             body=load_asset('youtube_consent_page.html.static') | ||||
|         ) | ||||
|         httpretty.register_uri( | ||||
|             httpretty.GET, | ||||
|             'https://www.youtube.com/watch', | ||||
|             body=load_asset('youtube_consent_page.html.static') | ||||
|         ) | ||||
| 
 | ||||
|         with self.assertRaises(FailedToCreateConsentCookie): | ||||
|             YouTubeTranscriptApi.get_transcript('F1xioXWb8CY') | ||||
| 
 | ||||
|     def test_get_transcript__exception_if_consent_cookie_age_invalid(self): | ||||
|         httpretty.register_uri( | ||||
|             httpretty.GET, | ||||
|             'https://www.youtube.com/watch', | ||||
|             body=load_asset('youtube_consent_page_invalid.html.static') | ||||
|         ) | ||||
| 
 | ||||
|         with self.assertRaises(FailedToCreateConsentCookie): | ||||
|             YouTubeTranscriptApi.get_transcript('F1xioXWb8CY') | ||||
| 
 | ||||
|     def test_get_transcript__exception_if_video_unavailable(self): | ||||
|         httpretty.register_uri( | ||||
|             httpretty.GET, | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue