added ability to create consent cookie
This commit is contained in:
		
							parent
							
								
									c90cf16484
								
							
						
					
					
						commit
						9251be8462
					
				|  | @ -10,5 +10,6 @@ from ._errors import ( | ||||||
|     TranslationLanguageNotAvailable, |     TranslationLanguageNotAvailable, | ||||||
|     NoTranscriptAvailable, |     NoTranscriptAvailable, | ||||||
|     CookiePathInvalid, |     CookiePathInvalid, | ||||||
|     CookiesInvalid |     CookiesInvalid, | ||||||
|  |     FailedToCreateConsentCookie, | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  | @ -129,12 +129,11 @@ class YouTubeTranscriptApi(object): | ||||||
|      |      | ||||||
|     @classmethod |     @classmethod | ||||||
|     def _load_cookies(cls, cookies, video_id): |     def _load_cookies(cls, cookies, video_id): | ||||||
|         cookie_jar = {} |  | ||||||
|         try: |         try: | ||||||
|             cookie_jar = cookiejar.MozillaCookieJar() |             cookie_jar = cookiejar.MozillaCookieJar() | ||||||
|             cookie_jar.load(cookies) |             cookie_jar.load(cookies) | ||||||
|  |             if not cookie_jar: | ||||||
|  |                 raise CookiesInvalid(video_id) | ||||||
|  |             return cookie_jar | ||||||
|         except CookieLoadError: |         except CookieLoadError: | ||||||
|             raise CookiePathInvalid(video_id) |             raise CookiePathInvalid(video_id) | ||||||
|         if not cookie_jar: |  | ||||||
|             raise CookiesInvalid(video_id) |  | ||||||
|         return cookie_jar  |  | ||||||
|  |  | ||||||
|  | @ -40,10 +40,15 @@ class VideoUnavailable(CouldNotRetrieveTranscript): | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class TooManyRequests(CouldNotRetrieveTranscript): | class TooManyRequests(CouldNotRetrieveTranscript): | ||||||
|     CAUSE_MESSAGE = ("YouTube is receiving too many requests from this IP and now requires solving a captcha to continue. One of the following things can be done to work around this:\n\ |     CAUSE_MESSAGE = ( | ||||||
|     - Manually solve the captcha in a browser and export the cookie. Read here how to use that cookie with youtube-transcript-api: https://github.com/jdepoix/youtube-transcript-api#cookies\n\ |         'YouTube is receiving too many requests from this IP and now requires solving a captcha to continue. ' | ||||||
|     - Use a different IP address\n\ |         'One of the following things can be done to work around this:\n\ | ||||||
|     - Wait until the ban on your IP has been lifted") |         - Manually solve the captcha in a browser and export the cookie. ' | ||||||
|  |         'Read here how to use that cookie with ' | ||||||
|  |         'youtube-transcript-api: https://github.com/jdepoix/youtube-transcript-api#cookies\n\ | ||||||
|  |         - Use a different IP address\n\ | ||||||
|  |         - Wait until the ban on your IP has been lifted' | ||||||
|  |     ) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class TranscriptsDisabled(CouldNotRetrieveTranscript): | class TranscriptsDisabled(CouldNotRetrieveTranscript): | ||||||
|  | @ -70,6 +75,10 @@ class CookiesInvalid(CouldNotRetrieveTranscript): | ||||||
|     CAUSE_MESSAGE = 'The cookies provided are not valid (may have expired)' |     CAUSE_MESSAGE = 'The cookies provided are not valid (may have expired)' | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | class FailedToCreateConsentCookie(CouldNotRetrieveTranscript): | ||||||
|  |     CAUSE_MESSAGE = 'Failed to automatically give consent to saving cookies' | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
| class NoTranscriptFound(CouldNotRetrieveTranscript): | class NoTranscriptFound(CouldNotRetrieveTranscript): | ||||||
|     CAUSE_MESSAGE = ( |     CAUSE_MESSAGE = ( | ||||||
|         'No transcripts were found for any of the requested language codes: {requested_language_codes}\n\n' |         'No transcripts were found for any of the requested language codes: {requested_language_codes}\n\n' | ||||||
|  |  | ||||||
|  | @ -20,6 +20,7 @@ from ._errors import ( | ||||||
|     NotTranslatable, |     NotTranslatable, | ||||||
|     TranslationLanguageNotAvailable, |     TranslationLanguageNotAvailable, | ||||||
|     NoTranscriptAvailable, |     NoTranscriptAvailable, | ||||||
|  |     FailedToCreateConsentCookie, | ||||||
| ) | ) | ||||||
| from ._settings import WATCH_URL | from ._settings import WATCH_URL | ||||||
| 
 | 
 | ||||||
|  | @ -32,7 +33,7 @@ class TranscriptListFetcher(object): | ||||||
|         return TranscriptList.build( |         return TranscriptList.build( | ||||||
|             self._http_client, |             self._http_client, | ||||||
|             video_id, |             video_id, | ||||||
|             self._extract_captions_json(self._fetch_html(video_id), video_id) |             self._extract_captions_json(self._fetch_video_html(video_id), video_id) | ||||||
|         ) |         ) | ||||||
| 
 | 
 | ||||||
|     def _extract_captions_json(self, html, video_id): |     def _extract_captions_json(self, html, video_id): | ||||||
|  | @ -55,6 +56,21 @@ class TranscriptListFetcher(object): | ||||||
| 
 | 
 | ||||||
|         return captions_json |         return captions_json | ||||||
| 
 | 
 | ||||||
|  |     def _create_consent_cookie(self, html, video_id): | ||||||
|  |         match = re.search('name="v" value="(.*?)"', html) | ||||||
|  |         if match is None: | ||||||
|  |             raise FailedToCreateConsentCookie(video_id) | ||||||
|  |         self._http_client.cookies.set('CONSENT', 'YES+' + match.group(1), domain='.youtube.com') | ||||||
|  | 
 | ||||||
|  |     def _fetch_video_html(self, video_id): | ||||||
|  |         html = self._fetch_html(video_id) | ||||||
|  |         if 'action="https://consent.youtube.com/s"' in html: | ||||||
|  |             self._create_consent_cookie(html, video_id) | ||||||
|  |             html = self._fetch_html(video_id) | ||||||
|  |             if 'action="https://consent.youtube.com/s"' in html: | ||||||
|  |                 raise FailedToCreateConsentCookie(video_id) | ||||||
|  |         return html | ||||||
|  | 
 | ||||||
|     def _fetch_html(self, video_id): |     def _fetch_html(self, video_id): | ||||||
|         return self._http_client.get(WATCH_URL.format(video_id=video_id)).text.replace( |         return self._http_client.get(WATCH_URL.format(video_id=video_id)).text.replace( | ||||||
|             '\\u0026', '&' |             '\\u0026', '&' | ||||||
|  |  | ||||||
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							|  | @ -17,7 +17,8 @@ from youtube_transcript_api import ( | ||||||
|     NotTranslatable, |     NotTranslatable, | ||||||
|     TranslationLanguageNotAvailable, |     TranslationLanguageNotAvailable, | ||||||
|     CookiePathInvalid, |     CookiePathInvalid, | ||||||
|     CookiesInvalid |     CookiesInvalid, | ||||||
|  |     FailedToCreateConsentCookie, | ||||||
| ) | ) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | @ -44,6 +45,7 @@ class TestYouTubeTranscriptApi(TestCase): | ||||||
|         ) |         ) | ||||||
| 
 | 
 | ||||||
|     def tearDown(self): |     def tearDown(self): | ||||||
|  |         httpretty.reset() | ||||||
|         httpretty.disable() |         httpretty.disable() | ||||||
| 
 | 
 | ||||||
|     def test_get_transcript(self): |     def test_get_transcript(self): | ||||||
|  | @ -125,6 +127,43 @@ class TestYouTubeTranscriptApi(TestCase): | ||||||
|         self.assertEqual(len(query_string['lang']), 1) |         self.assertEqual(len(query_string['lang']), 1) | ||||||
|         self.assertEqual(query_string['lang'][0], 'en') |         self.assertEqual(query_string['lang'][0], 'en') | ||||||
| 
 | 
 | ||||||
|  |     def test_get_transcript__create_consent_cookie_if_needed(self): | ||||||
|  |         httpretty.register_uri( | ||||||
|  |             httpretty.GET, | ||||||
|  |             'https://www.youtube.com/watch', | ||||||
|  |             body=load_asset('youtube_consent_page.html.static') | ||||||
|  |         ) | ||||||
|  | 
 | ||||||
|  |         YouTubeTranscriptApi.get_transcript('F1xioXWb8CY') | ||||||
|  |         self.assertEqual(len(httpretty.latest_requests()), 3) | ||||||
|  |         for request in httpretty.latest_requests()[1:]: | ||||||
|  |             self.assertEqual(request.headers['cookie'], 'CONSENT=YES+cb.20210328-17-p0.de+FX+119') | ||||||
|  | 
 | ||||||
|  |     def test_get_transcript__exception_if_create_consent_cookie_failed(self): | ||||||
|  |         httpretty.register_uri( | ||||||
|  |             httpretty.GET, | ||||||
|  |             'https://www.youtube.com/watch', | ||||||
|  |             body=load_asset('youtube_consent_page.html.static') | ||||||
|  |         ) | ||||||
|  |         httpretty.register_uri( | ||||||
|  |             httpretty.GET, | ||||||
|  |             'https://www.youtube.com/watch', | ||||||
|  |             body=load_asset('youtube_consent_page.html.static') | ||||||
|  |         ) | ||||||
|  | 
 | ||||||
|  |         with self.assertRaises(FailedToCreateConsentCookie): | ||||||
|  |             YouTubeTranscriptApi.get_transcript('F1xioXWb8CY') | ||||||
|  | 
 | ||||||
|  |     def test_get_transcript__exception_if_consent_cookie_age_invalid(self): | ||||||
|  |         httpretty.register_uri( | ||||||
|  |             httpretty.GET, | ||||||
|  |             'https://www.youtube.com/watch', | ||||||
|  |             body=load_asset('youtube_consent_page_invalid.html.static') | ||||||
|  |         ) | ||||||
|  | 
 | ||||||
|  |         with self.assertRaises(FailedToCreateConsentCookie): | ||||||
|  |             YouTubeTranscriptApi.get_transcript('F1xioXWb8CY') | ||||||
|  | 
 | ||||||
|     def test_get_transcript__exception_if_video_unavailable(self): |     def test_get_transcript__exception_if_video_unavailable(self): | ||||||
|         httpretty.register_uri( |         httpretty.register_uri( | ||||||
|             httpretty.GET, |             httpretty.GET, | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue