Merge pull request #108 from jdepoix/bugfix/ISSUE-107
added ability to create consent cookie
This commit is contained in:
commit
46be97ae35
|
@ -10,5 +10,6 @@ from ._errors import (
|
|||
TranslationLanguageNotAvailable,
|
||||
NoTranscriptAvailable,
|
||||
CookiePathInvalid,
|
||||
CookiesInvalid
|
||||
CookiesInvalid,
|
||||
FailedToCreateConsentCookie,
|
||||
)
|
||||
|
|
|
@ -129,12 +129,11 @@ class YouTubeTranscriptApi(object):
|
|||
|
||||
@classmethod
def _load_cookies(cls, cookies, video_id):
    """Load a Mozilla-format cookie file and return the populated cookie jar.

    :param cookies: path of the cookie file handed to ``MozillaCookieJar.load``
    :param video_id: id of the video being requested; only used to build the
        raised exceptions
    :return: the loaded cookie jar
    :raises CookiePathInvalid: if loading the file raises ``CookieLoadError``
    :raises CookiesInvalid: if the loaded jar is empty
    """
    try:
        cookie_jar = cookiejar.MozillaCookieJar()
        cookie_jar.load(cookies)
    except CookieLoadError:
        raise CookiePathInvalid(video_id)
    # An empty jar is falsy: the file could be read but yielded no cookies.
    if not cookie_jar:
        raise CookiesInvalid(video_id)
    return cookie_jar
|
||||
|
|
|
@ -40,10 +40,15 @@ class VideoUnavailable(CouldNotRetrieveTranscript):
|
|||
|
||||
|
||||
class TooManyRequests(CouldNotRetrieveTranscript):
|
||||
CAUSE_MESSAGE = ("YouTube is receiving too many requests from this IP and now requires solving a captcha to continue. One of the following things can be done to work around this:\n\
|
||||
- Manually solve the captcha in a browser and export the cookie. Read here how to use that cookie with youtube-transcript-api: https://github.com/jdepoix/youtube-transcript-api#cookies\n\
|
||||
CAUSE_MESSAGE = (
|
||||
'YouTube is receiving too many requests from this IP and now requires solving a captcha to continue. '
|
||||
'One of the following things can be done to work around this:\n\
|
||||
- Manually solve the captcha in a browser and export the cookie. '
|
||||
'Read here how to use that cookie with '
|
||||
'youtube-transcript-api: https://github.com/jdepoix/youtube-transcript-api#cookies\n\
|
||||
- Use a different IP address\n\
|
||||
- Wait until the ban on your IP has been lifted")
|
||||
- Wait until the ban on your IP has been lifted'
|
||||
)
|
||||
|
||||
|
||||
class TranscriptsDisabled(CouldNotRetrieveTranscript):
|
||||
|
@ -70,6 +75,10 @@ class CookiesInvalid(CouldNotRetrieveTranscript):
|
|||
CAUSE_MESSAGE = 'The cookies provided are not valid (may have expired)'
|
||||
|
||||
|
||||
class FailedToCreateConsentCookie(CouldNotRetrieveTranscript):
    """Error for a failed attempt to automatically create the consent cookie."""

    CAUSE_MESSAGE = 'Failed to automatically give consent to saving cookies'
|
||||
|
||||
|
||||
class NoTranscriptFound(CouldNotRetrieveTranscript):
|
||||
CAUSE_MESSAGE = (
|
||||
'No transcripts were found for any of the requested language codes: {requested_language_codes}\n\n'
|
||||
|
|
|
@ -20,6 +20,7 @@ from ._errors import (
|
|||
NotTranslatable,
|
||||
TranslationLanguageNotAvailable,
|
||||
NoTranscriptAvailable,
|
||||
FailedToCreateConsentCookie,
|
||||
)
|
||||
from ._settings import WATCH_URL
|
||||
|
||||
|
@ -32,7 +33,7 @@ class TranscriptListFetcher(object):
|
|||
return TranscriptList.build(
|
||||
self._http_client,
|
||||
video_id,
|
||||
self._extract_captions_json(self._fetch_html(video_id), video_id)
|
||||
self._extract_captions_json(self._fetch_video_html(video_id), video_id)
|
||||
)
|
||||
|
||||
def _extract_captions_json(self, html, video_id):
|
||||
|
@ -55,6 +56,21 @@ class TranscriptListFetcher(object):
|
|||
|
||||
return captions_json
|
||||
|
||||
def _create_consent_cookie(self, html, video_id):
|
||||
match = re.search('name="v" value="(.*?)"', html)
|
||||
if match is None:
|
||||
raise FailedToCreateConsentCookie(video_id)
|
||||
self._http_client.cookies.set('CONSENT', 'YES+' + match.group(1), domain='.youtube.com')
|
||||
|
||||
def _fetch_video_html(self, video_id):
|
||||
html = self._fetch_html(video_id)
|
||||
if 'action="https://consent.youtube.com/s"' in html:
|
||||
self._create_consent_cookie(html, video_id)
|
||||
html = self._fetch_html(video_id)
|
||||
if 'action="https://consent.youtube.com/s"' in html:
|
||||
raise FailedToCreateConsentCookie(video_id)
|
||||
return html
|
||||
|
||||
def _fetch_html(self, video_id):
|
||||
return self._http_client.get(WATCH_URL.format(video_id=video_id)).text.replace(
|
||||
'\\u0026', '&'
|
||||
|
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -17,7 +17,8 @@ from youtube_transcript_api import (
|
|||
NotTranslatable,
|
||||
TranslationLanguageNotAvailable,
|
||||
CookiePathInvalid,
|
||||
CookiesInvalid
|
||||
CookiesInvalid,
|
||||
FailedToCreateConsentCookie,
|
||||
)
|
||||
|
||||
|
||||
|
@ -44,6 +45,7 @@ class TestYouTubeTranscriptApi(TestCase):
|
|||
)
|
||||
|
||||
def tearDown(self):
    """Reset and then disable httpretty after each test."""
    httpretty.reset()
    httpretty.disable()
|
||||
|
||||
def test_get_transcript(self):
|
||||
|
@ -125,6 +127,43 @@ class TestYouTubeTranscriptApi(TestCase):
|
|||
self.assertEqual(len(query_string['lang']), 1)
|
||||
self.assertEqual(query_string['lang'][0], 'en')
|
||||
|
||||
def test_get_transcript__create_consent_cookie_if_needed(self):
    """Serve the consent page and check that later requests carry the CONSENT cookie."""
    httpretty.register_uri(
        httpretty.GET,
        'https://www.youtube.com/watch',
        body=load_asset('youtube_consent_page.html.static')
    )

    YouTubeTranscriptApi.get_transcript('F1xioXWb8CY')
    self.assertEqual(len(httpretty.latest_requests()), 3)
    # The first request is made before the cookie exists, so it is skipped here.
    for request in httpretty.latest_requests()[1:]:
        self.assertEqual(request.headers['cookie'], 'CONSENT=YES+cb.20210328-17-p0.de+FX+119')
|
||||
|
||||
def test_get_transcript__exception_if_create_consent_cookie_failed(self):
    """Expect FailedToCreateConsentCookie when the consent page is registered for the watch URL."""
    httpretty.register_uri(
        httpretty.GET,
        'https://www.youtube.com/watch',
        body=load_asset('youtube_consent_page.html.static')
    )
    # NOTE(review): identical second registration kept as-is; presumably meant to
    # serve the consent page for the retry as well - confirm it is needed.
    httpretty.register_uri(
        httpretty.GET,
        'https://www.youtube.com/watch',
        body=load_asset('youtube_consent_page.html.static')
    )

    with self.assertRaises(FailedToCreateConsentCookie):
        YouTubeTranscriptApi.get_transcript('F1xioXWb8CY')
|
||||
|
||||
def test_get_transcript__exception_if_consent_cookie_age_invalid(self):
    """Expect FailedToCreateConsentCookie when the invalid consent page asset is served."""
    httpretty.register_uri(
        httpretty.GET,
        'https://www.youtube.com/watch',
        body=load_asset('youtube_consent_page_invalid.html.static')
    )

    with self.assertRaises(FailedToCreateConsentCookie):
        YouTubeTranscriptApi.get_transcript('F1xioXWb8CY')
|
||||
|
||||
def test_get_transcript__exception_if_video_unavailable(self):
|
||||
httpretty.register_uri(
|
||||
httpretty.GET,
|
||||
|
|
Loading…
Reference in New Issue