diff --git a/youtube_transcript_api/_api.py b/youtube_transcript_api/_api.py index 5664a1c..c418fd3 100644 --- a/youtube_transcript_api/_api.py +++ b/youtube_transcript_api/_api.py @@ -99,7 +99,7 @@ class YouTubeTranscriptApi(): class _TranscriptFetcher(): WATCH_URL = 'https://www.youtube.com/watch?v={video_id}' - API_BASE_URL = 'https://www.youtube.com/api/{api_url}' + API_BASE_URL = 'https://www.youtube.com/api/' LANGUAGE_REGEX = re.compile(r'(&lang=.*&)|(&lang=.*)') TIMEDTEXT_STRING = 'timedtext?v=' @@ -107,39 +107,28 @@ class _TranscriptFetcher(): self.video_id = video_id self.languages = languages self.proxies = proxies + self.matched_splits = [] def fetch(self): if self.proxies: fetched_site = requests.get(self.WATCH_URL.format(video_id=self.video_id), proxies=self.proxies).text else: fetched_site = requests.get(self.WATCH_URL.format(video_id=self.video_id)).text - timedtext_splits = fetched_site.split(self.TIMEDTEXT_STRING) - timedtext_url_start = ( - timedtext_splits[2].find(self.TIMEDTEXT_STRING) - + len(timedtext_splits[0]) - + len(timedtext_splits[1]) - + len(self.TIMEDTEXT_STRING) + 1 - ) - - for language in (self.languages if self.languages else [None,]): - response = self._execute_api_request(fetched_site, timedtext_url_start, language) + timedtext_splits = [split[:split.find('"')].replace('\\u0026', '&').replace('\\', '') for split in fetched_site.split(self.TIMEDTEXT_STRING)] + for language in (self.languages if self.languages else ['en']): + self.matched_splits = [split for split in timedtext_splits if f'&lang={language}' in split] + if self.matched_splits: + break + if self.matched_splits: + timedtext_url = min(self.matched_splits, key=len) + response = self._execute_api_request(timedtext_url, language) if response: return response return None - def _execute_api_request(self, fetched_site, timedtext_url_start, language): - url = self.API_BASE_URL.format( - api_url=fetched_site[ - timedtext_url_start:timedtext_url_start + 
fetched_site[timedtext_url_start:].find('"') - ].replace( - '\\u0026', '&' - ).replace( - '\\', '' - ) - ) - if language: - url = re.sub(self.LANGUAGE_REGEX, '&lang={language}&'.format(language=language), url) + def _execute_api_request(self, timedtext_url, language): + url = f'{self.API_BASE_URL}{self.TIMEDTEXT_STRING}{timedtext_url}' if self.proxies: return requests.get(url, proxies=self.proxies).text else: diff --git a/youtube_transcript_api/test/assets/youtubeWW1.html.static b/youtube_transcript_api/test/assets/youtubeWW1.html.static new file mode 100644 index 0000000..f5e5149 --- /dev/null +++ b/youtube_transcript_api/test/assets/youtubeWW1.html.static @@ -0,0 +1,1519 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + +The Tide Is Turning - Russian Civil War Fall 1919 I THE GREAT WAR 1919 - YouTube + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+
+
+
+ +
+
+
+ +
+
+
+

+ + + +Loading... + +

+ +
+
+
+ +
+
+
+ +
+
+ + +
+
+
+ +
+
+
+
+ + +
+
+
+
+
+
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ +
+
+
+ +
+
+
+

+ + + + + The Tide Is Turning - Russian Civil War Fall 1919 I THE GREAT WAR 1919 + + +

+
+
+ + +
+ + + + + +
+
36,240 views
+
+
+
+
+
+ + + + + +
+
+ + + +
+
+
+
+

+ + + +Loading... + +

+ +
+
+
+ +
+ +
+
+
+

+ + + +Loading... + +

+ +
+
+
+

+Transcript +

+
+ +
+ + + +
+
+The interactive transcript could not be loaded. +
+ + +
+
+ +
+ +
+
+

+ + + +Loading... + +

+ +
+
+ + +
+
+ Rating is available when the video has been rented. +
+ +
+ +
+
+ This feature is not available right now. Please try again later. +
+
+ + +
+ + +
+ + +
Published on Oct 14, 2019

Support 16 Days in Berlin: https://realtimehistory.net/indiegogo

The White Russian advance on Moscow comes to a crashing end as the Red Army manages to turn the tide of the Russian Civil War in Fall 1919.

» SUPPORT THE CHANNEL
Patreon: https://www.patreon.com/thegreatwar
Merchandise: https://shop.spreadshirt.de/thegreatwar/
Become a member: https://www.youtube.com/channel/UCUcy...

» BUY OUR SOURCES IN OUR AMAZON STORES
Our Amazon US Store: https://www.amazon.com/shop/influence...
Our Amazon CA Store: https://www.amazon.ca/shop/influencer...
Our Amazon UK Store: https://www.amazon.co.uk/shop/influen...

» SOURCES
Figes, Orlando. A People’s Tragedy. The Russian Revolution (London: The Bodley Head, 2017 [1996]).
Mawdsley, Evan. The Russian Civil War (New York: Pegasus Books, 2005).
Smele, Jonathan. The ‘Russian’ Civil Wars 1916-1926 (London: Hurst, 2015).
Sumpf, Alexandre. “Russian Civil War,” in 1914-1918 online. International Encyclopedia of the First World War. https://encyclopedia.1914-1918-online.
Engelstein, Laura. Russia in Flames (Oxford University Press, 2017).

» SOCIAL MEDIA
Facebook: https://facebook.com/TheGreatWarYT
Instagram: https://instagram.com/the_great_war
Twitter: https://twitter.com/WW1_Series
Reddit: https://reddit.com/r/TheGreatWarChannel

»CREDITS
Presented by: Jesse Alexander
Written by: Jesse Alexander
Director: Toni Steller & Florian Wittig
Director of Photography: Toni Steller
Sound: Toni Steller
Editing: Toni Steller
Mixing, Mastering & Sound Design: http://above-zero.com
Maps: Daniel Kogosov (https://www.patreon.com/Zalezsky)
Research by: Jesse Alexander
Fact checking: Florian Wittig

Channel Design: Alexander Clark
Original Logo: David van Stephold


A Mediakraft Networks Original Channel

Contains licensed material by getty images
All rights reserved - Real Time History GmbH 2019

+ +
+
+ +
+ + +
+
+

+ + + +Loading... + +

+ +
+ +
+ + +
+
+
+ + + +
+
+ +
+ +
+
+
+Advertisement +
+
+
+
+ + +
+
+
+
+
+ + + +When autoplay is enabled, a suggested video will automatically play next. + + + +
+

+ Up next +

+ + +
+
+ + +
+
+
+ +
+
+ +
+
+ +
+
+
+ + +
+ +
+ +
+
+ + +
+
+ + +
+ to add this to Watch Later + +
+
+

+Add to +

+
+
+

+ + + + Loading playlists... + +

+ +
+
+ + + + + + + \ No newline at end of file diff --git a/youtube_transcript_api/test/test_api.py b/youtube_transcript_api/test/test_api.py index e298900..6fd48b5 100644 --- a/youtube_transcript_api/test/test_api.py +++ b/youtube_transcript_api/test/test_api.py @@ -53,11 +53,11 @@ class TestYouTubeTranscriptApi(TestCase): def test_get_transcript__fallback_language_is_used(self): httpretty.register_uri( httpretty.GET, - 'https://www.youtube.com/api/timedtext', - body='' + 'https://www.youtube.com/watch', + body=load_asset('youtubeWW1.html.static') ) - YouTubeTranscriptApi.get_transcript('GJLlxj_dtq8', ['de', 'en']) + YouTubeTranscriptApi.get_transcript('F1xioXWb8CY', ['de', 'en']) query_string = httpretty.last_request().querystring self.assertIn('lang', query_string)