Added cli support, fixed testing
parent dc9fc2ee93
commit f9e553ebaf
@@ -52,15 +52,22 @@ class YouTubeTranscriptApi():
         :type video_id: str
         :param proxies: a dictionary mapping of http and https proxies to be used for the network requests
         :type proxies: {'http': str, 'https': str} - http://docs.python-requests.org/en/master/user/advanced/#proxies
+        :param cookies: a string of the path to a text file containing youtube authorization cookies
+        :type cookies: str - cookies.txt
         :return: the list of available transcripts
         :rtype TranscriptList:
         """
-        print(cookies)
         with requests.Session() as http_client:
             if cookies:
-                cj = cookiejar.MozillaCookieJar()
-                cj.load(cookies)
-                http_client.cookies = cj
+                try:
+                    cj = cookiejar.MozillaCookieJar()
+                    cj.load(cookies)
+                    http_client.cookies = cj
+                except IOError as e:
+                    print("Warning: Path for cookies file was not valid. Did not load any cookies")
+                except FileNotFoundError as e:
+                    print("Warning: Path for cookies file was not valid. Did not load any cookies")
+
             http_client.proxies = proxies if proxies else {}
             return TranscriptListFetcher(http_client).fetch(video_id)

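The cookie handling added above follows the standard http.cookiejar pattern: a Netscape-format cookies.txt file is loaded into a MozillaCookieJar and attached to the requests.Session, so every request carries the authorization cookies. Below is a minimal, self-contained sketch of that pattern; it is not part of this commit, and the helper name and arguments are illustrative. Note that in Python 3, FileNotFoundError is a subclass of OSError (IOError is an alias of OSError), so the first except clause in the diff already covers the second.

```python
# Illustrative sketch only -- not part of this commit.
from http import cookiejar

import requests


def build_cookie_session(cookie_path=None):
    """Return a requests.Session, optionally preloaded with cookies from a cookies.txt file."""
    session = requests.Session()
    if cookie_path:
        jar = cookiejar.MozillaCookieJar()
        try:
            # load() raises OSError/IOError for a missing path and
            # cookiejar.LoadError for a file that is not in Netscape format.
            jar.load(cookie_path)
            session.cookies = jar
        except (IOError, cookiejar.LoadError):
            print("Warning: could not load cookies from {}".format(cookie_path))
    return session
```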
@@ -80,6 +87,8 @@ class YouTubeTranscriptApi():
         :type continue_after_error: bool
         :param proxies: a dictionary mapping of http and https proxies to be used for the network requests
         :type proxies: {'http': str, 'https': str} - http://docs.python-requests.org/en/master/user/advanced/#proxies
+        :param cookies: a string of the path to a text file containing youtube authorization cookies
+        :type cookies: str - cookies.txt
         :return: a tuple containing a dictionary mapping video ids onto their corresponding transcripts, and a list of
         video ids, which could not be retrieved
         :rtype ({str: [{'text': str, 'start': float, 'end': float}]}, [str]}):
@@ -113,6 +122,8 @@ class YouTubeTranscriptApi():
         :type languages: list[str]
         :param proxies: a dictionary mapping of http and https proxies to be used for the network requests
         :type proxies: {'http': str, 'https': str} - http://docs.python-requests.org/en/master/user/advanced/#proxies
+        :param cookies: a string of the path to a text file containing youtube authorization cookies
+        :type cookies: str - cookies.txt
         :return: a list of dictionaries containing the 'text', 'start' and 'duration' keys
         :rtype [{'text': str, 'start': float, 'end': float}]:
         """
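With these docstring additions, the cookies parameter is documented consistently across the public methods: it is simply the path to a cookies.txt file. A usage sketch follows; the import path, video id, languages, and file path are placeholder assumptions, not taken from this diff.

```python
# Illustrative usage sketch only -- not part of this commit.
from youtube_transcript_api import YouTubeTranscriptApi

# Any video id, language list, and cookies.txt path work the same way.
transcript = YouTubeTranscriptApi.get_transcript(
    'GJLlxj_dtq8',
    languages=('de', 'en'),
    cookies='/path/to/cookies.txt',
)
```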
@@ -21,12 +21,14 @@ class YouTubeTranscriptCli():
         if parsed_args.http_proxy != '' or parsed_args.https_proxy != '':
             proxies = {"http": parsed_args.http_proxy, "https": parsed_args.https_proxy}

+        cookies = parsed_args.cookies
+
         transcripts = []
         exceptions = []

         for video_id in parsed_args.video_ids:
             try:
-                transcripts.append(self._fetch_transcript(parsed_args, proxies, video_id))
+                transcripts.append(self._fetch_transcript(parsed_args, proxies, cookies, video_id))
             except Exception as exception:
                 exceptions.append(exception)

@@ -35,8 +37,8 @@ class YouTubeTranscriptCli():
             + ([json.dumps(transcripts) if parsed_args.json else pprint.pformat(transcripts)] if transcripts else [])
         )

-    def _fetch_transcript(self, parsed_args, proxies, video_id):
-        transcript_list = YouTubeTranscriptApi.list_transcripts(video_id, proxies=proxies)
+    def _fetch_transcript(self, parsed_args, proxies, cookies, video_id):
+        transcript_list = YouTubeTranscriptApi.list_transcripts(video_id, proxies=proxies, cookies=cookies)

         if parsed_args.list_transcripts:
             return str(transcript_list)
@@ -123,5 +125,10 @@ class YouTubeTranscriptCli():
             metavar='URL',
             help='Use the specified HTTPS proxy.'
         )
+        parser.add_argument(
+            '--cookies',
+            default=None,
+            help='The cookie file that will be used for authorization with youtube.'
+        )

         return parser.parse_args(self._args)
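The new --cookies option defaults to None and takes the path to the cookie file; run() reads it from the parsed arguments and threads it through _fetch_transcript into YouTubeTranscriptApi.list_transcripts. A sketch mirroring the CLI tests below; the youtube_transcript_api._cli import path and the file path are assumptions, not shown in this diff.

```python
# Illustrative sketch only -- the import path is an assumption based on the class names in this diff.
from youtube_transcript_api._cli import YouTubeTranscriptCli

output = YouTubeTranscriptCli(
    'v1 v2 --languages de en --cookies /path/to/cookies.txt'.split()
).run()
print(output)
```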
@@ -159,8 +159,8 @@ class TestYouTubeTranscriptApi(TestCase):

         YouTubeTranscriptApi.get_transcripts([video_id_1, video_id_2], languages=languages)

-        YouTubeTranscriptApi.get_transcript.assert_any_call(video_id_1, languages, None)
-        YouTubeTranscriptApi.get_transcript.assert_any_call(video_id_2, languages, None)
+        YouTubeTranscriptApi.get_transcript.assert_any_call(video_id_1, languages, None, None)
+        YouTubeTranscriptApi.get_transcript.assert_any_call(video_id_2, languages, None, None)
         self.assertEqual(YouTubeTranscriptApi.get_transcript.call_count, 2)

     def test_get_transcripts__stop_on_error(self):
@@ -176,15 +176,21 @@ class TestYouTubeTranscriptApi(TestCase):

         YouTubeTranscriptApi.get_transcripts(['video_id_1', 'video_id_2'], continue_after_error=True)

-        YouTubeTranscriptApi.get_transcript.assert_any_call(video_id_1, ('en',), None)
-        YouTubeTranscriptApi.get_transcript.assert_any_call(video_id_2, ('en',), None)
+        YouTubeTranscriptApi.get_transcript.assert_any_call(video_id_1, ('en',), None, None)
+        YouTubeTranscriptApi.get_transcript.assert_any_call(video_id_2, ('en',), None, None)

+    def test_get_transcripts__check_cookies(self):
+        cookies='example_cookies.txt'
+        YouTubeTranscriptApi.get_transcript = MagicMock()
+        YouTubeTranscriptApi.get_transcripts(['GJLlxj_dtq8'], cookies=cookies)
+        YouTubeTranscriptApi.get_transcript.assert_any_call('GJLlxj_dtq8', ('en',), None, cookies)
+
     def test_get_transcript__with_proxies(self):
         proxies = {'http': '', 'https:': ''}
         transcript = YouTubeTranscriptApi.get_transcript(
             'GJLlxj_dtq8', proxies=proxies
         )

         self.assertEqual(
             transcript,
             [
@@ -195,4 +201,4 @@ class TestYouTubeTranscriptApi(TestCase):
         )
         YouTubeTranscriptApi.get_transcript = MagicMock()
         YouTubeTranscriptApi.get_transcripts(['GJLlxj_dtq8'], proxies=proxies)
-        YouTubeTranscriptApi.get_transcript.assert_any_call('GJLlxj_dtq8', ('en',), proxies)
+        YouTubeTranscriptApi.get_transcript.assert_any_call('GJLlxj_dtq8', ('en',), proxies, None)
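The test updates above are mechanical: get_transcripts now forwards a fourth positional argument (cookies) to get_transcript, so every assert_any_call on the mock needs the extra None. A small standalone sketch of why the extra argument is required; the mock name and values are illustrative, not part of this commit.

```python
# Illustrative sketch only -- not part of this commit.
from unittest.mock import MagicMock

fetch = MagicMock()
fetch('GJLlxj_dtq8', ('en',), None, None)   # called with proxies=None and cookies=None

fetch.assert_any_call('GJLlxj_dtq8', ('en',), None, None)  # matches the recorded call
try:
    fetch.assert_any_call('GJLlxj_dtq8', ('en',), None)    # old three-argument assertion
except AssertionError:
    print("three-argument assertion no longer matches the four-argument call")
```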
@@ -164,8 +164,8 @@ class TestYouTubeTranscriptCli(TestCase):
     def test_run(self):
         YouTubeTranscriptCli('v1 v2 --languages de en'.split()).run()

-        YouTubeTranscriptApi.list_transcripts.assert_any_call('v1', proxies=None)
-        YouTubeTranscriptApi.list_transcripts.assert_any_call('v2', proxies=None)
+        YouTubeTranscriptApi.list_transcripts.assert_any_call('v1', proxies=None, cookies=None)
+        YouTubeTranscriptApi.list_transcripts.assert_any_call('v2', proxies=None, cookies=None)

         self.transcript_list_mock.find_transcript.assert_any_call(['de', 'en'])

@@ -200,8 +200,8 @@ class TestYouTubeTranscriptCli(TestCase):
     def test_run__list_transcripts(self):
         YouTubeTranscriptCli('--list-transcripts v1 v2'.split()).run()

-        YouTubeTranscriptApi.list_transcripts.assert_any_call('v1', proxies=None)
-        YouTubeTranscriptApi.list_transcripts.assert_any_call('v2', proxies=None)
+        YouTubeTranscriptApi.list_transcripts.assert_any_call('v1', proxies=None, cookies=None)
+        YouTubeTranscriptApi.list_transcripts.assert_any_call('v2', proxies=None, cookies=None)

     def test_run__json_output(self):
         output = YouTubeTranscriptCli('v1 v2 --languages de en --json'.split()).run()
@@ -220,10 +220,23 @@ class TestYouTubeTranscriptCli(TestCase):

         YouTubeTranscriptApi.list_transcripts.assert_any_call(
             'v1',
-            proxies={'http': 'http://user:pass@domain:port', 'https': 'https://user:pass@domain:port'}
+            proxies={'http': 'http://user:pass@domain:port', 'https': 'https://user:pass@domain:port'},
+            cookies= None
         )

         YouTubeTranscriptApi.list_transcripts.assert_any_call(
             'v2',
-            proxies={'http': 'http://user:pass@domain:port', 'https': 'https://user:pass@domain:port'}
+            proxies={'http': 'http://user:pass@domain:port', 'https': 'https://user:pass@domain:port'},
+            cookies=None
         )
+
+    def test_run__cookies(self):
+        YouTubeTranscriptCli(
+            (
+                'v1 v2 --languages de en '
+                '--cookies blahblah.txt'
+            ).split()
+        ).run()
+        YouTubeTranscriptApi.list_transcripts.assert_any_call('v1', proxies=None, cookies='blahblah.txt')
+        YouTubeTranscriptApi.list_transcripts.assert_any_call('v2', proxies=None, cookies='blahblah.txt')
+