Added cli support, fixed testing
This commit is contained in:
parent
dc9fc2ee93
commit
f9e553ebaf
|
@ -52,15 +52,22 @@ class YouTubeTranscriptApi():
|
|||
:type video_id: str
|
||||
:param proxies: a dictionary mapping of http and https proxies to be used for the network requests
|
||||
:type proxies: {'http': str, 'https': str} - http://docs.python-requests.org/en/master/user/advanced/#proxies
|
||||
:param cookies: a string of the path to a text file containing youtube authorization cookies
|
||||
:type cookies: str - cookies.txt
|
||||
:return: the list of available transcripts
|
||||
:rtype TranscriptList:
|
||||
"""
|
||||
print(cookies)
|
||||
with requests.Session() as http_client:
|
||||
if cookies:
|
||||
try:
|
||||
cj = cookiejar.MozillaCookieJar()
|
||||
cj.load(cookies)
|
||||
http_client.cookies = cj
|
||||
except IOError as e:
|
||||
print("Warning: Path for cookies file was not valid. Did not load any cookies")
|
||||
except FileNotFoundError as e:
|
||||
print("Warning: Path for cookies file was not valid. Did not load any cookies")
|
||||
|
||||
http_client.proxies = proxies if proxies else {}
|
||||
return TranscriptListFetcher(http_client).fetch(video_id)
|
||||
|
||||
|
@ -80,6 +87,8 @@ class YouTubeTranscriptApi():
|
|||
:type continue_after_error: bool
|
||||
:param proxies: a dictionary mapping of http and https proxies to be used for the network requests
|
||||
:type proxies: {'http': str, 'https': str} - http://docs.python-requests.org/en/master/user/advanced/#proxies
|
||||
:param cookies: a string of the path to a text file containing youtube authorization cookies
|
||||
:type cookies: str - cookies.txt
|
||||
:return: a tuple containing a dictionary mapping video ids onto their corresponding transcripts, and a list of
|
||||
video ids, which could not be retrieved
|
||||
:rtype ({str: [{'text': str, 'start': float, 'duration': float}]}, [str]):
|
||||
|
@ -113,6 +122,8 @@ class YouTubeTranscriptApi():
|
|||
:type languages: list[str]
|
||||
:param proxies: a dictionary mapping of http and https proxies to be used for the network requests
|
||||
:type proxies: {'http': str, 'https': str} - http://docs.python-requests.org/en/master/user/advanced/#proxies
|
||||
:param cookies: a string of the path to a text file containing youtube authorization cookies
|
||||
:type cookies: str - cookies.txt
|
||||
:return: a list of dictionaries containing the 'text', 'start' and 'duration' keys
|
||||
:rtype [{'text': str, 'start': float, 'duration': float}]:
|
||||
"""
|
||||
|
|
|
@ -21,12 +21,14 @@ class YouTubeTranscriptCli():
|
|||
if parsed_args.http_proxy != '' or parsed_args.https_proxy != '':
|
||||
proxies = {"http": parsed_args.http_proxy, "https": parsed_args.https_proxy}
|
||||
|
||||
cookies = parsed_args.cookies
|
||||
|
||||
transcripts = []
|
||||
exceptions = []
|
||||
|
||||
for video_id in parsed_args.video_ids:
|
||||
try:
|
||||
transcripts.append(self._fetch_transcript(parsed_args, proxies, video_id))
|
||||
transcripts.append(self._fetch_transcript(parsed_args, proxies, cookies, video_id))
|
||||
except Exception as exception:
|
||||
exceptions.append(exception)
|
||||
|
||||
|
@ -35,8 +37,8 @@ class YouTubeTranscriptCli():
|
|||
+ ([json.dumps(transcripts) if parsed_args.json else pprint.pformat(transcripts)] if transcripts else [])
|
||||
)
|
||||
|
||||
def _fetch_transcript(self, parsed_args, proxies, video_id):
|
||||
transcript_list = YouTubeTranscriptApi.list_transcripts(video_id, proxies=proxies)
|
||||
def _fetch_transcript(self, parsed_args, proxies, cookies, video_id):
|
||||
transcript_list = YouTubeTranscriptApi.list_transcripts(video_id, proxies=proxies, cookies=cookies)
|
||||
|
||||
if parsed_args.list_transcripts:
|
||||
return str(transcript_list)
|
||||
|
@ -123,5 +125,10 @@ class YouTubeTranscriptCli():
|
|||
metavar='URL',
|
||||
help='Use the specified HTTPS proxy.'
|
||||
)
|
||||
parser.add_argument(
|
||||
'--cookies',
|
||||
default=None,
|
||||
help='The cookie file that will be used for authorization with youtube.'
|
||||
)
|
||||
|
||||
return parser.parse_args(self._args)
|
||||
|
|
|
@ -159,8 +159,8 @@ class TestYouTubeTranscriptApi(TestCase):
|
|||
|
||||
YouTubeTranscriptApi.get_transcripts([video_id_1, video_id_2], languages=languages)
|
||||
|
||||
YouTubeTranscriptApi.get_transcript.assert_any_call(video_id_1, languages, None)
|
||||
YouTubeTranscriptApi.get_transcript.assert_any_call(video_id_2, languages, None)
|
||||
YouTubeTranscriptApi.get_transcript.assert_any_call(video_id_1, languages, None, None)
|
||||
YouTubeTranscriptApi.get_transcript.assert_any_call(video_id_2, languages, None, None)
|
||||
self.assertEqual(YouTubeTranscriptApi.get_transcript.call_count, 2)
|
||||
|
||||
def test_get_transcripts__stop_on_error(self):
|
||||
|
@ -176,15 +176,21 @@ class TestYouTubeTranscriptApi(TestCase):
|
|||
|
||||
YouTubeTranscriptApi.get_transcripts(['video_id_1', 'video_id_2'], continue_after_error=True)
|
||||
|
||||
YouTubeTranscriptApi.get_transcript.assert_any_call(video_id_1, ('en',), None)
|
||||
YouTubeTranscriptApi.get_transcript.assert_any_call(video_id_2, ('en',), None)
|
||||
YouTubeTranscriptApi.get_transcript.assert_any_call(video_id_1, ('en',), None, None)
|
||||
YouTubeTranscriptApi.get_transcript.assert_any_call(video_id_2, ('en',), None, None)
|
||||
|
||||
def test_get_transcripts__check_cookies(self):
    """get_transcripts must pass the cookies path through to get_transcript unchanged."""
    video_id = 'GJLlxj_dtq8'
    cookie_path = 'example_cookies.txt'
    YouTubeTranscriptApi.get_transcript = MagicMock()

    YouTubeTranscriptApi.get_transcripts([video_id], cookies=cookie_path)

    # Positional order expected by the mock: video id, languages, proxies, cookies.
    YouTubeTranscriptApi.get_transcript.assert_any_call(video_id, ('en',), None, cookie_path)
|
||||
|
||||
|
||||
def test_get_transcript__with_proxies(self):
|
||||
proxies = {'http': '', 'https:': ''}
|
||||
transcript = YouTubeTranscriptApi.get_transcript(
|
||||
'GJLlxj_dtq8', proxies=proxies
|
||||
)
|
||||
|
||||
self.assertEqual(
|
||||
transcript,
|
||||
[
|
||||
|
@ -195,4 +201,4 @@ class TestYouTubeTranscriptApi(TestCase):
|
|||
)
|
||||
YouTubeTranscriptApi.get_transcript = MagicMock()
|
||||
YouTubeTranscriptApi.get_transcripts(['GJLlxj_dtq8'], proxies=proxies)
|
||||
YouTubeTranscriptApi.get_transcript.assert_any_call('GJLlxj_dtq8', ('en',), proxies)
|
||||
YouTubeTranscriptApi.get_transcript.assert_any_call('GJLlxj_dtq8', ('en',), proxies, None)
|
||||
|
|
|
@ -164,8 +164,8 @@ class TestYouTubeTranscriptCli(TestCase):
|
|||
def test_run(self):
|
||||
YouTubeTranscriptCli('v1 v2 --languages de en'.split()).run()
|
||||
|
||||
YouTubeTranscriptApi.list_transcripts.assert_any_call('v1', proxies=None)
|
||||
YouTubeTranscriptApi.list_transcripts.assert_any_call('v2', proxies=None)
|
||||
YouTubeTranscriptApi.list_transcripts.assert_any_call('v1', proxies=None, cookies=None)
|
||||
YouTubeTranscriptApi.list_transcripts.assert_any_call('v2', proxies=None, cookies=None)
|
||||
|
||||
self.transcript_list_mock.find_transcript.assert_any_call(['de', 'en'])
|
||||
|
||||
|
@ -200,8 +200,8 @@ class TestYouTubeTranscriptCli(TestCase):
|
|||
def test_run__list_transcripts(self):
|
||||
YouTubeTranscriptCli('--list-transcripts v1 v2'.split()).run()
|
||||
|
||||
YouTubeTranscriptApi.list_transcripts.assert_any_call('v1', proxies=None)
|
||||
YouTubeTranscriptApi.list_transcripts.assert_any_call('v2', proxies=None)
|
||||
YouTubeTranscriptApi.list_transcripts.assert_any_call('v1', proxies=None, cookies=None)
|
||||
YouTubeTranscriptApi.list_transcripts.assert_any_call('v2', proxies=None, cookies=None)
|
||||
|
||||
def test_run__json_output(self):
|
||||
output = YouTubeTranscriptCli('v1 v2 --languages de en --json'.split()).run()
|
||||
|
@ -220,10 +220,23 @@ class TestYouTubeTranscriptCli(TestCase):
|
|||
|
||||
YouTubeTranscriptApi.list_transcripts.assert_any_call(
|
||||
'v1',
|
||||
proxies={'http': 'http://user:pass@domain:port', 'https': 'https://user:pass@domain:port'}
|
||||
proxies={'http': 'http://user:pass@domain:port', 'https': 'https://user:pass@domain:port'},
|
||||
cookies= None
|
||||
)
|
||||
|
||||
YouTubeTranscriptApi.list_transcripts.assert_any_call(
|
||||
'v2',
|
||||
proxies={'http': 'http://user:pass@domain:port', 'https': 'https://user:pass@domain:port'}
|
||||
proxies={'http': 'http://user:pass@domain:port', 'https': 'https://user:pass@domain:port'},
|
||||
cookies=None
|
||||
)
|
||||
|
||||
def test_run__cookies(self):
    """The --cookies option must be forwarded to list_transcripts for every video id."""
    cli_args = 'v1 v2 --languages de en --cookies blahblah.txt'.split()

    YouTubeTranscriptCli(cli_args).run()

    for video_id in ('v1', 'v2'):
        YouTubeTranscriptApi.list_transcripts.assert_any_call(
            video_id, proxies=None, cookies='blahblah.txt'
        )
|
||||
|
||||
|
|
Loading…
Reference in New Issue