Added CLI support for cookies, fixed tests
This commit is contained in:
		
							parent
							
								
									dc9fc2ee93
								
							
						
					
					
						commit
						f9e553ebaf
					
				|  | @ -52,15 +52,22 @@ class YouTubeTranscriptApi(): | ||||||
|         :type video_id: str |         :type video_id: str | ||||||
|         :param proxies: a dictionary mapping of http and https proxies to be used for the network requests |         :param proxies: a dictionary mapping of http and https proxies to be used for the network requests | ||||||
|         :type proxies: {'http': str, 'https': str} - http://docs.python-requests.org/en/master/user/advanced/#proxies |         :type proxies: {'http': str, 'https': str} - http://docs.python-requests.org/en/master/user/advanced/#proxies | ||||||
|  |         :param cookies: a string of the path to a text file containing youtube authorization cookies | ||||||
|  |         :type cookies: str - cookies.txt | ||||||
|         :return: the list of available transcripts |         :return: the list of available transcripts | ||||||
|         :rtype TranscriptList: |         :rtype TranscriptList: | ||||||
|         """ |         """ | ||||||
|         print(cookies) |  | ||||||
|         with requests.Session() as http_client: |         with requests.Session() as http_client: | ||||||
|             if cookies: |             if cookies: | ||||||
|  |                 try: | ||||||
|                     cj = cookiejar.MozillaCookieJar() |                     cj = cookiejar.MozillaCookieJar() | ||||||
|                     cj.load(cookies) |                     cj.load(cookies) | ||||||
|                     http_client.cookies = cj |                     http_client.cookies = cj | ||||||
|  |                 except IOError as e: | ||||||
|  |                     print("Warning: Path for cookies file was not valid. Did not load any cookies") | ||||||
|  |                 except FileNotFoundError as e: | ||||||
|  |                     print("Warning: Path for cookies file was not valid. Did not load any cookies") | ||||||
|  |              | ||||||
|             http_client.proxies = proxies if proxies else {} |             http_client.proxies = proxies if proxies else {} | ||||||
|             return TranscriptListFetcher(http_client).fetch(video_id) |             return TranscriptListFetcher(http_client).fetch(video_id) | ||||||
| 
 | 
 | ||||||
|  | @ -80,6 +87,8 @@ class YouTubeTranscriptApi(): | ||||||
|         :type continue_after_error: bool |         :type continue_after_error: bool | ||||||
|         :param proxies: a dictionary mapping of http and https proxies to be used for the network requests |         :param proxies: a dictionary mapping of http and https proxies to be used for the network requests | ||||||
|         :type proxies: {'http': str, 'https': str} - http://docs.python-requests.org/en/master/user/advanced/#proxies |         :type proxies: {'http': str, 'https': str} - http://docs.python-requests.org/en/master/user/advanced/#proxies | ||||||
|  |         :param cookies: a string of the path to a text file containing youtube authorization cookies | ||||||
|  |         :type cookies: str - cookies.txt | ||||||
|         :return: a tuple containing a dictionary mapping video ids onto their corresponding transcripts, and a list of |         :return: a tuple containing a dictionary mapping video ids onto their corresponding transcripts, and a list of | ||||||
|         video ids, which could not be retrieved |         video ids, which could not be retrieved | ||||||
|         :rtype ({str: [{'text': str, 'start': float, 'end': float}]}, [str]}): |         :rtype ({str: [{'text': str, 'start': float, 'end': float}]}, [str]}): | ||||||
|  | @ -113,6 +122,8 @@ class YouTubeTranscriptApi(): | ||||||
|         :type languages: list[str] |         :type languages: list[str] | ||||||
|         :param proxies: a dictionary mapping of http and https proxies to be used for the network requests |         :param proxies: a dictionary mapping of http and https proxies to be used for the network requests | ||||||
|         :type proxies: {'http': str, 'https': str} - http://docs.python-requests.org/en/master/user/advanced/#proxies |         :type proxies: {'http': str, 'https': str} - http://docs.python-requests.org/en/master/user/advanced/#proxies | ||||||
|  |         :param cookies: a string of the path to a text file containing youtube authorization cookies | ||||||
|  |         :type cookies: str - cookies.txt | ||||||
|         :return: a list of dictionaries containing the 'text', 'start' and 'duration' keys |         :return: a list of dictionaries containing the 'text', 'start' and 'duration' keys | ||||||
|         :rtype [{'text': str, 'start': float, 'end': float}]: |         :rtype [{'text': str, 'start': float, 'end': float}]: | ||||||
|         """ |         """ | ||||||
|  |  | ||||||
|  | @ -21,12 +21,14 @@ class YouTubeTranscriptCli(): | ||||||
|         if parsed_args.http_proxy != '' or parsed_args.https_proxy != '': |         if parsed_args.http_proxy != '' or parsed_args.https_proxy != '': | ||||||
|             proxies = {"http": parsed_args.http_proxy, "https": parsed_args.https_proxy} |             proxies = {"http": parsed_args.http_proxy, "https": parsed_args.https_proxy} | ||||||
| 
 | 
 | ||||||
|  |         cookies = parsed_args.cookies | ||||||
|  | 
 | ||||||
|         transcripts = [] |         transcripts = [] | ||||||
|         exceptions = [] |         exceptions = [] | ||||||
| 
 | 
 | ||||||
|         for video_id in parsed_args.video_ids: |         for video_id in parsed_args.video_ids: | ||||||
|             try: |             try: | ||||||
|                 transcripts.append(self._fetch_transcript(parsed_args, proxies, video_id)) |                 transcripts.append(self._fetch_transcript(parsed_args, proxies, cookies, video_id)) | ||||||
|             except Exception as exception: |             except Exception as exception: | ||||||
|                 exceptions.append(exception) |                 exceptions.append(exception) | ||||||
| 
 | 
 | ||||||
|  | @ -35,8 +37,8 @@ class YouTubeTranscriptCli(): | ||||||
|             + ([json.dumps(transcripts) if parsed_args.json else pprint.pformat(transcripts)] if transcripts else []) |             + ([json.dumps(transcripts) if parsed_args.json else pprint.pformat(transcripts)] if transcripts else []) | ||||||
|         ) |         ) | ||||||
| 
 | 
 | ||||||
|     def _fetch_transcript(self, parsed_args, proxies, video_id): |     def _fetch_transcript(self, parsed_args, proxies, cookies, video_id): | ||||||
|         transcript_list = YouTubeTranscriptApi.list_transcripts(video_id, proxies=proxies) |         transcript_list = YouTubeTranscriptApi.list_transcripts(video_id, proxies=proxies, cookies=cookies) | ||||||
| 
 | 
 | ||||||
|         if parsed_args.list_transcripts: |         if parsed_args.list_transcripts: | ||||||
|             return str(transcript_list) |             return str(transcript_list) | ||||||
|  | @ -123,5 +125,10 @@ class YouTubeTranscriptCli(): | ||||||
|             metavar='URL', |             metavar='URL', | ||||||
|             help='Use the specified HTTPS proxy.' |             help='Use the specified HTTPS proxy.' | ||||||
|         ) |         ) | ||||||
|  |         parser.add_argument( | ||||||
|  |             '--cookies', | ||||||
|  |             default=None, | ||||||
|  |             help='The cookie file that will be used for authorization with youtube.' | ||||||
|  |         ) | ||||||
|              |              | ||||||
|         return parser.parse_args(self._args) |         return parser.parse_args(self._args) | ||||||
|  |  | ||||||
|  | @ -159,8 +159,8 @@ class TestYouTubeTranscriptApi(TestCase): | ||||||
| 
 | 
 | ||||||
|         YouTubeTranscriptApi.get_transcripts([video_id_1, video_id_2], languages=languages) |         YouTubeTranscriptApi.get_transcripts([video_id_1, video_id_2], languages=languages) | ||||||
| 
 | 
 | ||||||
|         YouTubeTranscriptApi.get_transcript.assert_any_call(video_id_1, languages, None) |         YouTubeTranscriptApi.get_transcript.assert_any_call(video_id_1, languages, None, None) | ||||||
|         YouTubeTranscriptApi.get_transcript.assert_any_call(video_id_2, languages, None) |         YouTubeTranscriptApi.get_transcript.assert_any_call(video_id_2, languages, None, None) | ||||||
|         self.assertEqual(YouTubeTranscriptApi.get_transcript.call_count, 2) |         self.assertEqual(YouTubeTranscriptApi.get_transcript.call_count, 2) | ||||||
| 
 | 
 | ||||||
|     def test_get_transcripts__stop_on_error(self): |     def test_get_transcripts__stop_on_error(self): | ||||||
|  | @ -176,15 +176,21 @@ class TestYouTubeTranscriptApi(TestCase): | ||||||
| 
 | 
 | ||||||
|         YouTubeTranscriptApi.get_transcripts(['video_id_1', 'video_id_2'], continue_after_error=True) |         YouTubeTranscriptApi.get_transcripts(['video_id_1', 'video_id_2'], continue_after_error=True) | ||||||
| 
 | 
 | ||||||
|         YouTubeTranscriptApi.get_transcript.assert_any_call(video_id_1, ('en',), None) |         YouTubeTranscriptApi.get_transcript.assert_any_call(video_id_1, ('en',), None, None) | ||||||
|         YouTubeTranscriptApi.get_transcript.assert_any_call(video_id_2, ('en',), None) |         YouTubeTranscriptApi.get_transcript.assert_any_call(video_id_2, ('en',), None, None) | ||||||
|  |     | ||||||
|  |     def test_get_transcripts__check_cookies(self): | ||||||
|  |         cookies='example_cookies.txt' | ||||||
|  |         YouTubeTranscriptApi.get_transcript = MagicMock() | ||||||
|  |         YouTubeTranscriptApi.get_transcripts(['GJLlxj_dtq8'], cookies=cookies) | ||||||
|  |         YouTubeTranscriptApi.get_transcript.assert_any_call('GJLlxj_dtq8', ('en',), None, cookies) | ||||||
|  |          | ||||||
| 
 | 
 | ||||||
|     def test_get_transcript__with_proxies(self): |     def test_get_transcript__with_proxies(self): | ||||||
|         proxies = {'http': '', 'https:': ''} |         proxies = {'http': '', 'https:': ''} | ||||||
|         transcript = YouTubeTranscriptApi.get_transcript( |         transcript = YouTubeTranscriptApi.get_transcript( | ||||||
|             'GJLlxj_dtq8', proxies=proxies |             'GJLlxj_dtq8', proxies=proxies | ||||||
|         ) |         ) | ||||||
| 
 |  | ||||||
|         self.assertEqual( |         self.assertEqual( | ||||||
|             transcript, |             transcript, | ||||||
|             [ |             [ | ||||||
|  | @ -195,4 +201,4 @@ class TestYouTubeTranscriptApi(TestCase): | ||||||
|         ) |         ) | ||||||
|         YouTubeTranscriptApi.get_transcript = MagicMock() |         YouTubeTranscriptApi.get_transcript = MagicMock() | ||||||
|         YouTubeTranscriptApi.get_transcripts(['GJLlxj_dtq8'], proxies=proxies) |         YouTubeTranscriptApi.get_transcripts(['GJLlxj_dtq8'], proxies=proxies) | ||||||
|         YouTubeTranscriptApi.get_transcript.assert_any_call('GJLlxj_dtq8', ('en',), proxies) |         YouTubeTranscriptApi.get_transcript.assert_any_call('GJLlxj_dtq8', ('en',), proxies, None) | ||||||
|  |  | ||||||
|  | @ -164,8 +164,8 @@ class TestYouTubeTranscriptCli(TestCase): | ||||||
|     def test_run(self): |     def test_run(self): | ||||||
|         YouTubeTranscriptCli('v1 v2 --languages de en'.split()).run() |         YouTubeTranscriptCli('v1 v2 --languages de en'.split()).run() | ||||||
| 
 | 
 | ||||||
|         YouTubeTranscriptApi.list_transcripts.assert_any_call('v1', proxies=None) |         YouTubeTranscriptApi.list_transcripts.assert_any_call('v1', proxies=None, cookies=None) | ||||||
|         YouTubeTranscriptApi.list_transcripts.assert_any_call('v2', proxies=None) |         YouTubeTranscriptApi.list_transcripts.assert_any_call('v2', proxies=None, cookies=None) | ||||||
| 
 | 
 | ||||||
|         self.transcript_list_mock.find_transcript.assert_any_call(['de', 'en']) |         self.transcript_list_mock.find_transcript.assert_any_call(['de', 'en']) | ||||||
| 
 | 
 | ||||||
|  | @ -200,8 +200,8 @@ class TestYouTubeTranscriptCli(TestCase): | ||||||
|     def test_run__list_transcripts(self): |     def test_run__list_transcripts(self): | ||||||
|         YouTubeTranscriptCli('--list-transcripts v1 v2'.split()).run() |         YouTubeTranscriptCli('--list-transcripts v1 v2'.split()).run() | ||||||
| 
 | 
 | ||||||
|         YouTubeTranscriptApi.list_transcripts.assert_any_call('v1', proxies=None) |         YouTubeTranscriptApi.list_transcripts.assert_any_call('v1', proxies=None, cookies=None) | ||||||
|         YouTubeTranscriptApi.list_transcripts.assert_any_call('v2', proxies=None) |         YouTubeTranscriptApi.list_transcripts.assert_any_call('v2', proxies=None, cookies=None) | ||||||
| 
 | 
 | ||||||
|     def test_run__json_output(self): |     def test_run__json_output(self): | ||||||
|         output = YouTubeTranscriptCli('v1 v2 --languages de en --json'.split()).run() |         output = YouTubeTranscriptCli('v1 v2 --languages de en --json'.split()).run() | ||||||
|  | @ -220,10 +220,23 @@ class TestYouTubeTranscriptCli(TestCase): | ||||||
| 
 | 
 | ||||||
|         YouTubeTranscriptApi.list_transcripts.assert_any_call( |         YouTubeTranscriptApi.list_transcripts.assert_any_call( | ||||||
|             'v1', |             'v1', | ||||||
|             proxies={'http': 'http://user:pass@domain:port', 'https': 'https://user:pass@domain:port'} |             proxies={'http': 'http://user:pass@domain:port', 'https': 'https://user:pass@domain:port'}, | ||||||
|  |             cookies= None | ||||||
|         ) |         ) | ||||||
| 
 | 
 | ||||||
|         YouTubeTranscriptApi.list_transcripts.assert_any_call( |         YouTubeTranscriptApi.list_transcripts.assert_any_call( | ||||||
|             'v2', |             'v2', | ||||||
|             proxies={'http': 'http://user:pass@domain:port', 'https': 'https://user:pass@domain:port'} |             proxies={'http': 'http://user:pass@domain:port', 'https': 'https://user:pass@domain:port'}, | ||||||
|  |             cookies=None | ||||||
|         ) |         ) | ||||||
|  | 
 | ||||||
|  |     def test_run__cookies(self): | ||||||
|  |         YouTubeTranscriptCli( | ||||||
|  |             ( | ||||||
|  |                 'v1 v2 --languages de en ' | ||||||
|  |                 '--cookies blahblah.txt' | ||||||
|  |             ).split() | ||||||
|  |         ).run() | ||||||
|  |         YouTubeTranscriptApi.list_transcripts.assert_any_call('v1', proxies=None, cookies='blahblah.txt') | ||||||
|  |         YouTubeTranscriptApi.list_transcripts.assert_any_call('v2', proxies=None, cookies='blahblah.txt') | ||||||
|  | 
 | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue