Added new CLI parameters to make the new features accessible from the command line
This commit is contained in:
		
							parent
							
								
									4b75a47a74
								
							
						
					
					
						commit
						f8416ab004
					
				|  | @ -72,11 +72,11 @@ class YouTubeTranscriptApi(): | ||||||
|         :param proxies: a dictionary mapping of http and https proxies to be used for the network requests |         :param proxies: a dictionary mapping of http and https proxies to be used for the network requests | ||||||
|         :type proxies: {'http': str, 'https': str} - http://docs.python-requests.org/en/master/user/advanced/#proxies |         :type proxies: {'http': str, 'https': str} - http://docs.python-requests.org/en/master/user/advanced/#proxies | ||||||
|         :return: a tuple containing a dictionary mapping video ids onto their corresponding transcripts, and a list of |         :return: a tuple containing a dictionary mapping video ids onto their corresponding transcripts, and a list of | ||||||
|         exceptions which occurred for the videos which could not be retrieved |         video ids, which could not be retrieved | ||||||
|         :rtype ({str: [{'text': str, 'start': float, 'end': float}]}, [CouldNotRetrieveTranscript]}): |         :rtype ({str: [{'text': str, 'start': float, 'end': float}]}, [str]}): | ||||||
|         """ |         """ | ||||||
|         data = {} |         data = {} | ||||||
|         exceptions = [] |         unretrievable_videos = [] | ||||||
| 
 | 
 | ||||||
|         for video_id in video_ids: |         for video_id in video_ids: | ||||||
|             try: |             try: | ||||||
|  | @ -85,9 +85,9 @@ class YouTubeTranscriptApi(): | ||||||
|                 if not continue_after_error: |                 if not continue_after_error: | ||||||
|                     raise exception |                     raise exception | ||||||
| 
 | 
 | ||||||
|                 exceptions.append(exception) |                 unretrievable_videos.append(video_id) | ||||||
| 
 | 
 | ||||||
|         return data, exceptions |         return data, unretrievable_videos | ||||||
| 
 | 
 | ||||||
|     @classmethod |     @classmethod | ||||||
|     def get_transcript(cls, video_id, languages=('en',), proxies=None): |     def get_transcript(cls, video_id, languages=('en',), proxies=None): | ||||||
|  |  | ||||||
|  | @ -14,22 +14,42 @@ class YouTubeTranscriptCli(): | ||||||
|     def run(self): |     def run(self): | ||||||
|         parsed_args = self._parse_args() |         parsed_args = self._parse_args() | ||||||
| 
 | 
 | ||||||
|  |         if parsed_args.exclude_manually_created and parsed_args.exclude_generated: | ||||||
|  |             return '' | ||||||
|  | 
 | ||||||
|         proxies = None |         proxies = None | ||||||
|         if parsed_args.http_proxy != '' or parsed_args.https_proxy != '': |         if parsed_args.http_proxy != '' or parsed_args.https_proxy != '': | ||||||
|             proxies = {"http": parsed_args.http_proxy, "https": parsed_args.https_proxy} |             proxies = {"http": parsed_args.http_proxy, "https": parsed_args.https_proxy} | ||||||
| 
 | 
 | ||||||
|         transcripts, unretrievable_videos = YouTubeTranscriptApi.get_transcripts( |         transcripts = [] | ||||||
|             parsed_args.video_ids, |         exceptions = [] | ||||||
|             languages=parsed_args.languages, | 
 | ||||||
|             continue_after_error=True, |         for video_id in parsed_args.video_ids: | ||||||
|             proxies=proxies |             try: | ||||||
|         ) |                 transcripts.append(self._fetch_transcript(parsed_args, proxies, video_id)) | ||||||
|  |             except Exception as exception: | ||||||
|  |                 exceptions.append(exception) | ||||||
| 
 | 
 | ||||||
|         return '\n\n'.join( |         return '\n\n'.join( | ||||||
|             [str(exception) for exception in unretrievable_videos] |             [str(exception) for exception in exceptions] | ||||||
|             + ([json.dumps(transcripts) if parsed_args.json else pprint.pformat(transcripts)] if transcripts else []) |             + ([json.dumps(transcripts) if parsed_args.json else pprint.pformat(transcripts)] if transcripts else []) | ||||||
|         ) |         ) | ||||||
| 
 | 
 | ||||||
|  |     def _fetch_transcript(self, parsed_args, proxies, video_id): | ||||||
|  |         transcript_list = YouTubeTranscriptApi.list_transcripts(video_id, proxies=proxies) | ||||||
|  | 
 | ||||||
|  |         if parsed_args.exclude_manually_created: | ||||||
|  |             transcript = transcript_list.find_generated_transcript(parsed_args.languages) | ||||||
|  |         elif parsed_args.exclude_generated: | ||||||
|  |             transcript = transcript_list.find_manually_created_transcript(parsed_args.languages) | ||||||
|  |         else: | ||||||
|  |             transcript = transcript_list.find_transcript(parsed_args.languages) | ||||||
|  | 
 | ||||||
|  |         if parsed_args.translate: | ||||||
|  |             transcript = transcript.translate(parsed_args.translate) | ||||||
|  | 
 | ||||||
|  |         return transcript.fetch() | ||||||
|  | 
 | ||||||
|     def _parse_args(self): |     def _parse_args(self): | ||||||
|         parser = argparse.ArgumentParser( |         parser = argparse.ArgumentParser( | ||||||
|             description=( |             description=( | ||||||
|  | @ -38,6 +58,13 @@ class YouTubeTranscriptCli(): | ||||||
|                 'other selenium based solutions do!' |                 'other selenium based solutions do!' | ||||||
|             ) |             ) | ||||||
|         ) |         ) | ||||||
|  |         parser.add_argument( | ||||||
|  |             '--list-transcripts', | ||||||
|  |             action='store_const', | ||||||
|  |             const=True, | ||||||
|  |             default=False, | ||||||
|  |             help='This will list the languages in which the given videos are available in.', | ||||||
|  |         ) | ||||||
|         parser.add_argument('video_ids', nargs='+', type=str, help='List of YouTube video IDs.') |         parser.add_argument('video_ids', nargs='+', type=str, help='List of YouTube video IDs.') | ||||||
|         parser.add_argument( |         parser.add_argument( | ||||||
|             '--languages', |             '--languages', | ||||||
|  | @ -46,11 +73,25 @@ class YouTubeTranscriptCli(): | ||||||
|             type=str, |             type=str, | ||||||
|             help=( |             help=( | ||||||
|                 'A list of language codes in a descending priority. For example, if this is set to "de en" it will ' |                 'A list of language codes in a descending priority. For example, if this is set to "de en" it will ' | ||||||
|                 'first try to fetch the german transcript (de) and then fetch the english transcipt (en) if it fails ' |                 'first try to fetch the german transcript (de) and then fetch the english transcript (en) if it fails ' | ||||||
|                 'to do so. As I can\'t provide a complete list of all working language codes with full certainty, you ' |                 'to do so. As I can\'t provide a complete list of all working language codes with full certainty, you ' | ||||||
|                 'may have to play around with the language codes a bit, to find the one which is working for you!' |                 'may have to play around with the language codes a bit, to find the one which is working for you!' | ||||||
|             ), |             ), | ||||||
|         ) |         ) | ||||||
|  |         parser.add_argument( | ||||||
|  |             '--exclude-generated', | ||||||
|  |             action='store_const', | ||||||
|  |             const=True, | ||||||
|  |             default=False, | ||||||
|  |             help='If this flag is set transcripts which have been generated by YouTube will not be retrieved.', | ||||||
|  |         ) | ||||||
|  |         parser.add_argument( | ||||||
|  |             '--exclude-manually-created', | ||||||
|  |             action='store_const', | ||||||
|  |             const=True, | ||||||
|  |             default=False, | ||||||
|  |             help='If this flag is set transcripts which have been manually created will not be retrieved.', | ||||||
|  |         ) | ||||||
|         parser.add_argument( |         parser.add_argument( | ||||||
|             '--json', |             '--json', | ||||||
|             action='store_const', |             action='store_const', | ||||||
|  | @ -59,13 +100,24 @@ class YouTubeTranscriptCli(): | ||||||
|             help='If this flag is set the output will be JSON formatted.', |             help='If this flag is set the output will be JSON formatted.', | ||||||
|         ) |         ) | ||||||
|         parser.add_argument( |         parser.add_argument( | ||||||
|             '--http-proxy', dest='http_proxy', |             '--translate', | ||||||
|             default='', metavar='URL', |             default='', | ||||||
|  |             help=( | ||||||
|  |                 'The language code for the language you want this transcript to be translated to. Use the ' | ||||||
|  |                 '--list-transcripts feature to find out which languages are translatable and which translation ' | ||||||
|  |                 'languages are available.' | ||||||
|  |             ) | ||||||
|  |         ) | ||||||
|  |         parser.add_argument( | ||||||
|  |             '--http-proxy', | ||||||
|  |             default='', | ||||||
|  |             metavar='URL', | ||||||
|             help='Use the specified HTTP proxy.' |             help='Use the specified HTTP proxy.' | ||||||
|         ) |         ) | ||||||
|         parser.add_argument( |         parser.add_argument( | ||||||
|             '--https-proxy', dest='https_proxy', |             '--https-proxy', | ||||||
|             default='', metavar='URL', |             default='', | ||||||
|  |             metavar='URL', | ||||||
|             help='Use the specified HTTPS proxy.' |             help='Use the specified HTTPS proxy.' | ||||||
|         ) |         ) | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -3,10 +3,27 @@ from mock import MagicMock | ||||||
| 
 | 
 | ||||||
| import json | import json | ||||||
| 
 | 
 | ||||||
| from youtube_transcript_api._cli import YouTubeTranscriptCli, YouTubeTranscriptApi | from youtube_transcript_api import YouTubeTranscriptApi, VideoUnavailable | ||||||
|  | from youtube_transcript_api._cli import YouTubeTranscriptCli | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class TestYouTubeTranscriptCli(TestCase): | class TestYouTubeTranscriptCli(TestCase): | ||||||
|  |     def setUp(self): | ||||||
|  |         self.transcript_mock = MagicMock() | ||||||
|  |         self.transcript_mock.fetch = MagicMock(return_value=[ | ||||||
|  |             {'text': 'Hey, this is just a test', 'start': 0.0, 'duration': 1.54}, | ||||||
|  |             {'text': 'this is not the original transcript', 'start': 1.54, 'duration': 4.16}, | ||||||
|  |             {'text': 'just something shorter, I made up for testing', 'start': 5.7, 'duration': 3.239} | ||||||
|  |         ]) | ||||||
|  |         self.transcript_mock.translate = MagicMock(return_value=self.transcript_mock) | ||||||
|  | 
 | ||||||
|  |         self.transcript_list_mock = MagicMock() | ||||||
|  |         self.transcript_list_mock.find_generated_transcript = MagicMock(return_value=self.transcript_mock) | ||||||
|  |         self.transcript_list_mock.find_manually_created_transcript = MagicMock(return_value=self.transcript_mock) | ||||||
|  |         self.transcript_list_mock.find_transcript = MagicMock(return_value=self.transcript_mock) | ||||||
|  | 
 | ||||||
|  |         YouTubeTranscriptApi.list_transcripts = MagicMock(return_value=self.transcript_list_mock) | ||||||
|  | 
 | ||||||
|     def test_argument_parsing(self): |     def test_argument_parsing(self): | ||||||
|         parsed_args = YouTubeTranscriptCli('v1 v2 --json --languages de en'.split())._parse_args() |         parsed_args = YouTubeTranscriptCli('v1 v2 --json --languages de en'.split())._parse_args() | ||||||
|         self.assertEqual(parsed_args.video_ids, ['v1', 'v2']) |         self.assertEqual(parsed_args.video_ids, ['v1', 'v2']) | ||||||
|  | @ -106,32 +123,107 @@ class TestYouTubeTranscriptCli(TestCase): | ||||||
|         self.assertEqual(parsed_args.http_proxy, '') |         self.assertEqual(parsed_args.http_proxy, '') | ||||||
|         self.assertEqual(parsed_args.https_proxy, '') |         self.assertEqual(parsed_args.https_proxy, '') | ||||||
| 
 | 
 | ||||||
|  |     def test_argument_parsing__list_transcripts(self): | ||||||
|  |         parsed_args = YouTubeTranscriptCli('--list-transcripts v1 v2'.split())._parse_args() | ||||||
|  |         self.assertEqual(parsed_args.video_ids, ['v1', 'v2']) | ||||||
|  |         self.assertTrue(parsed_args.list_transcripts) | ||||||
|  | 
 | ||||||
|  |         parsed_args = YouTubeTranscriptCli('v1 v2 --list-transcripts'.split())._parse_args() | ||||||
|  |         self.assertEqual(parsed_args.video_ids, ['v1', 'v2']) | ||||||
|  |         self.assertTrue(parsed_args.list_transcripts) | ||||||
|  | 
 | ||||||
|  |     def test_argument_parsing__translate(self): | ||||||
|  |         parsed_args = YouTubeTranscriptCli('v1 v2 --languages de en --translate cz'.split())._parse_args() | ||||||
|  |         self.assertEqual(parsed_args.video_ids, ['v1', 'v2']) | ||||||
|  |         self.assertEqual(parsed_args.json, False) | ||||||
|  |         self.assertEqual(parsed_args.languages, ['de', 'en']) | ||||||
|  |         self.assertEqual(parsed_args.translate, 'cz') | ||||||
|  | 
 | ||||||
|  |         parsed_args = YouTubeTranscriptCli('v1 v2 --translate cz --languages de en'.split())._parse_args() | ||||||
|  |         self.assertEqual(parsed_args.video_ids, ['v1', 'v2']) | ||||||
|  |         self.assertEqual(parsed_args.json, False) | ||||||
|  |         self.assertEqual(parsed_args.languages, ['de', 'en']) | ||||||
|  |         self.assertEqual(parsed_args.translate, 'cz') | ||||||
|  | 
 | ||||||
|  |     def test_argument_parsing__manually_or_generated(self): | ||||||
|  |         parsed_args = YouTubeTranscriptCli('v1 v2 --exclude-manually-created'.split())._parse_args() | ||||||
|  |         self.assertEqual(parsed_args.video_ids, ['v1', 'v2']) | ||||||
|  |         self.assertTrue(parsed_args.exclude_manually_created) | ||||||
|  |         self.assertFalse(parsed_args.exclude_generated) | ||||||
|  | 
 | ||||||
|  |         parsed_args = YouTubeTranscriptCli('v1 v2 --exclude-generated'.split())._parse_args() | ||||||
|  |         self.assertEqual(parsed_args.video_ids, ['v1', 'v2']) | ||||||
|  |         self.assertFalse(parsed_args.exclude_manually_created) | ||||||
|  |         self.assertTrue(parsed_args.exclude_generated) | ||||||
|  | 
 | ||||||
|  |         parsed_args = YouTubeTranscriptCli('v1 v2 --exclude-manually-created --exclude-generated'.split())._parse_args() | ||||||
|  |         self.assertEqual(parsed_args.video_ids, ['v1', 'v2']) | ||||||
|  |         self.assertTrue(parsed_args.exclude_manually_created) | ||||||
|  |         self.assertTrue(parsed_args.exclude_generated) | ||||||
|  | 
 | ||||||
|     def test_run(self): |     def test_run(self): | ||||||
|         YouTubeTranscriptApi.get_transcripts = MagicMock(return_value=([], [])) |  | ||||||
|         YouTubeTranscriptCli('v1 v2 --languages de en'.split()).run() |         YouTubeTranscriptCli('v1 v2 --languages de en'.split()).run() | ||||||
| 
 | 
 | ||||||
|         YouTubeTranscriptApi.get_transcripts.assert_called_once_with( |         YouTubeTranscriptApi.list_transcripts.assert_any_call('v1', proxies=None) | ||||||
|             ['v1', 'v2'], |         YouTubeTranscriptApi.list_transcripts.assert_any_call('v2', proxies=None) | ||||||
|             languages=['de', 'en'], | 
 | ||||||
|             continue_after_error=True, |         self.transcript_list_mock.find_transcript.assert_any_call(['de', 'en']) | ||||||
|             proxies=None | 
 | ||||||
|  |     def test_run__failing_transcripts(self): | ||||||
|  |         YouTubeTranscriptApi.list_transcripts = MagicMock(side_effect=VideoUnavailable('video_id')) | ||||||
|  | 
 | ||||||
|  |         output = YouTubeTranscriptCli('v1 --languages de en'.split()).run() | ||||||
|  | 
 | ||||||
|  |         self.assertEqual(output, str(VideoUnavailable('video_id'))) | ||||||
|  | 
 | ||||||
|  |     def test_run__exclude_generated(self): | ||||||
|  |         YouTubeTranscriptCli('v1 v2 --languages de en --exclude-generated'.split()).run() | ||||||
|  | 
 | ||||||
|  |         self.transcript_list_mock.find_manually_created_transcript.assert_any_call(['de', 'en']) | ||||||
|  | 
 | ||||||
|  |     def test_run__exclude_manually_created(self): | ||||||
|  |         YouTubeTranscriptCli('v1 v2 --languages de en --exclude-manually-created'.split()).run() | ||||||
|  | 
 | ||||||
|  |         self.transcript_list_mock.find_generated_transcript.assert_any_call(['de', 'en']) | ||||||
|  | 
 | ||||||
|  |     def test_run__exclude_manually_created_and_generated(self): | ||||||
|  |         self.assertEqual( | ||||||
|  |             YouTubeTranscriptCli('v1 v2 --languages de en --exclude-manually-created --exclude-generated'.split()).run(), | ||||||
|  |             '' | ||||||
|         ) |         ) | ||||||
| 
 | 
 | ||||||
|  |     def test_run__translate(self): | ||||||
|  |         YouTubeTranscriptCli('v1 v2 --languages de en --translate cz'.split()).run(), | ||||||
|  | 
 | ||||||
|  |         self.transcript_mock.translate.assert_any_call('cz') | ||||||
|  | 
 | ||||||
|  |     def test_run__list_transcripts(self): | ||||||
|  |         YouTubeTranscriptCli('--list-transcripts v1 v2'.split()).run() | ||||||
|  | 
 | ||||||
|  |         YouTubeTranscriptApi.list_transcripts.assert_any_call('v1', proxies=None) | ||||||
|  |         YouTubeTranscriptApi.list_transcripts.assert_any_call('v2', proxies=None) | ||||||
|  | 
 | ||||||
|     def test_run__json_output(self): |     def test_run__json_output(self): | ||||||
|         YouTubeTranscriptApi.get_transcripts = MagicMock(return_value=([{'boolean': True}], [])) |  | ||||||
|         output = YouTubeTranscriptCli('v1 v2 --languages de en --json'.split()).run() |         output = YouTubeTranscriptCli('v1 v2 --languages de en --json'.split()).run() | ||||||
| 
 | 
 | ||||||
|         # will fail if output is not valid json |         # will fail if output is not valid json | ||||||
|         json.loads(output) |         json.loads(output) | ||||||
| 
 | 
 | ||||||
|     def test_run__proxies(self): |     def test_run__proxies(self): | ||||||
|         YouTubeTranscriptApi.get_transcripts = MagicMock(return_value=([], [])) |  | ||||||
|         YouTubeTranscriptCli( |         YouTubeTranscriptCli( | ||||||
|             'v1 v2 --languages de en --http-proxy http://user:pass@domain:port --https-proxy https://user:pass@domain:port'.split()).run() |             ( | ||||||
|  |                 'v1 v2 --languages de en ' | ||||||
|  |                 '--http-proxy http://user:pass@domain:port ' | ||||||
|  |                 '--https-proxy https://user:pass@domain:port' | ||||||
|  |             ).split() | ||||||
|  |         ).run() | ||||||
| 
 | 
 | ||||||
|         YouTubeTranscriptApi.get_transcripts.assert_called_once_with( |         YouTubeTranscriptApi.list_transcripts.assert_any_call( | ||||||
|             ['v1', 'v2'], |             'v1', | ||||||
|             languages=['de', 'en'], |             proxies={'http': 'http://user:pass@domain:port', 'https': 'https://user:pass@domain:port'} | ||||||
|             continue_after_error=True, |         ) | ||||||
|  | 
 | ||||||
|  |         YouTubeTranscriptApi.list_transcripts.assert_any_call( | ||||||
|  |             'v2', | ||||||
|             proxies={'http': 'http://user:pass@domain:port', 'https': 'https://user:pass@domain:port'} |             proxies={'http': 'http://user:pass@domain:port', 'https': 'https://user:pass@domain:port'} | ||||||
|         ) |         ) | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue