Revert "Add formatters module"
Reverting again; apparently I misunderstood how to revert, so I am trying once more.
This reverts commit dca4021dd7.
			
			
This commit is contained in:
		
							parent
							
								
									d75ad8c402
								
							
						
					
					
						commit
						1e9b2c7727
					
				
							
								
								
									
										4
									
								
								setup.py
								
								
								
								
							
							
						
						
									
										4
									
								
								setup.py
								
								
								
								
							|  | @ -24,10 +24,10 @@ def get_test_suite(): | ||||||
| 
 | 
 | ||||||
| setuptools.setup( | setuptools.setup( | ||||||
|     name="youtube_transcript_api", |     name="youtube_transcript_api", | ||||||
|     version="0.3.0", |     version="0.3.1", | ||||||
|     author="Jonas Depoix", |     author="Jonas Depoix", | ||||||
|     author_email="jonas.depoix@web.de", |     author_email="jonas.depoix@web.de", | ||||||
|     description="This is an python API which allows you to get the transcripts/subtitles for a given YouTube video. It also works for automatically generated subtitles and it does not require a headless browser, like other selenium based solutions do!", |     description="This is an python API which allows you to get the transcripts/subtitles for a given YouTube video. It also works for automatically generated subtitles, supports translating subtitles and it does not require a headless browser, like other selenium based solutions do!", | ||||||
|     long_description=get_long_description(), |     long_description=get_long_description(), | ||||||
|     long_description_content_type="text/markdown", |     long_description_content_type="text/markdown", | ||||||
|     keywords="youtube-api subtitles youtube transcripts transcript subtitle youtube-subtitles youtube-transcripts cli", |     keywords="youtube-api subtitles youtube transcripts transcript subtitle youtube-subtitles youtube-transcripts cli", | ||||||
|  |  | ||||||
|  | @ -12,8 +12,6 @@ from ._errors import ( | ||||||
|     CookiePathInvalid, |     CookiePathInvalid, | ||||||
|     CookiesInvalid |     CookiesInvalid | ||||||
| ) | ) | ||||||
| from .formatters import formats |  | ||||||
| 
 |  | ||||||
| 
 | 
 | ||||||
| class YouTubeTranscriptApi(): | class YouTubeTranscriptApi(): | ||||||
|     @classmethod |     @classmethod | ||||||
|  | @ -72,8 +70,7 @@ class YouTubeTranscriptApi(): | ||||||
|             return TranscriptListFetcher(http_client).fetch(video_id) |             return TranscriptListFetcher(http_client).fetch(video_id) | ||||||
| 
 | 
 | ||||||
|     @classmethod |     @classmethod | ||||||
|     def get_transcripts(cls, video_ids, languages=('en',), |     def get_transcripts(cls, video_ids, languages=('en',), continue_after_error=False, proxies=None, cookies=None): | ||||||
|             continue_after_error=False, proxies=None, cookies=None, format=None): |  | ||||||
|         """ |         """ | ||||||
|         Retrieves the transcripts for a list of videos. |         Retrieves the transcripts for a list of videos. | ||||||
| 
 | 
 | ||||||
|  | @ -99,8 +96,7 @@ class YouTubeTranscriptApi(): | ||||||
| 
 | 
 | ||||||
|         for video_id in video_ids: |         for video_id in video_ids: | ||||||
|             try: |             try: | ||||||
|                 data[video_id] = cls.get_transcript(video_id, languages, |                 data[video_id] = cls.get_transcript(video_id, languages, proxies, cookies) | ||||||
|                                     proxies, cookies, format=format) |  | ||||||
|             except Exception as exception: |             except Exception as exception: | ||||||
|                 if not continue_after_error: |                 if not continue_after_error: | ||||||
|                     raise exception |                     raise exception | ||||||
|  | @ -110,8 +106,7 @@ class YouTubeTranscriptApi(): | ||||||
|         return data, unretrievable_videos |         return data, unretrievable_videos | ||||||
| 
 | 
 | ||||||
|     @classmethod |     @classmethod | ||||||
|     def get_transcript(cls, video_id, languages=('en',), proxies=None, |     def get_transcript(cls, video_id, languages=('en',), proxies=None, cookies=None): | ||||||
|             cookies=None, format=None): |  | ||||||
|         """ |         """ | ||||||
|         Retrieves the transcript for a single video. This is just a shortcut for calling:: |         Retrieves the transcript for a single video. This is just a shortcut for calling:: | ||||||
| 
 | 
 | ||||||
|  | @ -130,10 +125,7 @@ class YouTubeTranscriptApi(): | ||||||
|         :return: a list of dictionaries containing the 'text', 'start' and 'duration' keys |         :return: a list of dictionaries containing the 'text', 'start' and 'duration' keys | ||||||
|         :rtype [{'text': str, 'start': float, 'end': float}]: |         :rtype [{'text': str, 'start': float, 'end': float}]: | ||||||
|         """ |         """ | ||||||
|         Formatter = formats.get_formatter(format) |         return cls.list_transcripts(video_id, proxies, cookies).find_transcript(languages).fetch() | ||||||
|         transcript = cls.list_transcripts( |  | ||||||
|             video_id,proxies, cookies).find_transcript(languages).fetch() |  | ||||||
|         return Formatter.format(transcript) |  | ||||||
|      |      | ||||||
|     @classmethod |     @classmethod | ||||||
|     def _load_cookies(cls, cookies, video_id): |     def _load_cookies(cls, cookies, video_id): | ||||||
|  |  | ||||||
|  | @ -1,9 +1,10 @@ | ||||||
| import json | import json | ||||||
| 
 | 
 | ||||||
|  | import pprint | ||||||
|  | 
 | ||||||
| import argparse | import argparse | ||||||
| 
 | 
 | ||||||
| from ._api import YouTubeTranscriptApi | from ._api import YouTubeTranscriptApi | ||||||
| from .formatters import formats |  | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class YouTubeTranscriptCli(): | class YouTubeTranscriptCli(): | ||||||
|  | @ -25,24 +26,19 @@ class YouTubeTranscriptCli(): | ||||||
|         transcripts = [] |         transcripts = [] | ||||||
|         exceptions = [] |         exceptions = [] | ||||||
| 
 | 
 | ||||||
|         Formatter = formats.get_formatter(parsed_args.format) |  | ||||||
| 
 |  | ||||||
|         for video_id in parsed_args.video_ids: |         for video_id in parsed_args.video_ids: | ||||||
|             try: |             try: | ||||||
|                 transcript = self._fetch_transcript( |                 transcripts.append(self._fetch_transcript(parsed_args, proxies, cookies, video_id)) | ||||||
|                     parsed_args, proxies, cookies, video_id) |  | ||||||
|                 transcripts.append(Formatter.format(transcript)) |  | ||||||
|             except Exception as exception: |             except Exception as exception: | ||||||
|                 exceptions.append(exception) |                 exceptions.append(exception) | ||||||
| 
 | 
 | ||||||
|         return ''.join( |         return '\n\n'.join( | ||||||
|             [str(exception) for exception in exceptions] |             [str(exception) for exception in exceptions] | ||||||
|             + ([Formatter.combine(transcripts)] if transcripts else []) |             + ([json.dumps(transcripts) if parsed_args.json else pprint.pformat(transcripts)] if transcripts else []) | ||||||
|         ) |         ) | ||||||
| 
 | 
 | ||||||
|     def _fetch_transcript(self, parsed_args, proxies, cookies, video_id): |     def _fetch_transcript(self, parsed_args, proxies, cookies, video_id): | ||||||
|         transcript_list = YouTubeTranscriptApi.list_transcripts( |         transcript_list = YouTubeTranscriptApi.list_transcripts(video_id, proxies=proxies, cookies=cookies) | ||||||
|                             video_id, proxies=proxies, cookies=cookies) |  | ||||||
| 
 | 
 | ||||||
|         if parsed_args.list_transcripts: |         if parsed_args.list_transcripts: | ||||||
|             return str(transcript_list) |             return str(transcript_list) | ||||||
|  | @ -102,9 +98,11 @@ class YouTubeTranscriptCli(): | ||||||
|             help='If this flag is set transcripts which have been manually created will not be retrieved.', |             help='If this flag is set transcripts which have been manually created will not be retrieved.', | ||||||
|         ) |         ) | ||||||
|         parser.add_argument( |         parser.add_argument( | ||||||
|             '--format', |             '--json', | ||||||
|             default=None, |             action='store_const', | ||||||
|             help="Use this flag to set which parser format to use, default is 'json'", |             const=True, | ||||||
|  |             default=False, | ||||||
|  |             help='If this flag is set the output will be JSON formatted.', | ||||||
|         ) |         ) | ||||||
|         parser.add_argument( |         parser.add_argument( | ||||||
|             '--translate', |             '--translate', | ||||||
|  |  | ||||||
|  | @ -1,174 +0,0 @@ | ||||||
| from collections import defaultdict |  | ||||||
| import json |  | ||||||
| import re |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| def parse_timecode(time): |  | ||||||
|     """Converts a `time` into a formatted transcript timecode. |  | ||||||
| 
 |  | ||||||
|     :param time: a float representing time in seconds. |  | ||||||
|     :type time: float |  | ||||||
|     :return: a string formatted as a timecode, 'HH:MM:SS,MS' |  | ||||||
|     :rtype str |  | ||||||
| 
 |  | ||||||
|     :example: |  | ||||||
|     >>> parse_timecode(6.93) |  | ||||||
|     '00:00:06,930' |  | ||||||
|     """ |  | ||||||
|      |  | ||||||
|     time = float(time) |  | ||||||
|     times = { |  | ||||||
|         'hours': str(int(time) // 3600).rjust(2, '0'), |  | ||||||
|         'mins': str(int(time) // 60).rjust(2, '0'), |  | ||||||
|         'secs': str(int(time) % 60).rjust(2, '0'), |  | ||||||
|         'ms': str(int(round((time - int(time))*1000, 2))).rjust(3, '0') |  | ||||||
|     } |  | ||||||
|     return "{hours}:{mins}:{secs},{ms}".format(**times) |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| class TranscriptFormatter(object): |  | ||||||
|     """Abstract Base TranscriptFormatter class |  | ||||||
| 
 |  | ||||||
|     This class should be inherited from to create additional |  | ||||||
|      custom transcript formatters. |  | ||||||
|     """ |  | ||||||
|     HTML_TAG_REGEX = re.compile(r'<[^>]*>', re.IGNORECASE) |  | ||||||
|     DELIMITER = '' |  | ||||||
| 
 |  | ||||||
|     @classmethod |  | ||||||
|     def combine(cls, transcripts): |  | ||||||
|         """Subclass may override this class method. |  | ||||||
| 
 |  | ||||||
|         Default behavior of this method will ''.join() the str()  |  | ||||||
|          of each transcript in transcripts. |  | ||||||
| 
 |  | ||||||
|         :param transcripts: a list of many transcripts |  | ||||||
|         :type transcript_data: list[<formatted transcript>, ...] |  | ||||||
|         :return: A string joined on the `cls.DELIMITER` to combine transcripts |  | ||||||
|         :rtype: str |  | ||||||
|         """ |  | ||||||
|         return cls.DELIMITER.join( |  | ||||||
|                 str(transcript) for transcript in transcripts) |  | ||||||
| 
 |  | ||||||
|     @classmethod |  | ||||||
|     def format(cls, transcript_data): |  | ||||||
|         """Any subclass must implement this format class method. |  | ||||||
| 
 |  | ||||||
|         :param transcript_data: a list of transcripts, 1 or more. |  | ||||||
|         :type transcript_data: list[list[dict], list[dict]] |  | ||||||
|         :return: A list where each item is an individual transcript |  | ||||||
|          as a string. |  | ||||||
|         :rtype: list[str] |  | ||||||
|         """ |  | ||||||
|         raise NotImplementedError( |  | ||||||
|             cls.__name__ + '.format' |  | ||||||
|         ) |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| class JSONTranscriptFormatter(TranscriptFormatter): |  | ||||||
|     """Formatter for outputting JSON data""" |  | ||||||
|     DELIMITER = ',' |  | ||||||
| 
 |  | ||||||
|     @classmethod |  | ||||||
|     def combine(cls, transcripts): |  | ||||||
|         return json.dumps(transcripts) |  | ||||||
| 
 |  | ||||||
|     @classmethod |  | ||||||
|     def format(cls, transcript_data): |  | ||||||
|         return transcript_data |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| class TextTranscriptFormatter(TranscriptFormatter): |  | ||||||
|     """Formatter for outputting a Plain Text Format |  | ||||||
| 
 |  | ||||||
|     Converts the fetched transcript data into separated lines of |  | ||||||
|      plain text separated by newline breaks (\n) with no timecodes. |  | ||||||
|     """ |  | ||||||
|     DELIMITER = '\n\n' |  | ||||||
| 
 |  | ||||||
|     @classmethod |  | ||||||
|     def format(cls, transcript_data): |  | ||||||
|         return '{}\n'.format('\n'.join( |  | ||||||
|                     line['text']for line in transcript_data)) |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| class SRTTranscriptFormatter(TranscriptFormatter): |  | ||||||
|     """Formatter for outputting the SRT Format |  | ||||||
| 
 |  | ||||||
|     Converts the fetched transcript data into a simple .srt file format. |  | ||||||
|     """ |  | ||||||
|     DELIMITER = '\n\n' |  | ||||||
| 
 |  | ||||||
|     @classmethod |  | ||||||
|     def format(cls, transcript_data): |  | ||||||
|         output = [] |  | ||||||
|         for frame, item in enumerate(transcript_data, start=1): |  | ||||||
|             start_time = float(item.get('start')) |  | ||||||
|             duration = float(item.get('duration', '0.0')) |  | ||||||
| 
 |  | ||||||
|             output.append("{frame}\n".format(frame=frame)) |  | ||||||
|             output.append("{start_time} --> {end_time}\n".format( |  | ||||||
|                 start_time=parse_timecode(start_time), |  | ||||||
|                 end_time=parse_timecode(start_time + duration) |  | ||||||
|             )) |  | ||||||
|             output.append("{text}".format(text=item.get('text'))) |  | ||||||
|             if frame < len(transcript_data): |  | ||||||
|                 output.append('\n\n') |  | ||||||
|         return '{}\n'.format(''.join(output)) |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| class TranscriptFormatterFactory(object): |  | ||||||
|     """A Transcript Class Factory |  | ||||||
| 
 |  | ||||||
|     Allows for adding additional custom Transcript classes for the API |  | ||||||
|     to use. Custom Transcript classes must inherit from the |  | ||||||
|     TranscriptFormatter abstract base class. |  | ||||||
|     """ |  | ||||||
|     def __init__(self): |  | ||||||
|         self._formatters = defaultdict(JSONTranscriptFormatter) |  | ||||||
| 
 |  | ||||||
|     def add_formatter(self, name, formatter_class): |  | ||||||
|         """Allows for creating additional transcript formatters. |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
|         :param name: a name given to the `formatter_class` |  | ||||||
|         :type name: str |  | ||||||
|         :param formatter_class: a subclass of TranscriptFormatter |  | ||||||
|         :type formatter_class: class |  | ||||||
|         :rtype None |  | ||||||
|         """ |  | ||||||
|         if not issubclass(formatter_class, TranscriptFormatter): |  | ||||||
|             raise TypeError(( |  | ||||||
|                 '{0} must be a subclass of TranscriptFormatter' |  | ||||||
|                 ).format(formatter_class) |  | ||||||
|             ) |  | ||||||
|         self._formatters.update({name: formatter_class}) |  | ||||||
| 
 |  | ||||||
|     def add_formatters(self, formatters_dict): |  | ||||||
|         """Allow creation of multiple transcript formatters at a time. |  | ||||||
| 
 |  | ||||||
|         :param formatters_dict: key(s) are the string name to be given |  | ||||||
|          to the formatter class, value for each key should be a subclass |  | ||||||
|          of TranscriptFormatter. |  | ||||||
|         :type formatters_dict: dict |  | ||||||
|         :rtype None |  | ||||||
|         """ |  | ||||||
|         for name, formatter_class in formatters_dict.items(): |  | ||||||
|             self.add_formatter(name, formatter_class) |  | ||||||
| 
 |  | ||||||
|     def get_formatter(self, name): |  | ||||||
|         """Retrieve a formatter class by its assigned name. |  | ||||||
| 
 |  | ||||||
|         :param name: the string name given to the formatter class. |  | ||||||
|         :type name: str |  | ||||||
|         :return: a subclass of `TranscriptFormatter` |  | ||||||
|         """ |  | ||||||
|         return self._formatters[name] |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| formats = TranscriptFormatterFactory() |  | ||||||
| formats.add_formatters({ |  | ||||||
|     'json': JSONTranscriptFormatter, |  | ||||||
|     'srt': SRTTranscriptFormatter, |  | ||||||
|     'text': TextTranscriptFormatter |  | ||||||
| }) |  | ||||||
|  | @ -1,6 +1,6 @@ | ||||||
| from unittest import TestCase | from unittest import TestCase | ||||||
| from mock import patch | from mock import patch | ||||||
| import json | 
 | ||||||
| import os | import os | ||||||
| 
 | 
 | ||||||
| import requests | import requests | ||||||
|  | @ -21,10 +21,7 @@ from youtube_transcript_api import ( | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def load_asset(filename): | def load_asset(filename): | ||||||
|     filepath = '{dirname}/assets/{filename}'.format( |     with open('{dirname}/assets/{filename}'.format(dirname=os.path.dirname(__file__), filename=filename)) as file: | ||||||
|                 dirname=os.path.dirname(__file__), filename=filename) |  | ||||||
|      |  | ||||||
|     with open(filepath, 'r', encoding='utf-8') as file: |  | ||||||
|         return file.read() |         return file.read() | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | @ -161,7 +158,7 @@ class TestYouTubeTranscriptApi(TestCase): | ||||||
|     def test_get_transcript__with_proxy(self): |     def test_get_transcript__with_proxy(self): | ||||||
|         proxies = {'http': '', 'https:': ''} |         proxies = {'http': '', 'https:': ''} | ||||||
|         transcript = YouTubeTranscriptApi.get_transcript( |         transcript = YouTubeTranscriptApi.get_transcript( | ||||||
|             'GJLlxj_dtq8', proxies=proxies, format=None |             'GJLlxj_dtq8', proxies=proxies | ||||||
|         ) |         ) | ||||||
|         self.assertEqual( |         self.assertEqual( | ||||||
|             transcript, |             transcript, | ||||||
|  | @ -194,8 +191,8 @@ class TestYouTubeTranscriptApi(TestCase): | ||||||
| 
 | 
 | ||||||
|         YouTubeTranscriptApi.get_transcripts([video_id_1, video_id_2], languages=languages) |         YouTubeTranscriptApi.get_transcripts([video_id_1, video_id_2], languages=languages) | ||||||
| 
 | 
 | ||||||
|         mock_get_transcript.assert_any_call(video_id_1, languages, None, None, format=None) |         mock_get_transcript.assert_any_call(video_id_1, languages, None, None) | ||||||
|         mock_get_transcript.assert_any_call(video_id_2, languages, None, None, format=None) |         mock_get_transcript.assert_any_call(video_id_2, languages, None, None) | ||||||
|         self.assertEqual(mock_get_transcript.call_count, 2) |         self.assertEqual(mock_get_transcript.call_count, 2) | ||||||
| 
 | 
 | ||||||
|     @patch('youtube_transcript_api.YouTubeTranscriptApi.get_transcript', side_effect=Exception('Error')) |     @patch('youtube_transcript_api.YouTubeTranscriptApi.get_transcript', side_effect=Exception('Error')) | ||||||
|  | @ -210,20 +207,20 @@ class TestYouTubeTranscriptApi(TestCase): | ||||||
| 
 | 
 | ||||||
|         YouTubeTranscriptApi.get_transcripts(['video_id_1', 'video_id_2'], continue_after_error=True) |         YouTubeTranscriptApi.get_transcripts(['video_id_1', 'video_id_2'], continue_after_error=True) | ||||||
| 
 | 
 | ||||||
|         mock_get_transcript.assert_any_call(video_id_1, ('en',), None, None, format=None) |         mock_get_transcript.assert_any_call(video_id_1, ('en',), None, None) | ||||||
|         mock_get_transcript.assert_any_call(video_id_2, ('en',), None, None, format=None) |         mock_get_transcript.assert_any_call(video_id_2, ('en',), None, None) | ||||||
|      |      | ||||||
|     @patch('youtube_transcript_api.YouTubeTranscriptApi.get_transcript') |     @patch('youtube_transcript_api.YouTubeTranscriptApi.get_transcript') | ||||||
|     def test_get_transcripts__with_cookies(self, mock_get_transcript): |     def test_get_transcripts__with_cookies(self, mock_get_transcript): | ||||||
|         cookies = '/example_cookies.txt' |         cookies = '/example_cookies.txt' | ||||||
|         YouTubeTranscriptApi.get_transcripts(['GJLlxj_dtq8'], cookies=cookies) |         YouTubeTranscriptApi.get_transcripts(['GJLlxj_dtq8'], cookies=cookies) | ||||||
|         mock_get_transcript.assert_any_call('GJLlxj_dtq8', ('en',), None, cookies, format=None) |         mock_get_transcript.assert_any_call('GJLlxj_dtq8', ('en',), None, cookies) | ||||||
| 
 | 
 | ||||||
|     @patch('youtube_transcript_api.YouTubeTranscriptApi.get_transcript') |     @patch('youtube_transcript_api.YouTubeTranscriptApi.get_transcript') | ||||||
|     def test_get_transcripts__with_proxies(self, mock_get_transcript): |     def test_get_transcripts__with_proxies(self, mock_get_transcript): | ||||||
|         proxies = {'http': '', 'https:': ''} |         proxies = {'http': '', 'https:': ''} | ||||||
|         YouTubeTranscriptApi.get_transcripts(['GJLlxj_dtq8'], proxies=proxies) |         YouTubeTranscriptApi.get_transcripts(['GJLlxj_dtq8'], proxies=proxies) | ||||||
|         mock_get_transcript.assert_any_call('GJLlxj_dtq8', ('en',), proxies, None, format=None) |         mock_get_transcript.assert_any_call('GJLlxj_dtq8', ('en',), proxies, None) | ||||||
| 
 | 
 | ||||||
|     def test_load_cookies(self): |     def test_load_cookies(self): | ||||||
|         dirname, filename = os.path.split(os.path.abspath(__file__)) |         dirname, filename = os.path.split(os.path.abspath(__file__)) | ||||||
|  |  | ||||||
|  | @ -25,50 +25,50 @@ class TestYouTubeTranscriptCli(TestCase): | ||||||
|         YouTubeTranscriptApi.list_transcripts = MagicMock(return_value=self.transcript_list_mock) |         YouTubeTranscriptApi.list_transcripts = MagicMock(return_value=self.transcript_list_mock) | ||||||
| 
 | 
 | ||||||
|     def test_argument_parsing(self): |     def test_argument_parsing(self): | ||||||
|         parsed_args = YouTubeTranscriptCli('v1 v2 --format json --languages de en'.split())._parse_args() |         parsed_args = YouTubeTranscriptCli('v1 v2 --json --languages de en'.split())._parse_args() | ||||||
|         self.assertEqual(parsed_args.video_ids, ['v1', 'v2']) |         self.assertEqual(parsed_args.video_ids, ['v1', 'v2']) | ||||||
|         self.assertEqual(parsed_args.format, 'json') |         self.assertEqual(parsed_args.json, True) | ||||||
|         self.assertEqual(parsed_args.languages, ['de', 'en']) |         self.assertEqual(parsed_args.languages, ['de', 'en']) | ||||||
|         self.assertEqual(parsed_args.http_proxy, '') |         self.assertEqual(parsed_args.http_proxy, '') | ||||||
|         self.assertEqual(parsed_args.https_proxy, '') |         self.assertEqual(parsed_args.https_proxy, '') | ||||||
| 
 | 
 | ||||||
|         parsed_args = YouTubeTranscriptCli('v1 v2 --languages de en --format json'.split())._parse_args() |         parsed_args = YouTubeTranscriptCli('v1 v2 --languages de en --json'.split())._parse_args() | ||||||
|         self.assertEqual(parsed_args.video_ids, ['v1', 'v2']) |         self.assertEqual(parsed_args.video_ids, ['v1', 'v2']) | ||||||
|         self.assertEqual(parsed_args.format, 'json') |         self.assertEqual(parsed_args.json, True) | ||||||
|         self.assertEqual(parsed_args.languages, ['de', 'en']) |         self.assertEqual(parsed_args.languages, ['de', 'en']) | ||||||
|         self.assertEqual(parsed_args.http_proxy, '') |         self.assertEqual(parsed_args.http_proxy, '') | ||||||
|         self.assertEqual(parsed_args.https_proxy, '') |         self.assertEqual(parsed_args.https_proxy, '') | ||||||
| 
 | 
 | ||||||
|         parsed_args = YouTubeTranscriptCli(' --format json v1 v2 --languages de en'.split())._parse_args() |         parsed_args = YouTubeTranscriptCli(' --json v1 v2 --languages de en'.split())._parse_args() | ||||||
|         self.assertEqual(parsed_args.video_ids, ['v1', 'v2']) |         self.assertEqual(parsed_args.video_ids, ['v1', 'v2']) | ||||||
|         self.assertEqual(parsed_args.format, 'json') |         self.assertEqual(parsed_args.json, True) | ||||||
|         self.assertEqual(parsed_args.languages, ['de', 'en']) |         self.assertEqual(parsed_args.languages, ['de', 'en']) | ||||||
|         self.assertEqual(parsed_args.http_proxy, '') |         self.assertEqual(parsed_args.http_proxy, '') | ||||||
|         self.assertEqual(parsed_args.https_proxy, '') |         self.assertEqual(parsed_args.https_proxy, '') | ||||||
| 
 | 
 | ||||||
|         parsed_args = YouTubeTranscriptCli( |         parsed_args = YouTubeTranscriptCli( | ||||||
|             'v1 v2 --languages de en --format json --http-proxy http://user:pass@domain:port --https-proxy https://user:pass@domain:port'.split() |             'v1 v2 --languages de en --json --http-proxy http://user:pass@domain:port --https-proxy https://user:pass@domain:port'.split() | ||||||
|         )._parse_args() |         )._parse_args() | ||||||
|         self.assertEqual(parsed_args.video_ids, ['v1', 'v2']) |         self.assertEqual(parsed_args.video_ids, ['v1', 'v2']) | ||||||
|         self.assertEqual(parsed_args.format, 'json') |         self.assertEqual(parsed_args.json, True) | ||||||
|         self.assertEqual(parsed_args.languages, ['de', 'en']) |         self.assertEqual(parsed_args.languages, ['de', 'en']) | ||||||
|         self.assertEqual(parsed_args.http_proxy, 'http://user:pass@domain:port') |         self.assertEqual(parsed_args.http_proxy, 'http://user:pass@domain:port') | ||||||
|         self.assertEqual(parsed_args.https_proxy, 'https://user:pass@domain:port') |         self.assertEqual(parsed_args.https_proxy, 'https://user:pass@domain:port') | ||||||
| 
 | 
 | ||||||
|         parsed_args = YouTubeTranscriptCli( |         parsed_args = YouTubeTranscriptCli( | ||||||
|             'v1 v2 --languages de en --format json --http-proxy http://user:pass@domain:port'.split() |             'v1 v2 --languages de en --json --http-proxy http://user:pass@domain:port'.split() | ||||||
|         )._parse_args() |         )._parse_args() | ||||||
|         self.assertEqual(parsed_args.video_ids, ['v1', 'v2']) |         self.assertEqual(parsed_args.video_ids, ['v1', 'v2']) | ||||||
|         self.assertEqual(parsed_args.format, 'json') |         self.assertEqual(parsed_args.json, True) | ||||||
|         self.assertEqual(parsed_args.languages, ['de', 'en']) |         self.assertEqual(parsed_args.languages, ['de', 'en']) | ||||||
|         self.assertEqual(parsed_args.http_proxy, 'http://user:pass@domain:port') |         self.assertEqual(parsed_args.http_proxy, 'http://user:pass@domain:port') | ||||||
|         self.assertEqual(parsed_args.https_proxy, '') |         self.assertEqual(parsed_args.https_proxy, '') | ||||||
| 
 | 
 | ||||||
|         parsed_args = YouTubeTranscriptCli( |         parsed_args = YouTubeTranscriptCli( | ||||||
|             'v1 v2 --languages de en --format json --https-proxy https://user:pass@domain:port'.split() |             'v1 v2 --languages de en --json --https-proxy https://user:pass@domain:port'.split() | ||||||
|         )._parse_args() |         )._parse_args() | ||||||
|         self.assertEqual(parsed_args.video_ids, ['v1', 'v2']) |         self.assertEqual(parsed_args.video_ids, ['v1', 'v2']) | ||||||
|         self.assertEqual(parsed_args.format, 'json') |         self.assertEqual(parsed_args.json, True) | ||||||
|         self.assertEqual(parsed_args.languages, ['de', 'en']) |         self.assertEqual(parsed_args.languages, ['de', 'en']) | ||||||
|         self.assertEqual(parsed_args.https_proxy, 'https://user:pass@domain:port') |         self.assertEqual(parsed_args.https_proxy, 'https://user:pass@domain:port') | ||||||
|         self.assertEqual(parsed_args.http_proxy, '') |         self.assertEqual(parsed_args.http_proxy, '') | ||||||
|  | @ -76,28 +76,28 @@ class TestYouTubeTranscriptCli(TestCase): | ||||||
|     def test_argument_parsing__only_video_ids(self): |     def test_argument_parsing__only_video_ids(self): | ||||||
|         parsed_args = YouTubeTranscriptCli('v1 v2'.split())._parse_args() |         parsed_args = YouTubeTranscriptCli('v1 v2'.split())._parse_args() | ||||||
|         self.assertEqual(parsed_args.video_ids, ['v1', 'v2']) |         self.assertEqual(parsed_args.video_ids, ['v1', 'v2']) | ||||||
|         self.assertEqual(parsed_args.format, None) |         self.assertEqual(parsed_args.json, False) | ||||||
|         self.assertEqual(parsed_args.languages, ['en']) |         self.assertEqual(parsed_args.languages, ['en']) | ||||||
| 
 | 
 | ||||||
|     def test_argument_parsing__fail_without_video_ids(self): |     def test_argument_parsing__fail_without_video_ids(self): | ||||||
|         with self.assertRaises(SystemExit): |         with self.assertRaises(SystemExit): | ||||||
|             YouTubeTranscriptCli('--format json'.split())._parse_args() |             YouTubeTranscriptCli('--json'.split())._parse_args() | ||||||
| 
 | 
 | ||||||
|     def test_argument_parsing__json(self): |     def test_argument_parsing__json(self): | ||||||
|         parsed_args = YouTubeTranscriptCli('v1 v2 --format json'.split())._parse_args() |         parsed_args = YouTubeTranscriptCli('v1 v2 --json'.split())._parse_args() | ||||||
|         self.assertEqual(parsed_args.video_ids, ['v1', 'v2']) |         self.assertEqual(parsed_args.video_ids, ['v1', 'v2']) | ||||||
|         self.assertEqual(parsed_args.format, 'json') |         self.assertEqual(parsed_args.json, True) | ||||||
|         self.assertEqual(parsed_args.languages, ['en']) |         self.assertEqual(parsed_args.languages, ['en']) | ||||||
| 
 | 
 | ||||||
|         parsed_args = YouTubeTranscriptCli('--format json v1 v2'.split())._parse_args() |         parsed_args = YouTubeTranscriptCli('--json v1 v2'.split())._parse_args() | ||||||
|         self.assertEqual(parsed_args.video_ids, ['v1', 'v2']) |         self.assertEqual(parsed_args.video_ids, ['v1', 'v2']) | ||||||
|         self.assertEqual(parsed_args.format, 'json') |         self.assertEqual(parsed_args.json, True) | ||||||
|         self.assertEqual(parsed_args.languages, ['en']) |         self.assertEqual(parsed_args.languages, ['en']) | ||||||
| 
 | 
 | ||||||
|     def test_argument_parsing__languages(self): |     def test_argument_parsing__languages(self): | ||||||
|         parsed_args = YouTubeTranscriptCli('v1 v2 --languages de en'.split())._parse_args() |         parsed_args = YouTubeTranscriptCli('v1 v2 --languages de en'.split())._parse_args() | ||||||
|         self.assertEqual(parsed_args.video_ids, ['v1', 'v2']) |         self.assertEqual(parsed_args.video_ids, ['v1', 'v2']) | ||||||
|         self.assertEqual(parsed_args.format, None) |         self.assertEqual(parsed_args.json, False) | ||||||
|         self.assertEqual(parsed_args.languages, ['de', 'en']) |         self.assertEqual(parsed_args.languages, ['de', 'en']) | ||||||
| 
 | 
 | ||||||
|     def test_argument_parsing__proxies(self): |     def test_argument_parsing__proxies(self): | ||||||
|  | @ -135,13 +135,13 @@ class TestYouTubeTranscriptCli(TestCase): | ||||||
|     def test_argument_parsing__translate(self): |     def test_argument_parsing__translate(self): | ||||||
|         parsed_args = YouTubeTranscriptCli('v1 v2 --languages de en --translate cz'.split())._parse_args() |         parsed_args = YouTubeTranscriptCli('v1 v2 --languages de en --translate cz'.split())._parse_args() | ||||||
|         self.assertEqual(parsed_args.video_ids, ['v1', 'v2']) |         self.assertEqual(parsed_args.video_ids, ['v1', 'v2']) | ||||||
|         self.assertEqual(parsed_args.format, None) |         self.assertEqual(parsed_args.json, False) | ||||||
|         self.assertEqual(parsed_args.languages, ['de', 'en']) |         self.assertEqual(parsed_args.languages, ['de', 'en']) | ||||||
|         self.assertEqual(parsed_args.translate, 'cz') |         self.assertEqual(parsed_args.translate, 'cz') | ||||||
| 
 | 
 | ||||||
|         parsed_args = YouTubeTranscriptCli('v1 v2 --translate cz --languages de en'.split())._parse_args() |         parsed_args = YouTubeTranscriptCli('v1 v2 --translate cz --languages de en'.split())._parse_args() | ||||||
|         self.assertEqual(parsed_args.video_ids, ['v1', 'v2']) |         self.assertEqual(parsed_args.video_ids, ['v1', 'v2']) | ||||||
|         self.assertEqual(parsed_args.format, None) |         self.assertEqual(parsed_args.json, False) | ||||||
|         self.assertEqual(parsed_args.languages, ['de', 'en']) |         self.assertEqual(parsed_args.languages, ['de', 'en']) | ||||||
|         self.assertEqual(parsed_args.translate, 'cz') |         self.assertEqual(parsed_args.translate, 'cz') | ||||||
| 
 | 
 | ||||||
|  | @ -204,7 +204,8 @@ class TestYouTubeTranscriptCli(TestCase): | ||||||
|         YouTubeTranscriptApi.list_transcripts.assert_any_call('v2', proxies=None, cookies=None) |         YouTubeTranscriptApi.list_transcripts.assert_any_call('v2', proxies=None, cookies=None) | ||||||
| 
 | 
 | ||||||
|     def test_run__json_output(self): |     def test_run__json_output(self): | ||||||
|         output = YouTubeTranscriptCli('v1 v2 --languages de en --format json'.split()).run() |         output = YouTubeTranscriptCli('v1 v2 --languages de en --json'.split()).run() | ||||||
|  | 
 | ||||||
|         # will fail if output is not valid json |         # will fail if output is not valid json | ||||||
|         json.loads(output) |         json.loads(output) | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -1,99 +0,0 @@ | ||||||
| from unittest import TestCase |  | ||||||
| from mock import MagicMock |  | ||||||
| import json |  | ||||||
| 
 |  | ||||||
| from youtube_transcript_api.formatters import ( |  | ||||||
|     JSONTranscriptFormatter, |  | ||||||
|     parse_timecode, |  | ||||||
|     SRTTranscriptFormatter, |  | ||||||
|     TextTranscriptFormatter, |  | ||||||
|     TranscriptFormatter, |  | ||||||
|     TranscriptFormatterFactory |  | ||||||
| ) |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| class TestTranscriptFormatters(TestCase): |  | ||||||
|     @classmethod |  | ||||||
|     def setUpClass(cls): |  | ||||||
|         cls.transcript = [ |  | ||||||
|             { |  | ||||||
|                 'text': 'Hey, this is just a test', |  | ||||||
|                 'start': 0.0, |  | ||||||
|                 'duration': 1.54 |  | ||||||
|             }, |  | ||||||
|             { |  | ||||||
|                 'text': 'this is not the original transcript', |  | ||||||
|                 'start': 1.54, |  | ||||||
|                 'duration': 4.16 |  | ||||||
|             }, |  | ||||||
|             { |  | ||||||
|                 'text': 'just something shorter, I made up for testing', |  | ||||||
|                 'start': 5.7, |  | ||||||
|                 'duration': 3.239 |  | ||||||
|             } |  | ||||||
|         ] |  | ||||||
| 
 |  | ||||||
|     def test_base_formatter_combine(self): |  | ||||||
|         expecting = ''.join([str(line) for line in self.transcript]) |  | ||||||
| 
 |  | ||||||
|         self.assertEqual( |  | ||||||
|             TranscriptFormatter.combine(self.transcript), |  | ||||||
|             expecting |  | ||||||
|         ) |  | ||||||
| 
 |  | ||||||
|     def test_base_format_not_implemented(self): |  | ||||||
|         with self.assertRaises(NotImplementedError): |  | ||||||
|             TranscriptFormatter.format(self.transcript) |  | ||||||
| 
 |  | ||||||
|     def test_text_formatter_format(self): |  | ||||||
|         text = '\n'.join([line.get('text') for line in self.transcript]) |  | ||||||
|         text_fmt = TextTranscriptFormatter.format(self.transcript) |  | ||||||
|         self.assertIn(text + '\n', text_fmt) |  | ||||||
| 
 |  | ||||||
|     def test_srt_formatter_format(self): |  | ||||||
|         start = self.transcript[0].get('start') |  | ||||||
|         duration = self.transcript[0].get('duration') |  | ||||||
|         srt_fmt = SRTTranscriptFormatter.format(self.transcript) |  | ||||||
|         self.assertIn('{start} --> {end}'.format( |  | ||||||
|                 start=parse_timecode(start), |  | ||||||
|                 end=parse_timecode(start+duration) |  | ||||||
|             ), srt_fmt) |  | ||||||
| 
 |  | ||||||
|     def test_json_formatter_format(self): |  | ||||||
|         json_fmt = JSONTranscriptFormatter.format(self.transcript) |  | ||||||
|         self.assertIsInstance(json.dumps(json_fmt), str) |  | ||||||
| 
 |  | ||||||
|     def test_invalid_parse_timecode(self): |  | ||||||
|         start_time = 'not_float' |  | ||||||
| 
 |  | ||||||
|         with self.assertRaises(ValueError): |  | ||||||
|             parse_timecode(start_time) |  | ||||||
| 
 |  | ||||||
|     def test_valid_parse_timecode(self): |  | ||||||
|         start_time = 0.0 |  | ||||||
|         end_time = 5.20 |  | ||||||
| 
 |  | ||||||
|         self.assertEqual( |  | ||||||
|             parse_timecode(start_time), |  | ||||||
|             '00:00:00,000' |  | ||||||
|         ) |  | ||||||
| 
 |  | ||||||
|         self.assertEqual( |  | ||||||
|             parse_timecode(end_time), |  | ||||||
|             '00:00:05,200' |  | ||||||
|         ) |  | ||||||
| 
 |  | ||||||
|     def test_formatter_factory_valid_single_add(self): |  | ||||||
|         factory = TranscriptFormatterFactory() |  | ||||||
|         factory.add_formatter('json', JSONTranscriptFormatter) |  | ||||||
| 
 |  | ||||||
|         self.assertDictEqual( |  | ||||||
|             getattr(factory, '_formatters'), |  | ||||||
|             {'json': JSONTranscriptFormatter} |  | ||||||
|         ) |  | ||||||
| 
 |  | ||||||
|     def test_formatter_factory_invalid_single_add(self): |  | ||||||
|         factory = TranscriptFormatterFactory() |  | ||||||
| 
 |  | ||||||
|         with self.assertRaises(TypeError): |  | ||||||
|             factory.add_formatter('magic', MagicMock) |  | ||||||
		Loading…
	
		Reference in New Issue