added FormatterLoader
This commit is contained in:
		
							parent
							
								
									71268dfad9
								
							
						
					
					
						commit
						d314139329
					
				|  | @ -38,12 +38,14 @@ class CouldNotRetrieveTranscript(Exception): | ||||||
| class VideoUnavailable(CouldNotRetrieveTranscript): | class VideoUnavailable(CouldNotRetrieveTranscript): | ||||||
|     CAUSE_MESSAGE = 'The video is no longer available' |     CAUSE_MESSAGE = 'The video is no longer available' | ||||||
| 
 | 
 | ||||||
|  | 
 | ||||||
| class TooManyRequests(CouldNotRetrieveTranscript): | class TooManyRequests(CouldNotRetrieveTranscript): | ||||||
|     CAUSE_MESSAGE = ("YouTube is receiving too many requests from this IP and now requires solving a captcha to continue. One of the following things can be done to work around this:\n\ |     CAUSE_MESSAGE = ("YouTube is receiving too many requests from this IP and now requires solving a captcha to continue. One of the following things can be done to work around this:\n\ | ||||||
|     - Manually solve the captcha in a browser and export the cookie. Read here how to use that cookie with youtube-transcript-api: https://github.com/jdepoix/youtube-transcript-api#cookies\n\ |     - Manually solve the captcha in a browser and export the cookie. Read here how to use that cookie with youtube-transcript-api: https://github.com/jdepoix/youtube-transcript-api#cookies\n\ | ||||||
|     - Use a different IP address\n\ |     - Use a different IP address\n\ | ||||||
|     - Wait until the ban on your IP has been lifted") |     - Wait until the ban on your IP has been lifted") | ||||||
| 
 | 
 | ||||||
|  | 
 | ||||||
| class TranscriptsDisabled(CouldNotRetrieveTranscript): | class TranscriptsDisabled(CouldNotRetrieveTranscript): | ||||||
|     CAUSE_MESSAGE = 'Subtitles are disabled for this video' |     CAUSE_MESSAGE = 'Subtitles are disabled for this video' | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -1,5 +1,7 @@ | ||||||
| import json | import json | ||||||
| 
 | 
 | ||||||
|  | import pprint | ||||||
|  | 
 | ||||||
| 
 | 
 | ||||||
| class Formatter(object): | class Formatter(object): | ||||||
|     """Formatter should be used as an abstract base class. |     """Formatter should be used as an abstract base class. | ||||||
|  | @ -22,6 +24,16 @@ class Formatter(object): | ||||||
|             'their own .format() method.') |             'their own .format() method.') | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | class PrettyPrintFormatter(Formatter): | ||||||
|  |     def format(self, **kwargs): | ||||||
|  |         """Pretty prints a transcript. | ||||||
|  | 
 | ||||||
|  |         :return: A pretty printed string representation of the transcript dict. | ||||||
|  |         :rtype: str | ||||||
|  |         """ | ||||||
|  |         return pprint.pformat(self._transcript, **kwargs) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
| class JSONFormatter(Formatter): | class JSONFormatter(Formatter): | ||||||
|     def format(self, **kwargs): |     def format(self, **kwargs): | ||||||
|         """Converts a transcript into a JSON string. |         """Converts a transcript into a JSON string. | ||||||
|  | @ -72,12 +84,12 @@ class WebVTTFormatter(Formatter): | ||||||
|         """ |         """ | ||||||
|         lines = [] |         lines = [] | ||||||
|         for i, line in enumerate(self._transcript): |         for i, line in enumerate(self._transcript): | ||||||
|             if i < len(self._transcript)-1: |             if i < len(self._transcript) - 1: | ||||||
|                 # Looks ahead, use next start time since duration value |                 # Looks ahead, use next start time since duration value | ||||||
|                 # would create an overlap between start times. |                 # would create an overlap between start times. | ||||||
|                 time_text = "{} --> {}".format( |                 time_text = "{} --> {}".format( | ||||||
|                     self._seconds_to_timestamp(line['start']), |                     self._seconds_to_timestamp(line['start']), | ||||||
|                     self._seconds_to_timestamp(self._transcript[i+1]['start']) |                     self._seconds_to_timestamp(self._transcript[i + 1]['start']) | ||||||
|                 ) |                 ) | ||||||
|             else: |             else: | ||||||
|                 # Reached the end, cannot look ahead, use duration now. |                 # Reached the end, cannot look ahead, use duration now. | ||||||
|  | @ -89,3 +101,27 @@ class WebVTTFormatter(Formatter): | ||||||
|             lines.append("{}\n{}".format(time_text, line['text'])) |             lines.append("{}\n{}".format(time_text, line['text'])) | ||||||
|          |          | ||||||
|         return "WEBVTT\n\n" + "\n\n".join(lines) + "\n" |         return "WEBVTT\n\n" + "\n\n".join(lines) + "\n" | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | class FormatterLoader(object): | ||||||
|  |     TYPES = { | ||||||
|  |         'json': JSONFormatter, | ||||||
|  |         'pretty': PrettyPrintFormatter, | ||||||
|  |         'text': TextFormatter, | ||||||
|  |         'webvvt': WebVTTFormatter, | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     class UnknownFormatterType(Exception): | ||||||
|  |         def __init__(self, formatter_type): | ||||||
|  |             super(FormatterLoader.UnknownFormatterType, self).__init__( | ||||||
|  |                 f'The format \'{formatter_type}\' is not supported. ' | ||||||
|  |                 f'Choose one of the following formats: {", ".join(FormatterLoader.TYPES.keys())}' | ||||||
|  |             ) | ||||||
|  | 
 | ||||||
|  |     def __init__(self, formatter_type='pretty'): | ||||||
|  |         if formatter_type not in FormatterLoader.TYPES.keys(): | ||||||
|  |             raise FormatterLoader.UnknownFormatterType(formatter_type) | ||||||
|  |         self._formatter = FormatterLoader.TYPES[formatter_type] | ||||||
|  | 
 | ||||||
|  |     def load(self, transcript): | ||||||
|  |         return self._formatter(transcript) | ||||||
|  |  | ||||||
|  | @ -1,12 +1,15 @@ | ||||||
| import json |  | ||||||
| from mock import MagicMock |  | ||||||
| from unittest import TestCase | from unittest import TestCase | ||||||
| 
 | 
 | ||||||
|  | import json | ||||||
|  | 
 | ||||||
|  | import pprint | ||||||
|  | 
 | ||||||
| from youtube_transcript_api.formatters import ( | from youtube_transcript_api.formatters import ( | ||||||
|     Formatter, |     Formatter, | ||||||
|     JSONFormatter, |     JSONFormatter, | ||||||
|     TextFormatter, |     TextFormatter, | ||||||
|     WebVTTFormatter |     WebVTTFormatter, | ||||||
|  |     PrettyPrintFormatter, FormatterLoader | ||||||
| ) | ) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | @ -35,6 +38,7 @@ class TestFormatters(TestCase): | ||||||
|     def test_webvtt_formatter_starting(self): |     def test_webvtt_formatter_starting(self): | ||||||
|         content = WebVTTFormatter(self.transcript).format() |         content = WebVTTFormatter(self.transcript).format() | ||||||
|         lines = content.split('\n') |         lines = content.split('\n') | ||||||
|  | 
 | ||||||
|         # test starting lines |         # test starting lines | ||||||
|         self.assertEqual(lines[0], "WEBVTT") |         self.assertEqual(lines[0], "WEBVTT") | ||||||
|         self.assertEqual(lines[1], "") |         self.assertEqual(lines[1], "") | ||||||
|  | @ -42,16 +46,40 @@ class TestFormatters(TestCase): | ||||||
|     def test_webvtt_formatter_ending(self): |     def test_webvtt_formatter_ending(self): | ||||||
|         content = WebVTTFormatter(self.transcript).format() |         content = WebVTTFormatter(self.transcript).format() | ||||||
|         lines = content.split('\n') |         lines = content.split('\n') | ||||||
|  | 
 | ||||||
|         # test ending lines |         # test ending lines | ||||||
|         self.assertEqual(lines[-2], self.transcript[-1]['text']) |         self.assertEqual(lines[-2], self.transcript[-1]['text']) | ||||||
|         self.assertEqual(lines[-1], "") |         self.assertEqual(lines[-1], "") | ||||||
| 
 | 
 | ||||||
|  |     def test_pretty_print_formatter(self): | ||||||
|  |         content = PrettyPrintFormatter(self.transcript).format() | ||||||
|  | 
 | ||||||
|  |         self.assertEqual(content, pprint.pformat(self.transcript)) | ||||||
|  | 
 | ||||||
|     def test_json_formatter(self): |     def test_json_formatter(self): | ||||||
|         content = JSONFormatter(self.transcript).format() |         content = JSONFormatter(self.transcript).format() | ||||||
|  | 
 | ||||||
|         self.assertEqual(json.loads(content), self.transcript) |         self.assertEqual(json.loads(content), self.transcript) | ||||||
| 
 | 
 | ||||||
|     def test_text_formatter(self): |     def test_text_formatter(self): | ||||||
|         content = TextFormatter(self.transcript).format() |         content = TextFormatter(self.transcript).format() | ||||||
|         lines = content.split('\n') |         lines = content.split('\n') | ||||||
|  | 
 | ||||||
|         self.assertEqual(lines[0], self.transcript[0]["text"]) |         self.assertEqual(lines[0], self.transcript[0]["text"]) | ||||||
|         self.assertEqual(lines[-1], self.transcript[-1]["text"]) |         self.assertEqual(lines[-1], self.transcript[-1]["text"]) | ||||||
|  | 
 | ||||||
|  |     def test_formatter_loader(self): | ||||||
|  |         loader = FormatterLoader('json') | ||||||
|  |         formatter = loader.load(self.transcript) | ||||||
|  | 
 | ||||||
|  |         self.assertTrue(isinstance(formatter, JSONFormatter)) | ||||||
|  | 
 | ||||||
|  |     def test_formatter_loader__default_formatter(self): | ||||||
|  |         loader = FormatterLoader() | ||||||
|  |         formatter = loader.load(self.transcript) | ||||||
|  | 
 | ||||||
|  |         self.assertTrue(isinstance(formatter, PrettyPrintFormatter)) | ||||||
|  | 
 | ||||||
|  |     def test_formatter_loader__unknown_format(self): | ||||||
|  |         with self.assertRaises(FormatterLoader.UnknownFormatterType): | ||||||
|  |             FormatterLoader('png') | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue