added FormatterLoader
This commit is contained in:
		
							parent
							
								
									71268dfad9
								
							
						
					
					
						commit
						d314139329
					
				|  | @ -38,12 +38,14 @@ class CouldNotRetrieveTranscript(Exception): | |||
| class VideoUnavailable(CouldNotRetrieveTranscript): | ||||
|     CAUSE_MESSAGE = 'The video is no longer available' | ||||
| 
 | ||||
| 
 | ||||
| class TooManyRequests(CouldNotRetrieveTranscript): | ||||
|     CAUSE_MESSAGE = ("YouTube is receiving too many requests from this IP and now requires solving a captcha to continue. One of the following things can be done to work around this:\n\ | ||||
|     - Manually solve the captcha in a browser and export the cookie. Read here how to use that cookie with youtube-transcript-api: https://github.com/jdepoix/youtube-transcript-api#cookies\n\ | ||||
|     - Use a different IP address\n\ | ||||
|     - Wait until the ban on your IP has been lifted") | ||||
| 
 | ||||
| 
 | ||||
| class TranscriptsDisabled(CouldNotRetrieveTranscript): | ||||
|     CAUSE_MESSAGE = 'Subtitles are disabled for this video' | ||||
| 
 | ||||
|  |  | |||
|  | @ -1,5 +1,7 @@ | |||
| import json | ||||
| 
 | ||||
| import pprint | ||||
| 
 | ||||
| 
 | ||||
| class Formatter(object): | ||||
|     """Formatter should be used as an abstract base class. | ||||
|  | @ -22,6 +24,16 @@ class Formatter(object): | |||
|             'their own .format() method.') | ||||
| 
 | ||||
| 
 | ||||
class PrettyPrintFormatter(Formatter):
    """Formatter that renders the transcript with the ``pprint`` module."""

    def format(self, **kwargs):
        """Pretty print the transcript.

        :param kwargs: keyword arguments forwarded unchanged to
            ``pprint.pformat``.
        :return: A pretty printed string representation of the transcript.
        :rtype: str
        """
        transcript = self._transcript
        return pprint.pformat(transcript, **kwargs)
| 
 | ||||
| 
 | ||||
| class JSONFormatter(Formatter): | ||||
|     def format(self, **kwargs): | ||||
|         """Converts a transcript into a JSON string. | ||||
|  | @ -89,3 +101,27 @@ class WebVTTFormatter(Formatter): | |||
|             lines.append("{}\n{}".format(time_text, line['text'])) | ||||
|          | ||||
|         return "WEBVTT\n\n" + "\n\n".join(lines) + "\n" | ||||
| 
 | ||||
| 
 | ||||
class FormatterLoader(object):
    """Resolve a format name to a formatter class and instantiate it.

    The loader validates the requested format name once, at construction
    time, and afterwards builds formatter instances for transcripts through
    :meth:`load`.
    """

    # Maps each supported format name to the formatter class implementing it.
    TYPES = {
        'json': JSONFormatter,
        'pretty': PrettyPrintFormatter,
        'text': TextFormatter,
        'webvtt': WebVTTFormatter,
    }

    # Misspelled names accepted by earlier revisions. They are still resolved
    # so existing callers keep working, but are intentionally not advertised
    # in the error message.
    _DEPRECATED_ALIASES = {
        'webvvt': 'webvtt',
    }

    class UnknownFormatterType(Exception):
        """Raised when a format name has no registered formatter."""

        def __init__(self, formatter_type):
            """
            :param formatter_type: the unsupported format name that was
                requested.
            """
            # str.format instead of an f-string keeps this consistent with
            # the rest of the module; sorting makes the listed names
            # deterministic regardless of dict ordering.
            super(FormatterLoader.UnknownFormatterType, self).__init__(
                'The format \'{formatter_type}\' is not supported. '
                'Choose one of the following formats: {supported_formats}'.format(
                    formatter_type=formatter_type,
                    supported_formats=', '.join(sorted(FormatterLoader.TYPES)),
                )
            )

    def __init__(self, formatter_type='pretty'):
        """
        :param formatter_type: name of the format to load; must be one of the
            keys of ``FormatterLoader.TYPES``. Defaults to ``'pretty'``.
        :raises FormatterLoader.UnknownFormatterType: if the name is not
            supported.
        """
        formatter_type = FormatterLoader._DEPRECATED_ALIASES.get(
            formatter_type, formatter_type
        )
        if formatter_type not in FormatterLoader.TYPES:
            raise FormatterLoader.UnknownFormatterType(formatter_type)
        self._formatter = FormatterLoader.TYPES[formatter_type]

    def load(self, transcript):
        """Instantiate the selected formatter class for a transcript.

        :param transcript: the transcript passed to the formatter's
            constructor.
        :return: an instance of the formatter class chosen at construction.
        """
        return self._formatter(transcript)
|  |  | |||
|  | @ -1,12 +1,15 @@ | |||
| import json | ||||
| from mock import MagicMock | ||||
| from unittest import TestCase | ||||
| 
 | ||||
| import json | ||||
| 
 | ||||
| import pprint | ||||
| 
 | ||||
| from youtube_transcript_api.formatters import ( | ||||
|     Formatter, | ||||
|     JSONFormatter, | ||||
|     TextFormatter, | ||||
|     WebVTTFormatter | ||||
|     WebVTTFormatter, | ||||
|     PrettyPrintFormatter, FormatterLoader | ||||
| ) | ||||
| 
 | ||||
| 
 | ||||
|  | @ -35,6 +38,7 @@ class TestFormatters(TestCase): | |||
|     def test_webvtt_formatter_starting(self): | ||||
|         content = WebVTTFormatter(self.transcript).format() | ||||
|         lines = content.split('\n') | ||||
| 
 | ||||
|         # test starting lines | ||||
|         self.assertEqual(lines[0], "WEBVTT") | ||||
|         self.assertEqual(lines[1], "") | ||||
|  | @ -42,16 +46,40 @@ class TestFormatters(TestCase): | |||
|     def test_webvtt_formatter_ending(self): | ||||
|         content = WebVTTFormatter(self.transcript).format() | ||||
|         lines = content.split('\n') | ||||
| 
 | ||||
|         # test ending lines | ||||
|         self.assertEqual(lines[-2], self.transcript[-1]['text']) | ||||
|         self.assertEqual(lines[-1], "") | ||||
| 
 | ||||
|     def test_pretty_print_formatter(self): | ||||
|         content = PrettyPrintFormatter(self.transcript).format() | ||||
| 
 | ||||
|         self.assertEqual(content, pprint.pformat(self.transcript)) | ||||
| 
 | ||||
|     def test_json_formatter(self): | ||||
|         content = JSONFormatter(self.transcript).format() | ||||
| 
 | ||||
|         self.assertEqual(json.loads(content), self.transcript) | ||||
| 
 | ||||
|     def test_text_formatter(self): | ||||
|         content = TextFormatter(self.transcript).format() | ||||
|         lines = content.split('\n') | ||||
| 
 | ||||
|         self.assertEqual(lines[0], self.transcript[0]["text"]) | ||||
|         self.assertEqual(lines[-1], self.transcript[-1]["text"]) | ||||
| 
 | ||||
|     def test_formatter_loader(self): | ||||
|         loader = FormatterLoader('json') | ||||
|         formatter = loader.load(self.transcript) | ||||
| 
 | ||||
|         self.assertTrue(isinstance(formatter, JSONFormatter)) | ||||
| 
 | ||||
|     def test_formatter_loader__default_formatter(self): | ||||
|         loader = FormatterLoader() | ||||
|         formatter = loader.load(self.transcript) | ||||
| 
 | ||||
|         self.assertTrue(isinstance(formatter, PrettyPrintFormatter)) | ||||
| 
 | ||||
|     def test_formatter_loader__unknown_format(self): | ||||
|         with self.assertRaises(FormatterLoader.UnknownFormatterType): | ||||
|             FormatterLoader('png') | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue