added FormatterLoader
This commit is contained in:
parent
71268dfad9
commit
d314139329
|
@ -37,13 +37,15 @@ class CouldNotRetrieveTranscript(Exception):
|
|||
|
||||
class VideoUnavailable(CouldNotRetrieveTranscript):
    """Transcript retrieval failure reported as: the video is no longer available."""

    CAUSE_MESSAGE = "The video is no longer available"
|
||||
|
||||
|
||||
|
||||
class TooManyRequests(CouldNotRetrieveTranscript):
    """Transcript retrieval failure reported when YouTube demands a captcha.

    The cause message explains the situation and lists the available
    workarounds, one per line.
    """

    # Adjacent string literals are concatenated at compile time; each "\n"
    # ends one line of the user-facing message.
    CAUSE_MESSAGE = (
        "YouTube is receiving too many requests from this IP and now requires solving a captcha to continue. "
        "One of the following things can be done to work around this:\n"
        "- Manually solve the captcha in a browser and export the cookie. "
        "Read here how to use that cookie with youtube-transcript-api: "
        "https://github.com/jdepoix/youtube-transcript-api#cookies\n"
        "- Use a different IP address\n"
        "- Wait until the ban on your IP has been lifted"
    )
|
||||
|
||||
|
||||
class TranscriptsDisabled(CouldNotRetrieveTranscript):
    """Transcript retrieval failure reported as: subtitles are disabled for the video."""

    CAUSE_MESSAGE = "Subtitles are disabled for this video"
|
||||
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
import json
|
||||
|
||||
import pprint
|
||||
|
||||
|
||||
class Formatter(object):
|
||||
"""Formatter should be used as an abstract base class.
|
||||
|
@ -22,6 +24,16 @@ class Formatter(object):
|
|||
'their own .format() method.')
|
||||
|
||||
|
||||
class PrettyPrintFormatter(Formatter):
    """Formatter that renders the transcript with ``pprint.pformat``."""

    def format(self, **kwargs):
        """Pretty prints a transcript.

        :param kwargs: Keyword arguments passed through unchanged to
            ``pprint.pformat``.
        :return: A pretty printed string representation of the transcript.
        :rtype: str
        """
        pretty_transcript = pprint.pformat(self._transcript, **kwargs)
        return pretty_transcript
|
||||
|
||||
|
||||
class JSONFormatter(Formatter):
|
||||
def format(self, **kwargs):
|
||||
"""Converts a transcript into a JSON string.
|
||||
|
@ -72,12 +84,12 @@ class WebVTTFormatter(Formatter):
|
|||
"""
|
||||
lines = []
|
||||
for i, line in enumerate(self._transcript):
|
||||
if i < len(self._transcript)-1:
|
||||
if i < len(self._transcript) - 1:
|
||||
# Looks ahead, use next start time since duration value
|
||||
# would create an overlap between start times.
|
||||
time_text = "{} --> {}".format(
|
||||
self._seconds_to_timestamp(line['start']),
|
||||
self._seconds_to_timestamp(self._transcript[i+1]['start'])
|
||||
self._seconds_to_timestamp(self._transcript[i + 1]['start'])
|
||||
)
|
||||
else:
|
||||
# Reached the end, cannot look ahead, use duration now.
|
||||
|
@ -89,3 +101,27 @@ class WebVTTFormatter(Formatter):
|
|||
lines.append("{}\n{}".format(time_text, line['text']))
|
||||
|
||||
return "WEBVTT\n\n" + "\n\n".join(lines) + "\n"
|
||||
|
||||
|
||||
class FormatterLoader(object):
    """Selects a formatter class by its short type name and instantiates it.

    ``TYPES`` maps every supported type name to the formatter class that
    implements it. A loader is created for one type name; ``load()`` then
    builds a formatter of that class for a given transcript.
    """

    TYPES = {
        'json': JSONFormatter,
        'pretty': PrettyPrintFormatter,
        'text': TextFormatter,
        'webvtt': WebVTTFormatter,
    }

    # 'webvvt' is the misspelled key this class originally registered for
    # WebVTTFormatter. It is still accepted so callers that passed the old
    # spelling keep working, but it is not advertised in error messages.
    _LEGACY_ALIASES = {
        'webvvt': 'webvtt',
    }

    class UnknownFormatterType(Exception):
        """Raised when a formatter type name is not a key of ``TYPES``."""

        def __init__(self, formatter_type):
            """Build the error message for the rejected type name.

            :param formatter_type: The type name that could not be resolved.
            """
            # str.format() instead of an f-string, matching the formatting
            # style used by the other formatters in this module.
            super(FormatterLoader.UnknownFormatterType, self).__init__(
                'The format \'{formatter_type}\' is not supported. '
                'Choose one of the following formats: {supported_formatter_types}'.format(
                    formatter_type=formatter_type,
                    supported_formatter_types=', '.join(FormatterLoader.TYPES),
                )
            )

    def __init__(self, formatter_type='pretty'):
        """Resolve ``formatter_type`` to a formatter class.

        :param formatter_type: A key of ``TYPES``; defaults to ``'pretty'``.
        :raises FormatterLoader.UnknownFormatterType: If the name is not
            supported.
        """
        # Map the legacy misspelling onto its corrected name before lookup.
        resolved_type = FormatterLoader._LEGACY_ALIASES.get(formatter_type, formatter_type)
        if resolved_type not in FormatterLoader.TYPES:
            # Report the name exactly as the caller supplied it.
            raise FormatterLoader.UnknownFormatterType(formatter_type)
        self._formatter = FormatterLoader.TYPES[resolved_type]

    def load(self, transcript):
        """Instantiate the selected formatter class for ``transcript``.

        :param transcript: Passed as the only argument to the formatter
            class constructor.
        :return: A new instance of the formatter class chosen in ``__init__``.
        """
        return self._formatter(transcript)
|
||||
|
|
|
@ -1,12 +1,15 @@
|
|||
import json
|
||||
from mock import MagicMock
|
||||
from unittest import TestCase
|
||||
|
||||
import json
|
||||
|
||||
import pprint
|
||||
|
||||
from youtube_transcript_api.formatters import (
|
||||
Formatter,
|
||||
JSONFormatter,
|
||||
TextFormatter,
|
||||
WebVTTFormatter
|
||||
WebVTTFormatter,
|
||||
PrettyPrintFormatter, FormatterLoader
|
||||
)
|
||||
|
||||
|
||||
|
@ -35,6 +38,7 @@ class TestFormatters(TestCase):
|
|||
def test_webvtt_formatter_starting(self):
|
||||
content = WebVTTFormatter(self.transcript).format()
|
||||
lines = content.split('\n')
|
||||
|
||||
# test starting lines
|
||||
self.assertEqual(lines[0], "WEBVTT")
|
||||
self.assertEqual(lines[1], "")
|
||||
|
@ -42,16 +46,40 @@ class TestFormatters(TestCase):
|
|||
def test_webvtt_formatter_ending(self):
    """WebVTT output ends with the last transcript text followed by an empty line."""
    rendered_lines = WebVTTFormatter(self.transcript).format().split('\n')
    final_text = self.transcript[-1]['text']

    # test ending lines
    self.assertEqual(rendered_lines[-2], final_text)
    self.assertEqual(rendered_lines[-1], "")
|
||||
|
||||
|
||||
def test_pretty_print_formatter(self):
    """PrettyPrintFormatter output equals ``pprint.pformat`` of the transcript."""
    formatter = PrettyPrintFormatter(self.transcript)
    rendered = formatter.format()

    self.assertEqual(rendered, pprint.pformat(self.transcript))
|
||||
|
||||
def test_json_formatter(self):
    """JSONFormatter output decodes back to the original transcript."""
    formatter = JSONFormatter(self.transcript)
    decoded = json.loads(formatter.format())

    self.assertEqual(decoded, self.transcript)
|
||||
|
||||
def test_text_formatter(self):
    """First and last lines of the text output match the first and last transcript texts."""
    rendered_lines = TextFormatter(self.transcript).format().split('\n')

    # Index 0 checks the first line, index -1 the last one.
    for index in (0, -1):
        self.assertEqual(rendered_lines[index], self.transcript[index]["text"])
|
||||
|
||||
def test_formatter_loader(self):
    """A loader created for 'json' loads a JSONFormatter instance."""
    loader = FormatterLoader('json')
    formatter = loader.load(self.transcript)

    # assertIsInstance reports the actual object and expected class on
    # failure, unlike assertTrue(isinstance(...)).
    self.assertIsInstance(formatter, JSONFormatter)
|
||||
|
||||
def test_formatter_loader__default_formatter(self):
    """A loader created without a type name loads a PrettyPrintFormatter instance."""
    loader = FormatterLoader()
    formatter = loader.load(self.transcript)

    # assertIsInstance reports the actual object and expected class on
    # failure, unlike assertTrue(isinstance(...)).
    self.assertIsInstance(formatter, PrettyPrintFormatter)
|
||||
|
||||
def test_formatter_loader__unknown_format(self):
    """Constructing a loader with an unsupported type name raises UnknownFormatterType."""
    unsupported_type = 'png'
    expected_error = FormatterLoader.UnknownFormatterType

    with self.assertRaises(expected_error):
        FormatterLoader(unsupported_type)
|
||||
|
|
Loading…
Reference in New Issue