Revert "Add formatters module"
Reverting again; apparently I misunderstood how to revert a commit. Trying again.
This reverts commit dca4021dd7.
This commit is contained in:
parent
d75ad8c402
commit
1e9b2c7727
4
setup.py
4
setup.py
|
@ -24,10 +24,10 @@ def get_test_suite():
|
|||
|
||||
setuptools.setup(
|
||||
name="youtube_transcript_api",
|
||||
version="0.3.0",
|
||||
version="0.3.1",
|
||||
author="Jonas Depoix",
|
||||
author_email="jonas.depoix@web.de",
|
||||
description="This is an python API which allows you to get the transcripts/subtitles for a given YouTube video. It also works for automatically generated subtitles and it does not require a headless browser, like other selenium based solutions do!",
|
||||
description="This is an python API which allows you to get the transcripts/subtitles for a given YouTube video. It also works for automatically generated subtitles, supports translating subtitles and it does not require a headless browser, like other selenium based solutions do!",
|
||||
long_description=get_long_description(),
|
||||
long_description_content_type="text/markdown",
|
||||
keywords="youtube-api subtitles youtube transcripts transcript subtitle youtube-subtitles youtube-transcripts cli",
|
||||
|
|
|
@ -12,8 +12,6 @@ from ._errors import (
|
|||
CookiePathInvalid,
|
||||
CookiesInvalid
|
||||
)
|
||||
from .formatters import formats
|
||||
|
||||
|
||||
class YouTubeTranscriptApi():
|
||||
@classmethod
|
||||
|
@ -72,8 +70,7 @@ class YouTubeTranscriptApi():
|
|||
return TranscriptListFetcher(http_client).fetch(video_id)
|
||||
|
||||
@classmethod
|
||||
def get_transcripts(cls, video_ids, languages=('en',),
|
||||
continue_after_error=False, proxies=None, cookies=None, format=None):
|
||||
def get_transcripts(cls, video_ids, languages=('en',), continue_after_error=False, proxies=None, cookies=None):
|
||||
"""
|
||||
Retrieves the transcripts for a list of videos.
|
||||
|
||||
|
@ -99,8 +96,7 @@ class YouTubeTranscriptApi():
|
|||
|
||||
for video_id in video_ids:
|
||||
try:
|
||||
data[video_id] = cls.get_transcript(video_id, languages,
|
||||
proxies, cookies, format=format)
|
||||
data[video_id] = cls.get_transcript(video_id, languages, proxies, cookies)
|
||||
except Exception as exception:
|
||||
if not continue_after_error:
|
||||
raise exception
|
||||
|
@ -110,8 +106,7 @@ class YouTubeTranscriptApi():
|
|||
return data, unretrievable_videos
|
||||
|
||||
@classmethod
|
||||
def get_transcript(cls, video_id, languages=('en',), proxies=None,
|
||||
cookies=None, format=None):
|
||||
def get_transcript(cls, video_id, languages=('en',), proxies=None, cookies=None):
|
||||
"""
|
||||
Retrieves the transcript for a single video. This is just a shortcut for calling::
|
||||
|
||||
|
@ -130,10 +125,7 @@ class YouTubeTranscriptApi():
|
|||
:return: a list of dictionaries containing the 'text', 'start' and 'duration' keys
|
||||
:rtype [{'text': str, 'start': float, 'end': float}]:
|
||||
"""
|
||||
Formatter = formats.get_formatter(format)
|
||||
transcript = cls.list_transcripts(
|
||||
video_id,proxies, cookies).find_transcript(languages).fetch()
|
||||
return Formatter.format(transcript)
|
||||
return cls.list_transcripts(video_id, proxies, cookies).find_transcript(languages).fetch()
|
||||
|
||||
@classmethod
|
||||
def _load_cookies(cls, cookies, video_id):
|
||||
|
|
|
@ -1,9 +1,10 @@
|
|||
import json
|
||||
|
||||
import pprint
|
||||
|
||||
import argparse
|
||||
|
||||
from ._api import YouTubeTranscriptApi
|
||||
from .formatters import formats
|
||||
|
||||
|
||||
class YouTubeTranscriptCli():
|
||||
|
@ -25,24 +26,19 @@ class YouTubeTranscriptCli():
|
|||
transcripts = []
|
||||
exceptions = []
|
||||
|
||||
Formatter = formats.get_formatter(parsed_args.format)
|
||||
|
||||
for video_id in parsed_args.video_ids:
|
||||
try:
|
||||
transcript = self._fetch_transcript(
|
||||
parsed_args, proxies, cookies, video_id)
|
||||
transcripts.append(Formatter.format(transcript))
|
||||
transcripts.append(self._fetch_transcript(parsed_args, proxies, cookies, video_id))
|
||||
except Exception as exception:
|
||||
exceptions.append(exception)
|
||||
|
||||
return ''.join(
|
||||
return '\n\n'.join(
|
||||
[str(exception) for exception in exceptions]
|
||||
+ ([Formatter.combine(transcripts)] if transcripts else [])
|
||||
+ ([json.dumps(transcripts) if parsed_args.json else pprint.pformat(transcripts)] if transcripts else [])
|
||||
)
|
||||
|
||||
def _fetch_transcript(self, parsed_args, proxies, cookies, video_id):
|
||||
transcript_list = YouTubeTranscriptApi.list_transcripts(
|
||||
video_id, proxies=proxies, cookies=cookies)
|
||||
transcript_list = YouTubeTranscriptApi.list_transcripts(video_id, proxies=proxies, cookies=cookies)
|
||||
|
||||
if parsed_args.list_transcripts:
|
||||
return str(transcript_list)
|
||||
|
@ -102,9 +98,11 @@ class YouTubeTranscriptCli():
|
|||
help='If this flag is set transcripts which have been manually created will not be retrieved.',
|
||||
)
|
||||
parser.add_argument(
|
||||
'--format',
|
||||
default=None,
|
||||
help="Use this flag to set which parser format to use, default is 'json'",
|
||||
'--json',
|
||||
action='store_const',
|
||||
const=True,
|
||||
default=False,
|
||||
help='If this flag is set the output will be JSON formatted.',
|
||||
)
|
||||
parser.add_argument(
|
||||
'--translate',
|
||||
|
|
|
@ -1,174 +0,0 @@
|
|||
from collections import defaultdict
|
||||
import json
|
||||
import re
|
||||
|
||||
|
||||
def parse_timecode(time):
    """Converts a `time` into a formatted transcript timecode.

    :param time: time in seconds; any value accepted by ``float()``.
    :type time: float
    :return: a string formatted as a timecode, 'HH:MM:SS,MS'
    :rtype str
    :raises ValueError: if `time` cannot be converted to a float.

    :example:
    >>> parse_timecode(6.93)
    '00:00:06,930'
    >>> parse_timecode(3661.5)
    '01:01:01,500'
    """
    time = float(time)
    whole_seconds = int(time)

    # Split with divmod so every field stays inside its own unit. The minutes
    # field has to be reduced modulo 60, otherwise 3661 seconds would be
    # rendered as '01:61:01' instead of '01:01:01'.
    minutes, seconds = divmod(whole_seconds, 60)
    hours, minutes = divmod(minutes, 60)

    # The round() absorbs float noise (e.g. 929.9999999 -> 930). Cap the
    # result at 999 so a fraction such as .999999 cannot spill over into a
    # four digit millisecond field.
    milliseconds = min(999, int(round((time - whole_seconds) * 1000, 2)))

    return "{hours}:{mins}:{secs},{ms}".format(
        hours=str(hours).rjust(2, '0'),
        mins=str(minutes).rjust(2, '0'),
        secs=str(seconds).rjust(2, '0'),
        ms=str(milliseconds).rjust(3, '0'),
    )
|
||||
|
||||
|
||||
class TranscriptFormatter(object):
    """Base class for every transcript formatter.

    Derive from this class to add a custom output format. A subclass has to
    provide `format` and may override `combine` and/or `DELIMITER`.
    """
    HTML_TAG_REGEX = re.compile(r'<[^>]*>', re.IGNORECASE)
    DELIMITER = ''

    @classmethod
    def combine(cls, transcripts):
        """Merge several formatted transcripts into a single string.

        Each item is converted with str() and the results are joined on
        `cls.DELIMITER`. Subclasses may override this method.

        :param transcripts: the formatted transcripts to merge
        :type transcripts: list
        :return: the joined transcripts
        :rtype: str
        """
        rendered = [str(entry) for entry in transcripts]
        return cls.DELIMITER.join(rendered)

    @classmethod
    def format(cls, transcript_data):
        """Format a transcript; must be implemented by every subclass.

        :param transcript_data: the transcript data to format
        :raises NotImplementedError: always, in this base class
        """
        missing = cls.__name__ + '.format'
        raise NotImplementedError(missing)
|
||||
|
||||
|
||||
class JSONTranscriptFormatter(TranscriptFormatter):
    """Formatter producing JSON output."""
    DELIMITER = ','

    @classmethod
    def format(cls, transcript_data):
        """Return `transcript_data` unchanged.

        Serialisation is deferred to `combine`, which dumps all transcripts
        in one go.
        """
        return transcript_data

    @classmethod
    def combine(cls, transcripts):
        """Serialise the collected transcripts into one JSON document.

        :param transcripts: the transcripts returned by `format`
        :return: the JSON encoded transcripts
        :rtype: str
        """
        encoded = json.dumps(transcripts)
        return encoded
|
||||
|
||||
|
||||
class TextTranscriptFormatter(TranscriptFormatter):
    """Formatter producing plain text.

    Emits only the 'text' value of every transcript entry, one entry per
    line and without any timecodes.
    """
    DELIMITER = '\n\n'

    @classmethod
    def format(cls, transcript_data):
        """Render the transcript as newline separated text.

        :param transcript_data: transcript entries, each holding a 'text' key
        :return: the text lines joined by newlines, with a trailing newline
        :rtype: str
        """
        text_lines = [entry['text'] for entry in transcript_data]
        body = '\n'.join(text_lines)
        return '{}\n'.format(body)
|
||||
|
||||
|
||||
class SRTTranscriptFormatter(TranscriptFormatter):
    """Formatter producing the SRT subtitle format.

    Every transcript entry becomes one numbered cue made of a counter line,
    a 'start --> end' timecode line and the text.
    """
    DELIMITER = '\n\n'

    @classmethod
    def format(cls, transcript_data):
        """Render the transcript as the content of a simple .srt file.

        :param transcript_data: transcript entries providing 'start', 'text'
            and optionally 'duration' (treated as 0.0 when absent)
        :return: the cues separated by blank lines, with a trailing newline
        :rtype: str
        """
        pieces = []
        for number, cue in enumerate(transcript_data, start=1):
            begins_at = float(cue.get('start'))
            length = float(cue.get('duration', '0.0'))

            timing = "{start_time} --> {end_time}\n".format(
                start_time=parse_timecode(begins_at),
                end_time=parse_timecode(begins_at + length)
            )
            pieces.extend([
                "{frame}\n".format(frame=number),
                timing,
                "{text}".format(text=cue.get('text')),
            ])

            # A blank line separates cues; none is added after the last one.
            is_last = number >= len(transcript_data)
            if not is_last:
                pieces.append('\n\n')
        return '{}\n'.format(''.join(pieces))
|
||||
|
||||
|
||||
class TranscriptFormatterFactory(object):
    """A registry mapping format names to TranscriptFormatter classes.

    Allows for adding additional custom formatter classes for the API to
    use. Custom classes must inherit from the TranscriptFormatter base class.
    Names that were never registered resolve to JSONTranscriptFormatter.
    """
    def __init__(self):
        # defaultdict *calls* its factory to build a missing value, so the
        # factory has to return the class. Passing the class itself would
        # hand out a JSONTranscriptFormatter instance instead of the class.
        self._formatters = defaultdict(lambda: JSONTranscriptFormatter)

    def add_formatter(self, name, formatter_class):
        """Register a single transcript formatter.

        :param name: a name given to the `formatter_class`
        :type name: str
        :param formatter_class: a subclass of TranscriptFormatter
        :type formatter_class: class
        :rtype None
        :raises TypeError: if `formatter_class` is not a subclass of
            TranscriptFormatter
        """
        if not issubclass(formatter_class, TranscriptFormatter):
            raise TypeError(
                '{0} must be a subclass of TranscriptFormatter'.format(
                    formatter_class)
            )
        self._formatters[name] = formatter_class

    def add_formatters(self, formatters_dict):
        """Register multiple transcript formatters at a time.

        :param formatters_dict: key(s) are the string name to be given
            to the formatter class, value for each key should be a subclass
            of TranscriptFormatter.
        :type formatters_dict: dict
        :rtype None
        :raises TypeError: if any value is not a subclass of
            TranscriptFormatter
        """
        for name, formatter_class in formatters_dict.items():
            self.add_formatter(name, formatter_class)

    def get_formatter(self, name):
        """Retrieve a formatter class by its assigned name.

        An unknown name (including None) falls back to
        JSONTranscriptFormatter. Because the registry is a defaultdict, that
        lookup also stores the fallback under `name`.

        :param name: the string name given to the formatter class.
        :type name: str
        :return: a subclass of `TranscriptFormatter`
        """
        return self._formatters[name]
|
||||
|
||||
|
||||
# Module-level registry, pre-populated with the built-in formatters.
formats = TranscriptFormatterFactory()
formats.add_formatter('json', JSONTranscriptFormatter)
formats.add_formatter('srt', SRTTranscriptFormatter)
formats.add_formatter('text', TextTranscriptFormatter)
|
|
@ -1,6 +1,6 @@
|
|||
from unittest import TestCase
|
||||
from mock import patch
|
||||
import json
|
||||
|
||||
import os
|
||||
|
||||
import requests
|
||||
|
@ -21,10 +21,7 @@ from youtube_transcript_api import (
|
|||
|
||||
|
||||
def load_asset(filename):
|
||||
filepath = '{dirname}/assets/{filename}'.format(
|
||||
dirname=os.path.dirname(__file__), filename=filename)
|
||||
|
||||
with open(filepath, 'r', encoding='utf-8') as file:
|
||||
with open('{dirname}/assets/{filename}'.format(dirname=os.path.dirname(__file__), filename=filename)) as file:
|
||||
return file.read()
|
||||
|
||||
|
||||
|
@ -161,7 +158,7 @@ class TestYouTubeTranscriptApi(TestCase):
|
|||
def test_get_transcript__with_proxy(self):
|
||||
proxies = {'http': '', 'https:': ''}
|
||||
transcript = YouTubeTranscriptApi.get_transcript(
|
||||
'GJLlxj_dtq8', proxies=proxies, format=None
|
||||
'GJLlxj_dtq8', proxies=proxies
|
||||
)
|
||||
self.assertEqual(
|
||||
transcript,
|
||||
|
@ -194,8 +191,8 @@ class TestYouTubeTranscriptApi(TestCase):
|
|||
|
||||
YouTubeTranscriptApi.get_transcripts([video_id_1, video_id_2], languages=languages)
|
||||
|
||||
mock_get_transcript.assert_any_call(video_id_1, languages, None, None, format=None)
|
||||
mock_get_transcript.assert_any_call(video_id_2, languages, None, None, format=None)
|
||||
mock_get_transcript.assert_any_call(video_id_1, languages, None, None)
|
||||
mock_get_transcript.assert_any_call(video_id_2, languages, None, None)
|
||||
self.assertEqual(mock_get_transcript.call_count, 2)
|
||||
|
||||
@patch('youtube_transcript_api.YouTubeTranscriptApi.get_transcript', side_effect=Exception('Error'))
|
||||
|
@ -210,20 +207,20 @@ class TestYouTubeTranscriptApi(TestCase):
|
|||
|
||||
YouTubeTranscriptApi.get_transcripts(['video_id_1', 'video_id_2'], continue_after_error=True)
|
||||
|
||||
mock_get_transcript.assert_any_call(video_id_1, ('en',), None, None, format=None)
|
||||
mock_get_transcript.assert_any_call(video_id_2, ('en',), None, None, format=None)
|
||||
mock_get_transcript.assert_any_call(video_id_1, ('en',), None, None)
|
||||
mock_get_transcript.assert_any_call(video_id_2, ('en',), None, None)
|
||||
|
||||
@patch('youtube_transcript_api.YouTubeTranscriptApi.get_transcript')
|
||||
def test_get_transcripts__with_cookies(self, mock_get_transcript):
|
||||
cookies = '/example_cookies.txt'
|
||||
YouTubeTranscriptApi.get_transcripts(['GJLlxj_dtq8'], cookies=cookies)
|
||||
mock_get_transcript.assert_any_call('GJLlxj_dtq8', ('en',), None, cookies, format=None)
|
||||
mock_get_transcript.assert_any_call('GJLlxj_dtq8', ('en',), None, cookies)
|
||||
|
||||
@patch('youtube_transcript_api.YouTubeTranscriptApi.get_transcript')
|
||||
def test_get_transcripts__with_proxies(self, mock_get_transcript):
|
||||
proxies = {'http': '', 'https:': ''}
|
||||
YouTubeTranscriptApi.get_transcripts(['GJLlxj_dtq8'], proxies=proxies)
|
||||
mock_get_transcript.assert_any_call('GJLlxj_dtq8', ('en',), proxies, None, format=None)
|
||||
mock_get_transcript.assert_any_call('GJLlxj_dtq8', ('en',), proxies, None)
|
||||
|
||||
def test_load_cookies(self):
|
||||
dirname, filename = os.path.split(os.path.abspath(__file__))
|
||||
|
|
|
@ -25,50 +25,50 @@ class TestYouTubeTranscriptCli(TestCase):
|
|||
YouTubeTranscriptApi.list_transcripts = MagicMock(return_value=self.transcript_list_mock)
|
||||
|
||||
def test_argument_parsing(self):
|
||||
parsed_args = YouTubeTranscriptCli('v1 v2 --format json --languages de en'.split())._parse_args()
|
||||
parsed_args = YouTubeTranscriptCli('v1 v2 --json --languages de en'.split())._parse_args()
|
||||
self.assertEqual(parsed_args.video_ids, ['v1', 'v2'])
|
||||
self.assertEqual(parsed_args.format, 'json')
|
||||
self.assertEqual(parsed_args.json, True)
|
||||
self.assertEqual(parsed_args.languages, ['de', 'en'])
|
||||
self.assertEqual(parsed_args.http_proxy, '')
|
||||
self.assertEqual(parsed_args.https_proxy, '')
|
||||
|
||||
parsed_args = YouTubeTranscriptCli('v1 v2 --languages de en --format json'.split())._parse_args()
|
||||
parsed_args = YouTubeTranscriptCli('v1 v2 --languages de en --json'.split())._parse_args()
|
||||
self.assertEqual(parsed_args.video_ids, ['v1', 'v2'])
|
||||
self.assertEqual(parsed_args.format, 'json')
|
||||
self.assertEqual(parsed_args.json, True)
|
||||
self.assertEqual(parsed_args.languages, ['de', 'en'])
|
||||
self.assertEqual(parsed_args.http_proxy, '')
|
||||
self.assertEqual(parsed_args.https_proxy, '')
|
||||
|
||||
parsed_args = YouTubeTranscriptCli(' --format json v1 v2 --languages de en'.split())._parse_args()
|
||||
parsed_args = YouTubeTranscriptCli(' --json v1 v2 --languages de en'.split())._parse_args()
|
||||
self.assertEqual(parsed_args.video_ids, ['v1', 'v2'])
|
||||
self.assertEqual(parsed_args.format, 'json')
|
||||
self.assertEqual(parsed_args.json, True)
|
||||
self.assertEqual(parsed_args.languages, ['de', 'en'])
|
||||
self.assertEqual(parsed_args.http_proxy, '')
|
||||
self.assertEqual(parsed_args.https_proxy, '')
|
||||
|
||||
parsed_args = YouTubeTranscriptCli(
|
||||
'v1 v2 --languages de en --format json --http-proxy http://user:pass@domain:port --https-proxy https://user:pass@domain:port'.split()
|
||||
'v1 v2 --languages de en --json --http-proxy http://user:pass@domain:port --https-proxy https://user:pass@domain:port'.split()
|
||||
)._parse_args()
|
||||
self.assertEqual(parsed_args.video_ids, ['v1', 'v2'])
|
||||
self.assertEqual(parsed_args.format, 'json')
|
||||
self.assertEqual(parsed_args.json, True)
|
||||
self.assertEqual(parsed_args.languages, ['de', 'en'])
|
||||
self.assertEqual(parsed_args.http_proxy, 'http://user:pass@domain:port')
|
||||
self.assertEqual(parsed_args.https_proxy, 'https://user:pass@domain:port')
|
||||
|
||||
parsed_args = YouTubeTranscriptCli(
|
||||
'v1 v2 --languages de en --format json --http-proxy http://user:pass@domain:port'.split()
|
||||
'v1 v2 --languages de en --json --http-proxy http://user:pass@domain:port'.split()
|
||||
)._parse_args()
|
||||
self.assertEqual(parsed_args.video_ids, ['v1', 'v2'])
|
||||
self.assertEqual(parsed_args.format, 'json')
|
||||
self.assertEqual(parsed_args.json, True)
|
||||
self.assertEqual(parsed_args.languages, ['de', 'en'])
|
||||
self.assertEqual(parsed_args.http_proxy, 'http://user:pass@domain:port')
|
||||
self.assertEqual(parsed_args.https_proxy, '')
|
||||
|
||||
parsed_args = YouTubeTranscriptCli(
|
||||
'v1 v2 --languages de en --format json --https-proxy https://user:pass@domain:port'.split()
|
||||
'v1 v2 --languages de en --json --https-proxy https://user:pass@domain:port'.split()
|
||||
)._parse_args()
|
||||
self.assertEqual(parsed_args.video_ids, ['v1', 'v2'])
|
||||
self.assertEqual(parsed_args.format, 'json')
|
||||
self.assertEqual(parsed_args.json, True)
|
||||
self.assertEqual(parsed_args.languages, ['de', 'en'])
|
||||
self.assertEqual(parsed_args.https_proxy, 'https://user:pass@domain:port')
|
||||
self.assertEqual(parsed_args.http_proxy, '')
|
||||
|
@ -76,28 +76,28 @@ class TestYouTubeTranscriptCli(TestCase):
|
|||
def test_argument_parsing__only_video_ids(self):
|
||||
parsed_args = YouTubeTranscriptCli('v1 v2'.split())._parse_args()
|
||||
self.assertEqual(parsed_args.video_ids, ['v1', 'v2'])
|
||||
self.assertEqual(parsed_args.format, None)
|
||||
self.assertEqual(parsed_args.json, False)
|
||||
self.assertEqual(parsed_args.languages, ['en'])
|
||||
|
||||
def test_argument_parsing__fail_without_video_ids(self):
|
||||
with self.assertRaises(SystemExit):
|
||||
YouTubeTranscriptCli('--format json'.split())._parse_args()
|
||||
YouTubeTranscriptCli('--json'.split())._parse_args()
|
||||
|
||||
def test_argument_parsing__json(self):
|
||||
parsed_args = YouTubeTranscriptCli('v1 v2 --format json'.split())._parse_args()
|
||||
parsed_args = YouTubeTranscriptCli('v1 v2 --json'.split())._parse_args()
|
||||
self.assertEqual(parsed_args.video_ids, ['v1', 'v2'])
|
||||
self.assertEqual(parsed_args.format, 'json')
|
||||
self.assertEqual(parsed_args.json, True)
|
||||
self.assertEqual(parsed_args.languages, ['en'])
|
||||
|
||||
parsed_args = YouTubeTranscriptCli('--format json v1 v2'.split())._parse_args()
|
||||
parsed_args = YouTubeTranscriptCli('--json v1 v2'.split())._parse_args()
|
||||
self.assertEqual(parsed_args.video_ids, ['v1', 'v2'])
|
||||
self.assertEqual(parsed_args.format, 'json')
|
||||
self.assertEqual(parsed_args.json, True)
|
||||
self.assertEqual(parsed_args.languages, ['en'])
|
||||
|
||||
def test_argument_parsing__languages(self):
|
||||
parsed_args = YouTubeTranscriptCli('v1 v2 --languages de en'.split())._parse_args()
|
||||
self.assertEqual(parsed_args.video_ids, ['v1', 'v2'])
|
||||
self.assertEqual(parsed_args.format, None)
|
||||
self.assertEqual(parsed_args.json, False)
|
||||
self.assertEqual(parsed_args.languages, ['de', 'en'])
|
||||
|
||||
def test_argument_parsing__proxies(self):
|
||||
|
@ -135,13 +135,13 @@ class TestYouTubeTranscriptCli(TestCase):
|
|||
def test_argument_parsing__translate(self):
|
||||
parsed_args = YouTubeTranscriptCli('v1 v2 --languages de en --translate cz'.split())._parse_args()
|
||||
self.assertEqual(parsed_args.video_ids, ['v1', 'v2'])
|
||||
self.assertEqual(parsed_args.format, None)
|
||||
self.assertEqual(parsed_args.json, False)
|
||||
self.assertEqual(parsed_args.languages, ['de', 'en'])
|
||||
self.assertEqual(parsed_args.translate, 'cz')
|
||||
|
||||
parsed_args = YouTubeTranscriptCli('v1 v2 --translate cz --languages de en'.split())._parse_args()
|
||||
self.assertEqual(parsed_args.video_ids, ['v1', 'v2'])
|
||||
self.assertEqual(parsed_args.format, None)
|
||||
self.assertEqual(parsed_args.json, False)
|
||||
self.assertEqual(parsed_args.languages, ['de', 'en'])
|
||||
self.assertEqual(parsed_args.translate, 'cz')
|
||||
|
||||
|
@ -204,7 +204,8 @@ class TestYouTubeTranscriptCli(TestCase):
|
|||
YouTubeTranscriptApi.list_transcripts.assert_any_call('v2', proxies=None, cookies=None)
|
||||
|
||||
def test_run__json_output(self):
|
||||
output = YouTubeTranscriptCli('v1 v2 --languages de en --format json'.split()).run()
|
||||
output = YouTubeTranscriptCli('v1 v2 --languages de en --json'.split()).run()
|
||||
|
||||
# will fail if output is not valid json
|
||||
json.loads(output)
|
||||
|
||||
|
|
|
@ -1,99 +0,0 @@
|
|||
from unittest import TestCase
|
||||
from mock import MagicMock
|
||||
import json
|
||||
|
||||
from youtube_transcript_api.formatters import (
|
||||
JSONTranscriptFormatter,
|
||||
parse_timecode,
|
||||
SRTTranscriptFormatter,
|
||||
TextTranscriptFormatter,
|
||||
TranscriptFormatter,
|
||||
TranscriptFormatterFactory
|
||||
)
|
||||
|
||||
|
||||
class TestTranscriptFormatters(TestCase):
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
cls.transcript = [
|
||||
{
|
||||
'text': 'Hey, this is just a test',
|
||||
'start': 0.0,
|
||||
'duration': 1.54
|
||||
},
|
||||
{
|
||||
'text': 'this is not the original transcript',
|
||||
'start': 1.54,
|
||||
'duration': 4.16
|
||||
},
|
||||
{
|
||||
'text': 'just something shorter, I made up for testing',
|
||||
'start': 5.7,
|
||||
'duration': 3.239
|
||||
}
|
||||
]
|
||||
|
||||
def test_base_formatter_combine(self):
|
||||
expecting = ''.join([str(line) for line in self.transcript])
|
||||
|
||||
self.assertEqual(
|
||||
TranscriptFormatter.combine(self.transcript),
|
||||
expecting
|
||||
)
|
||||
|
||||
def test_base_format_not_implemented(self):
|
||||
with self.assertRaises(NotImplementedError):
|
||||
TranscriptFormatter.format(self.transcript)
|
||||
|
||||
def test_text_formatter_format(self):
|
||||
text = '\n'.join([line.get('text') for line in self.transcript])
|
||||
text_fmt = TextTranscriptFormatter.format(self.transcript)
|
||||
self.assertIn(text + '\n', text_fmt)
|
||||
|
||||
def test_srt_formatter_format(self):
|
||||
start = self.transcript[0].get('start')
|
||||
duration = self.transcript[0].get('duration')
|
||||
srt_fmt = SRTTranscriptFormatter.format(self.transcript)
|
||||
self.assertIn('{start} --> {end}'.format(
|
||||
start=parse_timecode(start),
|
||||
end=parse_timecode(start+duration)
|
||||
), srt_fmt)
|
||||
|
||||
def test_json_formatter_format(self):
|
||||
json_fmt = JSONTranscriptFormatter.format(self.transcript)
|
||||
self.assertIsInstance(json.dumps(json_fmt), str)
|
||||
|
||||
def test_invalid_parse_timecode(self):
|
||||
start_time = 'not_float'
|
||||
|
||||
with self.assertRaises(ValueError):
|
||||
parse_timecode(start_time)
|
||||
|
||||
def test_valid_parse_timecode(self):
|
||||
start_time = 0.0
|
||||
end_time = 5.20
|
||||
|
||||
self.assertEqual(
|
||||
parse_timecode(start_time),
|
||||
'00:00:00,000'
|
||||
)
|
||||
|
||||
self.assertEqual(
|
||||
parse_timecode(end_time),
|
||||
'00:00:05,200'
|
||||
)
|
||||
|
||||
def test_formatter_factory_valid_single_add(self):
|
||||
factory = TranscriptFormatterFactory()
|
||||
factory.add_formatter('json', JSONTranscriptFormatter)
|
||||
|
||||
self.assertDictEqual(
|
||||
getattr(factory, '_formatters'),
|
||||
{'json': JSONTranscriptFormatter}
|
||||
)
|
||||
|
||||
def test_formatter_factory_invalid_single_add(self):
|
||||
factory = TranscriptFormatterFactory()
|
||||
|
||||
with self.assertRaises(TypeError):
|
||||
factory.add_formatter('magic', MagicMock)
|
Loading…
Reference in New Issue