Add `preserve_formatting` parameter to `get_transcripts()`, plus tests
Also update the assertions in the `get_transcripts()` tests to include a trailing `False` for the new `preserve_formatting` param
This commit is contained in:
parent
fdedfff681
commit
72e9781528
|
@ -74,7 +74,8 @@ class YouTubeTranscriptApi(object):
|
|||
preserve_formatting=preserve_formatting)
|
||||
|
||||
@classmethod
|
||||
def get_transcripts(cls, video_ids, languages=('en',), continue_after_error=False, proxies=None, cookies=None):
|
||||
def get_transcripts(cls, video_ids, languages=('en',), continue_after_error=False, proxies=None,
|
||||
cookies=None, preserve_formatting=False):
|
||||
"""
|
||||
Retrieves the transcripts for a list of videos.
|
||||
|
||||
|
@ -91,6 +92,8 @@ class YouTubeTranscriptApi(object):
|
|||
:type proxies: {'http': str, 'https': str} - http://docs.python-requests.org/en/master/user/advanced/#proxies
|
||||
:param cookies: a string of the path to a text file containing youtube authorization cookies
|
||||
:type cookies: str
|
||||
:param preserve_formatting: whether to keep select HTML text formatting
|
||||
:type preserve_formatting: bool
|
||||
:return: a tuple containing a dictionary mapping video ids onto their corresponding transcripts, and a list of
|
||||
video ids, which could not be retrieved
|
||||
:rtype ({str: [{'text': str, 'start': float, 'end': float}]}, [str]):
|
||||
|
@ -102,7 +105,7 @@ class YouTubeTranscriptApi(object):
|
|||
|
||||
for video_id in video_ids:
|
||||
try:
|
||||
data[video_id] = cls.get_transcript(video_id, languages, proxies, cookies)
|
||||
data[video_id] = cls.get_transcript(video_id, languages, proxies, cookies, preserve_formatting)
|
||||
except Exception as exception:
|
||||
if not continue_after_error:
|
||||
raise exception
|
||||
|
|
|
@ -283,8 +283,8 @@ class TestYouTubeTranscriptApi(TestCase):
|
|||
|
||||
YouTubeTranscriptApi.get_transcripts([video_id_1, video_id_2], languages=languages)
|
||||
|
||||
mock_get_transcript.assert_any_call(video_id_1, languages, None, None)
|
||||
mock_get_transcript.assert_any_call(video_id_2, languages, None, None)
|
||||
mock_get_transcript.assert_any_call(video_id_1, languages, None, None, False)
|
||||
mock_get_transcript.assert_any_call(video_id_2, languages, None, None, False)
|
||||
self.assertEqual(mock_get_transcript.call_count, 2)
|
||||
|
||||
@patch('youtube_transcript_api.YouTubeTranscriptApi.get_transcript', side_effect=Exception('Error'))
|
||||
|
@ -299,20 +299,20 @@ class TestYouTubeTranscriptApi(TestCase):
|
|||
|
||||
YouTubeTranscriptApi.get_transcripts(['video_id_1', 'video_id_2'], continue_after_error=True)
|
||||
|
||||
mock_get_transcript.assert_any_call(video_id_1, ('en',), None, None)
|
||||
mock_get_transcript.assert_any_call(video_id_2, ('en',), None, None)
|
||||
mock_get_transcript.assert_any_call(video_id_1, ('en',), None, None, False)
|
||||
mock_get_transcript.assert_any_call(video_id_2, ('en',), None, None, False)
|
||||
|
||||
@patch('youtube_transcript_api.YouTubeTranscriptApi.get_transcript')
|
||||
def test_get_transcripts__with_cookies(self, mock_get_transcript):
|
||||
cookies = '/example_cookies.txt'
|
||||
YouTubeTranscriptApi.get_transcripts(['GJLlxj_dtq8'], cookies=cookies)
|
||||
mock_get_transcript.assert_any_call('GJLlxj_dtq8', ('en',), None, cookies)
|
||||
mock_get_transcript.assert_any_call('GJLlxj_dtq8', ('en',), None, cookies, False)
|
||||
|
||||
@patch('youtube_transcript_api.YouTubeTranscriptApi.get_transcript')
|
||||
def test_get_transcripts__with_proxies(self, mock_get_transcript):
|
||||
proxies = {'http': '', 'https:': ''}
|
||||
YouTubeTranscriptApi.get_transcripts(['GJLlxj_dtq8'], proxies=proxies)
|
||||
mock_get_transcript.assert_any_call('GJLlxj_dtq8', ('en',), proxies, None)
|
||||
mock_get_transcript.assert_any_call('GJLlxj_dtq8', ('en',), proxies, None, False)
|
||||
|
||||
def test_load_cookies(self):
|
||||
dirname, filename = os.path.split(os.path.abspath(__file__))
|
||||
|
|
Loading…
Reference in New Issue