From 72e97815289016e6296575f1ee03f175fadce870 Mon Sep 17 00:00:00 2001 From: "E. Seiver" <5547078+eseiver@users.noreply.github.com> Date: Wed, 12 Apr 2023 12:43:48 -0700 Subject: [PATCH] add preserve_formatting to get_transcripts() + tests also assertion tests for `get_transcripts()` updated to include `False` at the end for new `preserve_formatting` param --- youtube_transcript_api/_api.py | 7 +++++-- youtube_transcript_api/test/test_api.py | 12 ++++++------ 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/youtube_transcript_api/_api.py b/youtube_transcript_api/_api.py index 99f7d53..d57fa3c 100644 --- a/youtube_transcript_api/_api.py +++ b/youtube_transcript_api/_api.py @@ -74,7 +74,8 @@ class YouTubeTranscriptApi(object): preserve_formatting=preserve_formatting) @classmethod - def get_transcripts(cls, video_ids, languages=('en',), continue_after_error=False, proxies=None, cookies=None): + def get_transcripts(cls, video_ids, languages=('en',), continue_after_error=False, proxies=None, + cookies=None, preserve_formatting=False): """ Retrieves the transcripts for a list of videos. @@ -91,6 +92,8 @@ class YouTubeTranscriptApi(object): :type proxies: {'http': str, 'https': str} - http://docs.python-requests.org/en/master/user/advanced/#proxies :param cookies: a string of the path to a text file containing youtube authorization cookies :type cookies: str + :param preserve_formatting: whether to keep select HTML text formatting + :type preserve_formatting: bool :return: a tuple containing a dictionary mapping video ids onto their corresponding transcripts, and a list of video ids, which could not be retrieved :rtype ({str: [{'text': str, 'start': float, 'end': float}]}, [str]}): @@ -102,7 +105,7 @@ class YouTubeTranscriptApi(object): for video_id in video_ids: try: - data[video_id] = cls.get_transcript(video_id, languages, proxies, cookies) + data[video_id] = cls.get_transcript(video_id, languages, proxies, cookies, preserve_formatting) except Exception as exception: if not continue_after_error: raise exception diff --git a/youtube_transcript_api/test/test_api.py b/youtube_transcript_api/test/test_api.py index 122ffd7..36d60a5 100644 --- a/youtube_transcript_api/test/test_api.py +++ b/youtube_transcript_api/test/test_api.py @@ -283,8 +283,8 @@ class TestYouTubeTranscriptApi(TestCase): YouTubeTranscriptApi.get_transcripts([video_id_1, video_id_2], languages=languages) - mock_get_transcript.assert_any_call(video_id_1, languages, None, None) - mock_get_transcript.assert_any_call(video_id_2, languages, None, None) + mock_get_transcript.assert_any_call(video_id_1, languages, None, None, False) + mock_get_transcript.assert_any_call(video_id_2, languages, None, None, False) self.assertEqual(mock_get_transcript.call_count, 2) @patch('youtube_transcript_api.YouTubeTranscriptApi.get_transcript', side_effect=Exception('Error')) @@ -299,20 +299,20 @@ class TestYouTubeTranscriptApi(TestCase): YouTubeTranscriptApi.get_transcripts(['video_id_1', 'video_id_2'], continue_after_error=True) - mock_get_transcript.assert_any_call(video_id_1, ('en',), None, None) - mock_get_transcript.assert_any_call(video_id_2, ('en',), None, None) + mock_get_transcript.assert_any_call(video_id_1, ('en',), None, None, False) + mock_get_transcript.assert_any_call(video_id_2, ('en',), None, None, False) @patch('youtube_transcript_api.YouTubeTranscriptApi.get_transcript') def test_get_transcripts__with_cookies(self, mock_get_transcript): cookies = '/example_cookies.txt' YouTubeTranscriptApi.get_transcripts(['GJLlxj_dtq8'], cookies=cookies) - mock_get_transcript.assert_any_call('GJLlxj_dtq8', ('en',), None, cookies) + mock_get_transcript.assert_any_call('GJLlxj_dtq8', ('en',), None, cookies, False) @patch('youtube_transcript_api.YouTubeTranscriptApi.get_transcript') def test_get_transcripts__with_proxies(self, mock_get_transcript): proxies = {'http': '', 'https:': ''} YouTubeTranscriptApi.get_transcripts(['GJLlxj_dtq8'], proxies=proxies) - mock_get_transcript.assert_any_call('GJLlxj_dtq8', ('en',), proxies, None) + mock_get_transcript.assert_any_call('GJLlxj_dtq8', ('en',), proxies, None, False) def test_load_cookies(self): dirname, filename = os.path.split(os.path.abspath(__file__))