Add preserve_formatting to get_transcripts() + tests
Also update the assertion tests for `get_transcripts()` to include `False` at the end of the expected call arguments for the new `preserve_formatting` param.
This commit is contained in:
		
							parent
							
								
									fdedfff681
								
							
						
					
					
						commit
						72e9781528
					
				|  | @ -74,7 +74,8 @@ class YouTubeTranscriptApi(object): | ||||||
|                                                             preserve_formatting=preserve_formatting) |                                                             preserve_formatting=preserve_formatting) | ||||||
| 
 | 
 | ||||||
|     @classmethod |     @classmethod | ||||||
|     def get_transcripts(cls, video_ids, languages=('en',), continue_after_error=False, proxies=None, cookies=None): |     def get_transcripts(cls, video_ids, languages=('en',), continue_after_error=False, proxies=None, | ||||||
|  |                         cookies=None, preserve_formatting=False): | ||||||
|         """ |         """ | ||||||
|         Retrieves the transcripts for a list of videos. |         Retrieves the transcripts for a list of videos. | ||||||
| 
 | 
 | ||||||
|  | @ -91,6 +92,8 @@ class YouTubeTranscriptApi(object): | ||||||
|         :type proxies: {'http': str, 'https': str} - http://docs.python-requests.org/en/master/user/advanced/#proxies |         :type proxies: {'http': str, 'https': str} - http://docs.python-requests.org/en/master/user/advanced/#proxies | ||||||
|         :param cookies: a string of the path to a text file containing youtube authorization cookies |         :param cookies: a string of the path to a text file containing youtube authorization cookies | ||||||
|         :type cookies: str |         :type cookies: str | ||||||
|  |         :param preserve_formatting: whether to keep select HTML text formatting | ||||||
|  |         :type preserve_formatting: bool | ||||||
|         :return: a tuple containing a dictionary mapping video ids onto their corresponding transcripts, and a list of |         :return: a tuple containing a dictionary mapping video ids onto their corresponding transcripts, and a list of | ||||||
|         video ids, which could not be retrieved |         video ids, which could not be retrieved | ||||||
|         :rtype ({str: [{'text': str, 'start': float, 'end': float}]}, [str]}): |         :rtype ({str: [{'text': str, 'start': float, 'end': float}]}, [str]}): | ||||||
|  | @ -102,7 +105,7 @@ class YouTubeTranscriptApi(object): | ||||||
| 
 | 
 | ||||||
|         for video_id in video_ids: |         for video_id in video_ids: | ||||||
|             try: |             try: | ||||||
|                 data[video_id] = cls.get_transcript(video_id, languages, proxies, cookies) |                 data[video_id] = cls.get_transcript(video_id, languages, proxies, cookies, preserve_formatting) | ||||||
|             except Exception as exception: |             except Exception as exception: | ||||||
|                 if not continue_after_error: |                 if not continue_after_error: | ||||||
|                     raise exception |                     raise exception | ||||||
|  |  | ||||||
|  | @ -283,8 +283,8 @@ class TestYouTubeTranscriptApi(TestCase): | ||||||
| 
 | 
 | ||||||
|         YouTubeTranscriptApi.get_transcripts([video_id_1, video_id_2], languages=languages) |         YouTubeTranscriptApi.get_transcripts([video_id_1, video_id_2], languages=languages) | ||||||
| 
 | 
 | ||||||
|         mock_get_transcript.assert_any_call(video_id_1, languages, None, None) |         mock_get_transcript.assert_any_call(video_id_1, languages, None, None, False) | ||||||
|         mock_get_transcript.assert_any_call(video_id_2, languages, None, None) |         mock_get_transcript.assert_any_call(video_id_2, languages, None, None, False) | ||||||
|         self.assertEqual(mock_get_transcript.call_count, 2) |         self.assertEqual(mock_get_transcript.call_count, 2) | ||||||
| 
 | 
 | ||||||
|     @patch('youtube_transcript_api.YouTubeTranscriptApi.get_transcript', side_effect=Exception('Error')) |     @patch('youtube_transcript_api.YouTubeTranscriptApi.get_transcript', side_effect=Exception('Error')) | ||||||
|  | @ -299,20 +299,20 @@ class TestYouTubeTranscriptApi(TestCase): | ||||||
| 
 | 
 | ||||||
|         YouTubeTranscriptApi.get_transcripts(['video_id_1', 'video_id_2'], continue_after_error=True) |         YouTubeTranscriptApi.get_transcripts(['video_id_1', 'video_id_2'], continue_after_error=True) | ||||||
| 
 | 
 | ||||||
|         mock_get_transcript.assert_any_call(video_id_1, ('en',), None, None) |         mock_get_transcript.assert_any_call(video_id_1, ('en',), None, None, False) | ||||||
|         mock_get_transcript.assert_any_call(video_id_2, ('en',), None, None) |         mock_get_transcript.assert_any_call(video_id_2, ('en',), None, None, False) | ||||||
|      |      | ||||||
|     @patch('youtube_transcript_api.YouTubeTranscriptApi.get_transcript') |     @patch('youtube_transcript_api.YouTubeTranscriptApi.get_transcript') | ||||||
|     def test_get_transcripts__with_cookies(self, mock_get_transcript): |     def test_get_transcripts__with_cookies(self, mock_get_transcript): | ||||||
|         cookies = '/example_cookies.txt' |         cookies = '/example_cookies.txt' | ||||||
|         YouTubeTranscriptApi.get_transcripts(['GJLlxj_dtq8'], cookies=cookies) |         YouTubeTranscriptApi.get_transcripts(['GJLlxj_dtq8'], cookies=cookies) | ||||||
|         mock_get_transcript.assert_any_call('GJLlxj_dtq8', ('en',), None, cookies) |         mock_get_transcript.assert_any_call('GJLlxj_dtq8', ('en',), None, cookies, False) | ||||||
| 
 | 
 | ||||||
|     @patch('youtube_transcript_api.YouTubeTranscriptApi.get_transcript') |     @patch('youtube_transcript_api.YouTubeTranscriptApi.get_transcript') | ||||||
|     def test_get_transcripts__with_proxies(self, mock_get_transcript): |     def test_get_transcripts__with_proxies(self, mock_get_transcript): | ||||||
|         proxies = {'http': '', 'https:': ''} |         proxies = {'http': '', 'https:': ''} | ||||||
|         YouTubeTranscriptApi.get_transcripts(['GJLlxj_dtq8'], proxies=proxies) |         YouTubeTranscriptApi.get_transcripts(['GJLlxj_dtq8'], proxies=proxies) | ||||||
|         mock_get_transcript.assert_any_call('GJLlxj_dtq8', ('en',), proxies, None) |         mock_get_transcript.assert_any_call('GJLlxj_dtq8', ('en',), proxies, None, False) | ||||||
| 
 | 
 | ||||||
|     def test_load_cookies(self): |     def test_load_cookies(self): | ||||||
|         dirname, filename = os.path.split(os.path.abspath(__file__)) |         dirname, filename = os.path.split(os.path.abspath(__file__)) | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue