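Move the preserve_formatting option from Transcript.__init__() to Transcript.fetch()
Also remove the preserve_formatting parameter from YouTubeTranscriptApi.list_transcripts(), TranscriptList.build() and TranscriptListFetcher.fetch()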
This commit is contained in:
		
							parent
							
								
									79fd63d585
								
							
						
					
					
						commit
						ca93c48fa1
					
				|  | @ -16,7 +16,7 @@ from ._errors import ( | |||
| 
 | ||||
| class YouTubeTranscriptApi(object): | ||||
|     @classmethod | ||||
|     def list_transcripts(cls, video_id, proxies=None, cookies=None, preserve_formatting=False): | ||||
|     def list_transcripts(cls, video_id, proxies=None, cookies=None): | ||||
|         """ | ||||
|         Retrieves the list of transcripts which are available for a given video. It returns a `TranscriptList` object | ||||
|         which is iterable and provides methods to filter the list of transcripts for specific languages. While iterating | ||||
|  | @ -61,8 +61,6 @@ class YouTubeTranscriptApi(object): | |||
|         :type proxies: {'http': str, 'https': str} - http://docs.python-requests.org/en/master/user/advanced/#proxies | ||||
|         :param cookies: a string of the path to a text file containing youtube authorization cookies | ||||
|         :type cookies: str | ||||
|         :param preserve_formatting: whether to keep select HTML text formatting | ||||
|         :type preserve_formatting: bool | ||||
|         :return: the list of available transcripts | ||||
|         :rtype TranscriptList: | ||||
|         """ | ||||
|  | @ -70,8 +68,7 @@ class YouTubeTranscriptApi(object): | |||
|             if cookies: | ||||
|                 http_client.cookies = cls._load_cookies(cookies, video_id) | ||||
|             http_client.proxies = proxies if proxies else {} | ||||
|             return TranscriptListFetcher(http_client).fetch(video_id, | ||||
|                                                             preserve_formatting=preserve_formatting) | ||||
|             return TranscriptListFetcher(http_client).fetch(video_id) | ||||
| 
 | ||||
|     @classmethod | ||||
|     def get_transcripts(cls, video_ids, languages=('en',), continue_after_error=False, proxies=None, | ||||
|  | @ -137,7 +134,7 @@ class YouTubeTranscriptApi(object): | |||
|         :rtype [{'text': str, 'start': float, 'end': float}]: | ||||
|         """ | ||||
|         assert isinstance(video_id, str), "`video_id` must be a string" | ||||
|         return cls.list_transcripts(video_id, proxies, cookies, preserve_formatting=preserve_formatting).find_transcript(languages).fetch() | ||||
|         return cls.list_transcripts(video_id, proxies, cookies).find_transcript(languages).fetch(preserve_formatting=preserve_formatting) | ||||
| 
 | ||||
|     @classmethod | ||||
|     def _load_cookies(cls, cookies, video_id): | ||||
|  |  | |||
|  | @ -40,12 +40,12 @@ class TranscriptListFetcher(object): | |||
|     def __init__(self, http_client): | ||||
|         self._http_client = http_client | ||||
| 
 | ||||
|     def fetch(self, video_id, preserve_formatting=False): | ||||
|     def fetch(self, video_id): | ||||
| 
 | ||||
|         return TranscriptList.build( | ||||
|             self._http_client, | ||||
|             video_id, | ||||
|             self._extract_captions_json(self._fetch_video_html(video_id), video_id), | ||||
|             preserve_formatting=preserve_formatting, | ||||
|         ) | ||||
| 
 | ||||
|     def _extract_captions_json(self, html, video_id): | ||||
|  | @ -114,7 +114,7 @@ class TranscriptList(object): | |||
|         self._translation_languages = translation_languages | ||||
| 
 | ||||
|     @staticmethod | ||||
|     def build(http_client, video_id, captions_json, preserve_formatting=False): | ||||
|     def build(http_client, video_id, captions_json): | ||||
|         """ | ||||
|         Factory method for TranscriptList. | ||||
| 
 | ||||
|  | @ -124,8 +124,6 @@ class TranscriptList(object): | |||
|         :type video_id: str | ||||
|         :param captions_json: the JSON parsed from the YouTube pages static HTML | ||||
|         :type captions_json: dict | ||||
|         :param preserve_formatting: whether to keep select HTML text formatting | ||||
|         :type preserve_formatting: bool | ||||
|         :return: the created TranscriptList | ||||
|         :rtype TranscriptList: | ||||
|         """ | ||||
|  | @ -153,7 +151,6 @@ class TranscriptList(object): | |||
|                 caption['languageCode'], | ||||
|                 caption.get('kind', '') == 'asr', | ||||
|                 translation_languages if caption.get('isTranslatable', False) else [], | ||||
|                 preserve_formatting=preserve_formatting, | ||||
|             ) | ||||
| 
 | ||||
|         return TranscriptList( | ||||
|  | @ -253,8 +250,7 @@ class TranscriptList(object): | |||
| 
 | ||||
| 
 | ||||
| class Transcript(object): | ||||
|     def __init__(self, http_client, video_id, url, language, language_code, is_generated, translation_languages, | ||||
|                  preserve_formatting=False): | ||||
|     def __init__(self, http_client, video_id, url, language, language_code, is_generated, translation_languages): | ||||
|         """ | ||||
|         You probably don't want to initialize this directly. Usually you'll access Transcript objects using a | ||||
|         TranscriptList. | ||||
|  | @ -268,8 +264,6 @@ class Transcript(object): | |||
|         :param language_code: | ||||
|         :param is_generated: | ||||
|         :param translation_languages: | ||||
|         :param preserve_formatting: whether to keep select HTML text formatting | ||||
|         :type preserve_formatting: bool | ||||
|         """ | ||||
|         self._http_client = http_client | ||||
|         self.video_id = video_id | ||||
|  | @ -282,17 +276,17 @@ class Transcript(object): | |||
|             translation_language['language_code']: translation_language['language'] | ||||
|             for translation_language in translation_languages | ||||
|         } | ||||
|         self.preserve_formatting = preserve_formatting | ||||
| 
 | ||||
|     def fetch(self): | ||||
|     def fetch(self, preserve_formatting=False): | ||||
|         """ | ||||
|         Loads the actual transcript data. | ||||
| 
 | ||||
|         :param preserve_formatting: whether to keep select HTML text formatting | ||||
|         :type preserve_formatting: bool | ||||
|         :return: a list of dictionaries containing the 'text', 'start' and 'duration' keys | ||||
|         :rtype [{'text': str, 'start': float, 'end': float}]: | ||||
|         """ | ||||
|         response = self._http_client.get(self._url) | ||||
|         return _TranscriptParser(preserve_formatting=self.preserve_formatting).parse( | ||||
|         return _TranscriptParser(preserve_formatting=preserve_formatting).parse( | ||||
|             _raise_http_errors(response, self.video_id).text,) | ||||
| 
 | ||||
|     def __str__(self): | ||||
|  | @ -321,7 +315,6 @@ class Transcript(object): | |||
|             language_code, | ||||
|             True, | ||||
|             [], | ||||
|             preserve_formatting=self.preserve_formatting, | ||||
|         ) | ||||
| 
 | ||||
| 
 | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue