Move preserve_formatting from Transcript.__init__ to Transcript.fetch()
Also remove it from TranscriptList and TranscriptListFetcher
This commit is contained in:
		
							parent
							
								
									79fd63d585
								
							
						
					
					
						commit
						ca93c48fa1
					
				|  | @ -16,7 +16,7 @@ from ._errors import ( | ||||||
| 
 | 
 | ||||||
| class YouTubeTranscriptApi(object): | class YouTubeTranscriptApi(object): | ||||||
|     @classmethod |     @classmethod | ||||||
|     def list_transcripts(cls, video_id, proxies=None, cookies=None, preserve_formatting=False): |     def list_transcripts(cls, video_id, proxies=None, cookies=None): | ||||||
|         """ |         """ | ||||||
|         Retrieves the list of transcripts which are available for a given video. It returns a `TranscriptList` object |         Retrieves the list of transcripts which are available for a given video. It returns a `TranscriptList` object | ||||||
|         which is iterable and provides methods to filter the list of transcripts for specific languages. While iterating |         which is iterable and provides methods to filter the list of transcripts for specific languages. While iterating | ||||||
|  | @ -61,8 +61,6 @@ class YouTubeTranscriptApi(object): | ||||||
|         :type proxies: {'http': str, 'https': str} - http://docs.python-requests.org/en/master/user/advanced/#proxies |         :type proxies: {'http': str, 'https': str} - http://docs.python-requests.org/en/master/user/advanced/#proxies | ||||||
|         :param cookies: a string of the path to a text file containing youtube authorization cookies |         :param cookies: a string of the path to a text file containing youtube authorization cookies | ||||||
|         :type cookies: str |         :type cookies: str | ||||||
|         :param preserve_formatting: whether to keep select HTML text formatting |  | ||||||
|         :type preserve_formatting: bool |  | ||||||
|         :return: the list of available transcripts |         :return: the list of available transcripts | ||||||
|         :rtype TranscriptList: |         :rtype TranscriptList: | ||||||
|         """ |         """ | ||||||
|  | @ -70,8 +68,7 @@ class YouTubeTranscriptApi(object): | ||||||
|             if cookies: |             if cookies: | ||||||
|                 http_client.cookies = cls._load_cookies(cookies, video_id) |                 http_client.cookies = cls._load_cookies(cookies, video_id) | ||||||
|             http_client.proxies = proxies if proxies else {} |             http_client.proxies = proxies if proxies else {} | ||||||
|             return TranscriptListFetcher(http_client).fetch(video_id, |             return TranscriptListFetcher(http_client).fetch(video_id) | ||||||
|                                                             preserve_formatting=preserve_formatting) |  | ||||||
| 
 | 
 | ||||||
|     @classmethod |     @classmethod | ||||||
|     def get_transcripts(cls, video_ids, languages=('en',), continue_after_error=False, proxies=None, |     def get_transcripts(cls, video_ids, languages=('en',), continue_after_error=False, proxies=None, | ||||||
|  | @ -137,7 +134,7 @@ class YouTubeTranscriptApi(object): | ||||||
|         :rtype [{'text': str, 'start': float, 'end': float}]: |         :rtype [{'text': str, 'start': float, 'end': float}]: | ||||||
|         """ |         """ | ||||||
|         assert isinstance(video_id, str), "`video_id` must be a string" |         assert isinstance(video_id, str), "`video_id` must be a string" | ||||||
|         return cls.list_transcripts(video_id, proxies, cookies, preserve_formatting=preserve_formatting).find_transcript(languages).fetch() |         return cls.list_transcripts(video_id, proxies, cookies).find_transcript(languages).fetch(preserve_formatting=preserve_formatting) | ||||||
| 
 | 
 | ||||||
|     @classmethod |     @classmethod | ||||||
|     def _load_cookies(cls, cookies, video_id): |     def _load_cookies(cls, cookies, video_id): | ||||||
|  |  | ||||||
|  | @ -40,12 +40,12 @@ class TranscriptListFetcher(object): | ||||||
|     def __init__(self, http_client): |     def __init__(self, http_client): | ||||||
|         self._http_client = http_client |         self._http_client = http_client | ||||||
| 
 | 
 | ||||||
|     def fetch(self, video_id, preserve_formatting=False): |     def fetch(self, video_id): | ||||||
|  | 
 | ||||||
|         return TranscriptList.build( |         return TranscriptList.build( | ||||||
|             self._http_client, |             self._http_client, | ||||||
|             video_id, |             video_id, | ||||||
|             self._extract_captions_json(self._fetch_video_html(video_id), video_id), |             self._extract_captions_json(self._fetch_video_html(video_id), video_id), | ||||||
|             preserve_formatting=preserve_formatting, |  | ||||||
|         ) |         ) | ||||||
| 
 | 
 | ||||||
|     def _extract_captions_json(self, html, video_id): |     def _extract_captions_json(self, html, video_id): | ||||||
|  | @ -114,7 +114,7 @@ class TranscriptList(object): | ||||||
|         self._translation_languages = translation_languages |         self._translation_languages = translation_languages | ||||||
| 
 | 
 | ||||||
|     @staticmethod |     @staticmethod | ||||||
|     def build(http_client, video_id, captions_json, preserve_formatting=False): |     def build(http_client, video_id, captions_json): | ||||||
|         """ |         """ | ||||||
|         Factory method for TranscriptList. |         Factory method for TranscriptList. | ||||||
| 
 | 
 | ||||||
|  | @ -124,8 +124,6 @@ class TranscriptList(object): | ||||||
|         :type video_id: str |         :type video_id: str | ||||||
|         :param captions_json: the JSON parsed from the YouTube pages static HTML |         :param captions_json: the JSON parsed from the YouTube pages static HTML | ||||||
|         :type captions_json: dict |         :type captions_json: dict | ||||||
|         :param preserve_formatting: whether to keep select HTML text formatting |  | ||||||
|         :type preserve_formatting: bool |  | ||||||
|         :return: the created TranscriptList |         :return: the created TranscriptList | ||||||
|         :rtype TranscriptList: |         :rtype TranscriptList: | ||||||
|         """ |         """ | ||||||
|  | @ -153,7 +151,6 @@ class TranscriptList(object): | ||||||
|                 caption['languageCode'], |                 caption['languageCode'], | ||||||
|                 caption.get('kind', '') == 'asr', |                 caption.get('kind', '') == 'asr', | ||||||
|                 translation_languages if caption.get('isTranslatable', False) else [], |                 translation_languages if caption.get('isTranslatable', False) else [], | ||||||
|                 preserve_formatting=preserve_formatting, |  | ||||||
|             ) |             ) | ||||||
| 
 | 
 | ||||||
|         return TranscriptList( |         return TranscriptList( | ||||||
|  | @ -253,8 +250,7 @@ class TranscriptList(object): | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class Transcript(object): | class Transcript(object): | ||||||
|     def __init__(self, http_client, video_id, url, language, language_code, is_generated, translation_languages, |     def __init__(self, http_client, video_id, url, language, language_code, is_generated, translation_languages): | ||||||
|                  preserve_formatting=False): |  | ||||||
|         """ |         """ | ||||||
|         You probably don't want to initialize this directly. Usually you'll access Transcript objects using a |         You probably don't want to initialize this directly. Usually you'll access Transcript objects using a | ||||||
|         TranscriptList. |         TranscriptList. | ||||||
|  | @ -268,8 +264,6 @@ class Transcript(object): | ||||||
|         :param language_code: |         :param language_code: | ||||||
|         :param is_generated: |         :param is_generated: | ||||||
|         :param translation_languages: |         :param translation_languages: | ||||||
|         :param preserve_formatting: whether to keep select HTML text formatting |  | ||||||
|         :type preserve_formatting: bool |  | ||||||
|         """ |         """ | ||||||
|         self._http_client = http_client |         self._http_client = http_client | ||||||
|         self.video_id = video_id |         self.video_id = video_id | ||||||
|  | @ -282,17 +276,17 @@ class Transcript(object): | ||||||
|             translation_language['language_code']: translation_language['language'] |             translation_language['language_code']: translation_language['language'] | ||||||
|             for translation_language in translation_languages |             for translation_language in translation_languages | ||||||
|         } |         } | ||||||
|         self.preserve_formatting = preserve_formatting |  | ||||||
| 
 | 
 | ||||||
|     def fetch(self): |     def fetch(self, preserve_formatting=False): | ||||||
|         """ |         """ | ||||||
|         Loads the actual transcript data. |         Loads the actual transcript data. | ||||||
| 
 |         :param preserve_formatting: whether to keep select HTML text formatting | ||||||
|  |         :type preserve_formatting: bool | ||||||
|         :return: a list of dictionaries containing the 'text', 'start' and 'duration' keys |         :return: a list of dictionaries containing the 'text', 'start' and 'duration' keys | ||||||
|         :rtype [{'text': str, 'start': float, 'end': float}]: |         :rtype [{'text': str, 'start': float, 'end': float}]: | ||||||
|         """ |         """ | ||||||
|         response = self._http_client.get(self._url) |         response = self._http_client.get(self._url) | ||||||
|         return _TranscriptParser(preserve_formatting=self.preserve_formatting).parse( |         return _TranscriptParser(preserve_formatting=preserve_formatting).parse( | ||||||
|             _raise_http_errors(response, self.video_id).text,) |             _raise_http_errors(response, self.video_id).text,) | ||||||
| 
 | 
 | ||||||
|     def __str__(self): |     def __str__(self): | ||||||
|  | @ -321,7 +315,6 @@ class Transcript(object): | ||||||
|             language_code, |             language_code, | ||||||
|             True, |             True, | ||||||
|             [], |             [], | ||||||
|             preserve_formatting=self.preserve_formatting, |  | ||||||
|         ) |         ) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue