Propagate formatting up to user level
This commit is contained in:
		
							parent
							
								
									1f1c8b249b
								
							
						
					
					
						commit
						c1a037c39c
					
				|  | @ -16,7 +16,7 @@ from ._errors import ( | ||||||
| 
 | 
 | ||||||
| class YouTubeTranscriptApi(object): | class YouTubeTranscriptApi(object): | ||||||
|     @classmethod |     @classmethod | ||||||
|     def list_transcripts(cls, video_id, proxies=None, cookies=None): |     def list_transcripts(cls, video_id, proxies=None, cookies=None, preserve_formatting=False): | ||||||
|         """ |         """ | ||||||
|         Retrieves the list of transcripts which are available for a given video. It returns a `TranscriptList` object |         Retrieves the list of transcripts which are available for a given video. It returns a `TranscriptList` object | ||||||
|         which is iterable and provides methods to filter the list of transcripts for specific languages. While iterating |         which is iterable and provides methods to filter the list of transcripts for specific languages. While iterating | ||||||
|  | @ -68,7 +68,8 @@ class YouTubeTranscriptApi(object): | ||||||
|             if cookies: |             if cookies: | ||||||
|                 http_client.cookies = cls._load_cookies(cookies, video_id) |                 http_client.cookies = cls._load_cookies(cookies, video_id) | ||||||
|             http_client.proxies = proxies if proxies else {} |             http_client.proxies = proxies if proxies else {} | ||||||
|             return TranscriptListFetcher(http_client).fetch(video_id) |             return TranscriptListFetcher(http_client).fetch(video_id, | ||||||
|  |                                                             preserve_formatting=preserve_formatting) | ||||||
| 
 | 
 | ||||||
|     @classmethod |     @classmethod | ||||||
|     def get_transcripts(cls, video_ids, languages=('en',), continue_after_error=False, proxies=None, cookies=None): |     def get_transcripts(cls, video_ids, languages=('en',), continue_after_error=False, proxies=None, cookies=None): | ||||||
|  | @ -109,7 +110,7 @@ class YouTubeTranscriptApi(object): | ||||||
|         return data, unretrievable_videos |         return data, unretrievable_videos | ||||||
| 
 | 
 | ||||||
|     @classmethod |     @classmethod | ||||||
|     def get_transcript(cls, video_id, languages=('en',), proxies=None, cookies=None): |     def get_transcript(cls, video_id, languages=('en',), proxies=None, cookies=None, preserve_formatting=False): | ||||||
|         """ |         """ | ||||||
|         Retrieves the transcript for a single video. This is just a shortcut for calling:: |         Retrieves the transcript for a single video. This is just a shortcut for calling:: | ||||||
| 
 | 
 | ||||||
|  | @ -129,7 +130,7 @@ class YouTubeTranscriptApi(object): | ||||||
|         :rtype [{'text': str, 'start': float, 'end': float}]: |         :rtype [{'text': str, 'start': float, 'end': float}]: | ||||||
|         """ |         """ | ||||||
|         assert isinstance(video_id, str), "`video_id` must be a string" |         assert isinstance(video_id, str), "`video_id` must be a string" | ||||||
|         return cls.list_transcripts(video_id, proxies, cookies).find_transcript(languages).fetch() |         return cls.list_transcripts(video_id, proxies, cookies, preserve_formatting=preserve_formatting).find_transcript(languages).fetch() | ||||||
|      |      | ||||||
|     @classmethod |     @classmethod | ||||||
|     def _load_cookies(cls, cookies, video_id): |     def _load_cookies(cls, cookies, video_id): | ||||||
|  |  | ||||||
|  | @ -53,11 +53,12 @@ class TranscriptListFetcher(object): | ||||||
|     def __init__(self, http_client): |     def __init__(self, http_client): | ||||||
|         self._http_client = http_client |         self._http_client = http_client | ||||||
| 
 | 
 | ||||||
|     def fetch(self, video_id): |     def fetch(self, video_id, preserve_formatting=False): | ||||||
|         return TranscriptList.build( |         return TranscriptList.build( | ||||||
|             self._http_client, |             self._http_client, | ||||||
|             video_id, |             video_id, | ||||||
|             self._extract_captions_json(self._fetch_video_html(video_id), video_id) |             self._extract_captions_json(self._fetch_video_html(video_id), video_id), | ||||||
|  |             preserve_formatting=preserve_formatting, | ||||||
|         ) |         ) | ||||||
| 
 | 
 | ||||||
|     def _extract_captions_json(self, html, video_id): |     def _extract_captions_json(self, html, video_id): | ||||||
|  | @ -107,7 +108,8 @@ class TranscriptList(object): | ||||||
|     This object represents a list of transcripts. It can be iterated over to list all transcripts which are available |     This object represents a list of transcripts. It can be iterated over to list all transcripts which are available | ||||||
|     for a given YouTube video. Also it provides functionality to search for a transcript in a given language. |     for a given YouTube video. Also it provides functionality to search for a transcript in a given language. | ||||||
|     """ |     """ | ||||||
|     def __init__(self, video_id, manually_created_transcripts, generated_transcripts, translation_languages): |     def __init__(self, video_id, manually_created_transcripts, generated_transcripts, translation_languages, | ||||||
|  | ): | ||||||
|         """ |         """ | ||||||
|         The constructor is only for internal use. Use the static build method instead. |         The constructor is only for internal use. Use the static build method instead. | ||||||
| 
 | 
 | ||||||
|  | @ -126,7 +128,7 @@ class TranscriptList(object): | ||||||
|         self._translation_languages = translation_languages |         self._translation_languages = translation_languages | ||||||
| 
 | 
 | ||||||
|     @staticmethod |     @staticmethod | ||||||
|     def build(http_client, video_id, captions_json): |     def build(http_client, video_id, captions_json, preserve_formatting=False): | ||||||
|         """ |         """ | ||||||
|         Factory method for TranscriptList. |         Factory method for TranscriptList. | ||||||
| 
 | 
 | ||||||
|  | @ -162,7 +164,8 @@ class TranscriptList(object): | ||||||
|                 caption['name']['simpleText'], |                 caption['name']['simpleText'], | ||||||
|                 caption['languageCode'], |                 caption['languageCode'], | ||||||
|                 caption.get('kind', '') == 'asr', |                 caption.get('kind', '') == 'asr', | ||||||
|                 translation_languages if caption.get('isTranslatable', False) else [] |                 translation_languages if caption.get('isTranslatable', False) else [], | ||||||
|  |                 preserve_formatting=preserve_formatting, | ||||||
|             ) |             ) | ||||||
| 
 | 
 | ||||||
|         return TranscriptList( |         return TranscriptList( | ||||||
|  | @ -262,7 +265,8 @@ class TranscriptList(object): | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class Transcript(object): | class Transcript(object): | ||||||
|     def __init__(self, http_client, video_id, url, language, language_code, is_generated, translation_languages): |     def __init__(self, http_client, video_id, url, language, language_code, is_generated, translation_languages, | ||||||
|  |                  preserve_formatting=False): | ||||||
|         """ |         """ | ||||||
|         You probably don't want to initialize this directly. Usually you'll access Transcript objects using a |         You probably don't want to initialize this directly. Usually you'll access Transcript objects using a | ||||||
|         TranscriptList. |         TranscriptList. | ||||||
|  | @ -276,6 +280,7 @@ class Transcript(object): | ||||||
|         :param language_code: |         :param language_code: | ||||||
|         :param is_generated: |         :param is_generated: | ||||||
|         :param translation_languages: |         :param translation_languages: | ||||||
|  |         :param preserve_formatting: whether to keep select HTML text formatting | ||||||
|         """ |         """ | ||||||
|         self._http_client = http_client |         self._http_client = http_client | ||||||
|         self.video_id = video_id |         self.video_id = video_id | ||||||
|  | @ -288,6 +293,7 @@ class Transcript(object): | ||||||
|             translation_language['language_code']: translation_language['language'] |             translation_language['language_code']: translation_language['language'] | ||||||
|             for translation_language in translation_languages |             for translation_language in translation_languages | ||||||
|         } |         } | ||||||
|  |         self.preserve_formatting = preserve_formatting | ||||||
| 
 | 
 | ||||||
|     def fetch(self): |     def fetch(self): | ||||||
|         """ |         """ | ||||||
|  | @ -297,7 +303,7 @@ class Transcript(object): | ||||||
|         :rtype [{'text': str, 'start': float, 'end': float}]: |         :rtype [{'text': str, 'start': float, 'end': float}]: | ||||||
|         """ |         """ | ||||||
|         response = self._http_client.get(self._url) |         response = self._http_client.get(self._url) | ||||||
|         return _TranscriptParser().parse( |         return _TranscriptParser(preserve_formatting=self.preserve_formatting).parse( | ||||||
|             _raise_http_errors(response, self.video_id).text, |             _raise_http_errors(response, self.video_id).text, | ||||||
|         ) |         ) | ||||||
| 
 | 
 | ||||||
|  | @ -327,6 +333,7 @@ class Transcript(object): | ||||||
|             language_code, |             language_code, | ||||||
|             True, |             True, | ||||||
|             [], |             [], | ||||||
|  |             preserve_formatting=self.preserve_formatting, | ||||||
|         ) |         ) | ||||||
| class _TranscriptParser(object): | class _TranscriptParser(object): | ||||||
|     def __init__(self, preserve_formatting=False): |     def __init__(self, preserve_formatting=False): | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue