added public list_transcripts method
This commit is contained in:
		
							parent
							
								
									8287d1088e
								
							
						
					
					
						commit
						1bc5087575
					
				|  | @ -4,17 +4,68 @@ from ._transcripts import TranscriptListFetcher | |||
| 
 | ||||
| 
 | ||||
| class YouTubeTranscriptApi(): | ||||
|     @classmethod | ||||
|     def list_transcripts(cls, video_id, proxies=None): | ||||
|         """ | ||||
|         Retrieves the list of transcripts which are available for a given video. It returns a `TranscriptList` object | ||||
|         which is iterable and provides methods to filter the list of transcripts for specific languages. While iterating | ||||
|         over the `TranscriptList` the individual transcripts are represented by `Transcript` objects, which provide | ||||
|         metadata and can either be fetched by calling `transcript.fetch()` or translated by calling | ||||
|         `transcript.translate('en')`. Example:: | ||||
| 
 | ||||
|             # retrieve the available transcripts | ||||
|             transcript_list = YouTubeTranscriptApi.list_transcripts('video_id') | ||||
| 
 | ||||
|             # iterate over all available transcripts | ||||
|             for transcript in transcript_list: | ||||
|                 # the Transcript object provides metadata properties | ||||
|                 print( | ||||
|                     transcript.video_id, | ||||
|                     transcript.language, | ||||
|                     transcript.language_code, | ||||
|                     # whether it has been manually created or generated by YouTube | ||||
|                     transcript.is_generated, | ||||
|                     # a list of languages the transcript can be translated to | ||||
|                     transcript.translation_languages, | ||||
|                 ) | ||||
| 
 | ||||
|                 # fetch the actual transcript data | ||||
|                 print(transcript.fetch()) | ||||
| 
 | ||||
|                 # translating the transcript will return another transcript object | ||||
|                 print(transcript.translate('en').fetch()) | ||||
| 
 | ||||
|             # you can also directly filter for the language you are looking for, using the transcript list | ||||
|             transcript = transcript_list.find_transcript(['de', 'en']) | ||||
| 
 | ||||
|             # or just filter for manually created transcripts | ||||
|             transcript = transcript_list.find_manually_created_transcript(['de', 'en']) | ||||
| 
 | ||||
|             # or automatically generated ones | ||||
|             transcript = transcript_list.find_generated_transcript(['de', 'en']) | ||||
| 
 | ||||
|         :param video_id: the youtube video id | ||||
|         :type video_id: str | ||||
|         :param proxies: a dictionary mapping of http and https proxies to be used for the network requests | ||||
|         :type proxies: {'http': str, 'https': str} - http://docs.python-requests.org/en/master/user/advanced/#proxies | ||||
|         :return: the list of available transcripts | ||||
|         :rtype TranscriptList: | ||||
|         """ | ||||
|         with requests.Session() as http_client: | ||||
|             http_client.proxies = proxies if proxies else {} | ||||
|             return TranscriptListFetcher(http_client).fetch(video_id) | ||||
| 
 | ||||
|     @classmethod | ||||
|     def get_transcripts(cls, video_ids, languages=('en',), continue_after_error=False, proxies=None): | ||||
|         """ | ||||
|         Retrieves the transcripts for a list of videos. | ||||
| 
 | ||||
|         :param video_ids: a list of youtube video ids | ||||
|         :type video_ids: [str] | ||||
|         :type video_ids: list[str] | ||||
|         :param languages: A list of language codes in a descending priority. For example, if this is set to ['de', 'en'] | ||||
|         it will first try to fetch the german transcript (de) and then fetch the english transcript (en) if it fails to | ||||
|         do so. | ||||
|         :type languages: [str] | ||||
|         :type languages: list[str] | ||||
|         :param continue_after_error: if this is set the execution won't be stopped, if an error occurs while retrieving | ||||
|         one of the video transcripts | ||||
|         :type continue_after_error: bool | ||||
|  | @ -22,7 +73,7 @@ class YouTubeTranscriptApi(): | |||
|         :type proxies: {'http': str, 'https': str} - http://docs.python-requests.org/en/master/user/advanced/#proxies | ||||
|         :return: a tuple containing a dictionary mapping video ids onto their corresponding transcripts, and a list of | ||||
|         video ids, which could not be retrieved | ||||
|         :rtype: ({str: [{'text': str, 'start': float, 'end': float}]}, [str]}) | ||||
|         :rtype ({str: [{'text': str, 'start': float, 'duration': float}]}, [str]): | ||||
|         """ | ||||
|         data = {} | ||||
|         unretrievable_videos = [] | ||||
|  | @ -41,19 +92,19 @@ class YouTubeTranscriptApi(): | |||
|     @classmethod | ||||
|     def get_transcript(cls, video_id, languages=('en',), proxies=None): | ||||
|         """ | ||||
|         Retrieves the transcript for a single video. | ||||
|         Retrieves the transcript for a single video. This is just a shortcut for calling:: | ||||
| 
 | ||||
|             YouTubeTranscriptApi.list_transcripts(video_id, proxies).find_transcript(languages).fetch() | ||||
| 
 | ||||
|         :param video_id: the youtube video id | ||||
|         :type video_id: str | ||||
|         :param languages: A list of language codes in a descending priority. For example, if this is set to ['de', 'en'] | ||||
|         it will first try to fetch the german transcript (de) and then fetch the english transcript (en) if it fails to | ||||
|         do so. | ||||
|         :type languages: [str] | ||||
|         :type languages: list[str] | ||||
|         :param proxies: a dictionary mapping of http and https proxies to be used for the network requests | ||||
|         :type proxies: {'http': str, 'https': str} - http://docs.python-requests.org/en/master/user/advanced/#proxies | ||||
|         :return: a list of dictionaries containing the 'text', 'start' and 'duration' keys | ||||
|         :rtype: [{'text': str, 'start': float, 'end': float}] | ||||
|         :rtype [{'text': str, 'start': float, 'duration': float}]: | ||||
|         """ | ||||
|         with requests.Session() as http_client: | ||||
|             http_client.proxies = proxies if proxies else {} | ||||
|             return TranscriptListFetcher(http_client).fetch(video_id).find_transcript(languages).fetch() | ||||
|         return cls.list_transcripts(video_id, proxies).find_transcript(languages).fetch() | ||||
|  |  | |||
|  | @ -95,7 +95,7 @@ class TranscriptList(): | |||
|         :param captions_json: the JSON parsed from the YouTube pages static HTML | ||||
|         :type captions_json: dict | ||||
|         :return: the created TranscriptList | ||||
|         :rtype TranscriptList | ||||
|         :rtype TranscriptList: | ||||
|         """ | ||||
|         translation_languages = [ | ||||
|             { | ||||
|  | @ -142,9 +142,9 @@ class TranscriptList(): | |||
|         :param language_codes: A list of language codes in a descending priority. For example, if this is set to | ||||
|         ['de', 'en'] it will first try to fetch the german transcript (de) and then fetch the english transcript (en) if | ||||
|         it fails to do so. | ||||
|         :type languages: [str] | ||||
|         :type language_codes: list[str] | ||||
|         :return: the found Transcript | ||||
|         :rtype: Transcript | ||||
|         :rtype Transcript: | ||||
|         :raises: NoTranscriptFound | ||||
|         """ | ||||
|         return self._find_transcript(language_codes, [self._manually_created_transcripts, self._generated_transcripts]) | ||||
|  | @ -156,9 +156,9 @@ class TranscriptList(): | |||
|         :param language_codes: A list of language codes in a descending priority. For example, if this is set to | ||||
|         ['de', 'en'] it will first try to fetch the german transcript (de) and then fetch the english transcript (en) if | ||||
|         it fails to do so. | ||||
|         :type languages: [str] | ||||
|         :type language_codes: list[str] | ||||
|         :return: the found Transcript | ||||
|         :rtype: Transcript | ||||
|         :rtype Transcript: | ||||
|         :raises: NoTranscriptFound | ||||
|         """ | ||||
|         return self._find_transcript(language_codes, [self._generated_transcripts,]) | ||||
|  | @ -170,9 +170,9 @@ class TranscriptList(): | |||
|         :param language_codes: A list of language codes in a descending priority. For example, if this is set to | ||||
|         ['de', 'en'] it will first try to fetch the german transcript (de) and then fetch the english transcript (en) if | ||||
|         it fails to do so. | ||||
|         :type languages: [str] | ||||
|         :type language_codes: list[str] | ||||
|         :return: the found Transcript | ||||
|         :rtype: Transcript | ||||
|         :rtype Transcript: | ||||
|         :raises: NoTranscriptFound | ||||
|         """ | ||||
|         return self._find_transcript(language_codes, [self._manually_created_transcripts,]) | ||||
|  | @ -252,7 +252,7 @@ class Transcript(): | |||
|         Loads the actual transcript data. | ||||
| 
 | ||||
|         :return: a list of dictionaries containing the 'text', 'start' and 'duration' keys | ||||
|         :rtype: [{'text': str, 'start': float, 'end': float}] | ||||
|         :rtype [{'text': str, 'start': float, 'duration': float}]: | ||||
|         """ | ||||
|         return _TranscriptParser().parse( | ||||
|             self._http_client.get(self._url).text | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue