added public list_transcripts method
This commit is contained in:
		
							parent
							
								
									8287d1088e
								
							
						
					
					
						commit
						1bc5087575
					
				|  | @ -4,17 +4,68 @@ from ._transcripts import TranscriptListFetcher | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class YouTubeTranscriptApi(): | class YouTubeTranscriptApi(): | ||||||
|  |     @classmethod | ||||||
|  |     def list_transcripts(cls, video_id, proxies=None): | ||||||
|  |         """ | ||||||
|  |         Retrieves the list of transcripts which are available for a given video. It returns a `TranscriptList` object | ||||||
|  |         which is iterable and provides methods to filter the list of transcripts for specific languages. While iterating | ||||||
|  |         over the `TranscriptList` the individual transcripts are represented by `Transcript` objects, which provide | ||||||
|  |         metadata and can either be fetched by calling `transcript.fetch()` or translated by calling | ||||||
|  |         `transcript.translate('en')`. Example:: | ||||||
|  | 
 | ||||||
|  |             # retrieve the available transcripts | ||||||
|  |             transcript_list = YouTubeTranscriptApi.list_transcripts('video_id') | ||||||
|  | 
 | ||||||
|  |             # iterate over all available transcripts | ||||||
|  |             for transcript in transcript_list: | ||||||
|  |                 # the Transcript object provides metadata properties | ||||||
|  |                 print( | ||||||
|  |                     transcript.video_id, | ||||||
|  |                     transcript.language, | ||||||
|  |                     transcript.language_code, | ||||||
|  |                     # whether it has been manually created or generated by YouTube | ||||||
|  |                     transcript.is_generated, | ||||||
|  |                     # a list of languages the transcript can be translated to | ||||||
|  |                     transcript.translation_languages, | ||||||
|  |                 ) | ||||||
|  | 
 | ||||||
|  |                 # fetch the actual transcript data | ||||||
|  |                 print(transcript.fetch()) | ||||||
|  | 
 | ||||||
|  |                 # translating the transcript will return another transcript object | ||||||
|  |                 print(transcript.translate('en').fetch()) | ||||||
|  | 
 | ||||||
|  |             # you can also directly filter for the language you are looking for, using the transcript list | ||||||
|  |             transcript = transcript_list.find_transcript(['de', 'en']) | ||||||
|  | 
 | ||||||
|  |             # or just filter for manually created transcripts | ||||||
|  |             transcript = transcript_list.find_manually_created_transcript(['de', 'en']) | ||||||
|  | 
 | ||||||
|  |             # or automatically generated ones | ||||||
|  |             transcript = transcript_list.find_generated_transcript(['de', 'en']) | ||||||
|  | 
 | ||||||
|  |         :param video_id: the youtube video id | ||||||
|  |         :type video_id: str | ||||||
|  |         :param proxies: a dictionary mapping of http and https proxies to be used for the network requests | ||||||
|  |         :type proxies: {'http': str, 'https': str} - http://docs.python-requests.org/en/master/user/advanced/#proxies | ||||||
|  |         :return: the list of available transcripts | ||||||
|  |         :rtype: TranscriptList | ||||||
|  |         """ | ||||||
|  |         with requests.Session() as http_client: | ||||||
|  |             http_client.proxies = proxies if proxies else {} | ||||||
|  |             return TranscriptListFetcher(http_client).fetch(video_id) | ||||||
|  | 
 | ||||||
|     @classmethod |     @classmethod | ||||||
|     def get_transcripts(cls, video_ids, languages=('en',), continue_after_error=False, proxies=None): |     def get_transcripts(cls, video_ids, languages=('en',), continue_after_error=False, proxies=None): | ||||||
|         """ |         """ | ||||||
|         Retrieves the transcripts for a list of videos. |         Retrieves the transcripts for a list of videos. | ||||||
| 
 | 
 | ||||||
|         :param video_ids: a list of youtube video ids |         :param video_ids: a list of youtube video ids | ||||||
|         :type video_ids: [str] |         :type video_ids: list[str] | ||||||
|         :param languages: A list of language codes in a descending priority. For example, if this is set to ['de', 'en'] |         :param languages: A list of language codes in a descending priority. For example, if this is set to ['de', 'en'] | ||||||
|         it will first try to fetch the german transcript (de) and then fetch the english transcript (en) if it fails to |         it will first try to fetch the german transcript (de) and then fetch the english transcript (en) if it fails to | ||||||
|         do so. |         do so. | ||||||
|         :type languages: [str] |         :type languages: list[str] | ||||||
|         :param continue_after_error: if this is set the execution won't be stopped, if an error occurs while retrieving |         :param continue_after_error: if this is set the execution won't be stopped, if an error occurs while retrieving | ||||||
|         one of the video transcripts |         one of the video transcripts | ||||||
|         :type continue_after_error: bool |         :type continue_after_error: bool | ||||||
|  | @ -22,7 +73,7 @@ class YouTubeTranscriptApi(): | ||||||
|         :type proxies: {'http': str, 'https': str} - http://docs.python-requests.org/en/master/user/advanced/#proxies |         :type proxies: {'http': str, 'https': str} - http://docs.python-requests.org/en/master/user/advanced/#proxies | ||||||
|         :return: a tuple containing a dictionary mapping video ids onto their corresponding transcripts, and a list of |         :return: a tuple containing a dictionary mapping video ids onto their corresponding transcripts, and a list of | ||||||
|         video ids, which could not be retrieved |         video ids, which could not be retrieved | ||||||
|         :rtype: ({str: [{'text': str, 'start': float, 'end': float}]}, [str]}) |         :rtype ({str: [{'text': str, 'start': float, 'end': float}]}, [str]}): | ||||||
|         """ |         """ | ||||||
|         data = {} |         data = {} | ||||||
|         unretrievable_videos = [] |         unretrievable_videos = [] | ||||||
|  | @ -41,19 +92,19 @@ class YouTubeTranscriptApi(): | ||||||
|     @classmethod |     @classmethod | ||||||
|     def get_transcript(cls, video_id, languages=('en',), proxies=None): |     def get_transcript(cls, video_id, languages=('en',), proxies=None): | ||||||
|         """ |         """ | ||||||
|         Retrieves the transcript for a single video. |         Retrieves the transcript for a single video. This is just a shortcut for calling:: | ||||||
|  | 
 | ||||||
|  |             YouTubeTranscriptApi.list_transcripts(video_id, proxies).find_transcript(languages).fetch() | ||||||
| 
 | 
 | ||||||
|         :param video_id: the youtube video id |         :param video_id: the youtube video id | ||||||
|         :type video_id: str |         :type video_id: str | ||||||
|         :param languages: A list of language codes in a descending priority. For example, if this is set to ['de', 'en'] |         :param languages: A list of language codes in a descending priority. For example, if this is set to ['de', 'en'] | ||||||
|         it will first try to fetch the german transcript (de) and then fetch the english transcript (en) if it fails to |         it will first try to fetch the german transcript (de) and then fetch the english transcript (en) if it fails to | ||||||
|         do so. |         do so. | ||||||
|         :type languages: [str] |         :type languages: list[str] | ||||||
|         :param proxies: a dictionary mapping of http and https proxies to be used for the network requests |         :param proxies: a dictionary mapping of http and https proxies to be used for the network requests | ||||||
|         :type proxies: {'http': str, 'https': str} - http://docs.python-requests.org/en/master/user/advanced/#proxies |         :type proxies: {'http': str, 'https': str} - http://docs.python-requests.org/en/master/user/advanced/#proxies | ||||||
|         :return: a list of dictionaries containing the 'text', 'start' and 'duration' keys |         :return: a list of dictionaries containing the 'text', 'start' and 'duration' keys | ||||||
|         :rtype: [{'text': str, 'start': float, 'end': float}] |         :rtype [{'text': str, 'start': float, 'end': float}]: | ||||||
|         """ |         """ | ||||||
|         with requests.Session() as http_client: |         return cls.list_transcripts(video_id, proxies).find_transcript(languages).fetch() | ||||||
|             http_client.proxies = proxies if proxies else {} |  | ||||||
|             return TranscriptListFetcher(http_client).fetch(video_id).find_transcript(languages).fetch() |  | ||||||
|  |  | ||||||
|  | @ -95,7 +95,7 @@ class TranscriptList(): | ||||||
|         :param captions_json: the JSON parsed from the YouTube pages static HTML |         :param captions_json: the JSON parsed from the YouTube pages static HTML | ||||||
|         :type captions_json: dict |         :type captions_json: dict | ||||||
|         :return: the created TranscriptList |         :return: the created TranscriptList | ||||||
|         :rtype TranscriptList |         :rtype TranscriptList: | ||||||
|         """ |         """ | ||||||
|         translation_languages = [ |         translation_languages = [ | ||||||
|             { |             { | ||||||
|  | @ -142,9 +142,9 @@ class TranscriptList(): | ||||||
|         :param language_codes: A list of language codes in a descending priority. For example, if this is set to |         :param language_codes: A list of language codes in a descending priority. For example, if this is set to | ||||||
|         ['de', 'en'] it will first try to fetch the german transcript (de) and then fetch the english transcript (en) if |         ['de', 'en'] it will first try to fetch the german transcript (de) and then fetch the english transcript (en) if | ||||||
|         it fails to do so. |         it fails to do so. | ||||||
|         :type languages: [str] |         :type languages: list[str] | ||||||
|         :return: the found Transcript |         :return: the found Transcript | ||||||
|         :rtype: Transcript |         :rtype Transcript: | ||||||
|         :raises: NoTranscriptFound |         :raises: NoTranscriptFound | ||||||
|         """ |         """ | ||||||
|         return self._find_transcript(language_codes, [self._manually_created_transcripts, self._generated_transcripts]) |         return self._find_transcript(language_codes, [self._manually_created_transcripts, self._generated_transcripts]) | ||||||
|  | @ -156,9 +156,9 @@ class TranscriptList(): | ||||||
|         :param language_codes: A list of language codes in a descending priority. For example, if this is set to |         :param language_codes: A list of language codes in a descending priority. For example, if this is set to | ||||||
|         ['de', 'en'] it will first try to fetch the german transcript (de) and then fetch the english transcript (en) if |         ['de', 'en'] it will first try to fetch the german transcript (de) and then fetch the english transcript (en) if | ||||||
|         it fails to do so. |         it fails to do so. | ||||||
|         :type languages: [str] |         :type languages: list[str] | ||||||
|         :return: the found Transcript |         :return: the found Transcript | ||||||
|         :rtype: Transcript |         :rtype Transcript: | ||||||
|         :raises: NoTranscriptFound |         :raises: NoTranscriptFound | ||||||
|         """ |         """ | ||||||
|         return self._find_transcript(language_codes, [self._generated_transcripts,]) |         return self._find_transcript(language_codes, [self._generated_transcripts,]) | ||||||
|  | @ -170,9 +170,9 @@ class TranscriptList(): | ||||||
|         :param language_codes: A list of language codes in a descending priority. For example, if this is set to |         :param language_codes: A list of language codes in a descending priority. For example, if this is set to | ||||||
|         ['de', 'en'] it will first try to fetch the german transcript (de) and then fetch the english transcript (en) if |         ['de', 'en'] it will first try to fetch the german transcript (de) and then fetch the english transcript (en) if | ||||||
|         it fails to do so. |         it fails to do so. | ||||||
|         :type languages: [str] |         :type languages: list[str] | ||||||
|         :return: the found Transcript |         :return: the found Transcript | ||||||
|         :rtype: Transcript |         :rtype Transcript: | ||||||
|         :raises: NoTranscriptFound |         :raises: NoTranscriptFound | ||||||
|         """ |         """ | ||||||
|         return self._find_transcript(language_codes, [self._manually_created_transcripts,]) |         return self._find_transcript(language_codes, [self._manually_created_transcripts,]) | ||||||
|  | @ -252,7 +252,7 @@ class Transcript(): | ||||||
|         Loads the actual transcript data. |         Loads the actual transcript data. | ||||||
| 
 | 
 | ||||||
|         :return: a list of dictionaries containing the 'text', 'start' and 'duration' keys |         :return: a list of dictionaries containing the 'text', 'start' and 'duration' keys | ||||||
|         :rtype: [{'text': str, 'start': float, 'end': float}] |         :rtype [{'text': str, 'start': float, 'end': float}]: | ||||||
|         """ |         """ | ||||||
|         return _TranscriptParser().parse( |         return _TranscriptParser().parse( | ||||||
|             self._http_client.get(self._url).text |             self._http_client.get(self._url).text | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue