Move `preserve_formatting` from `Transcript.__init__` to `Transcript.fetch()`
Also remove it from `TranscriptList` and `TranscriptListFetcher`
This commit is contained in:
parent
79fd63d585
commit
ca93c48fa1
|
@ -16,7 +16,7 @@ from ._errors import (
|
|||
|
||||
class YouTubeTranscriptApi(object):
|
||||
@classmethod
|
||||
def list_transcripts(cls, video_id, proxies=None, cookies=None, preserve_formatting=False):
|
||||
def list_transcripts(cls, video_id, proxies=None, cookies=None):
|
||||
"""
|
||||
Retrieves the list of transcripts which are available for a given video. It returns a `TranscriptList` object
|
||||
which is iterable and provides methods to filter the list of transcripts for specific languages. While iterating
|
||||
|
@ -61,8 +61,6 @@ class YouTubeTranscriptApi(object):
|
|||
:type proxies: {'http': str, 'https': str} - http://docs.python-requests.org/en/master/user/advanced/#proxies
|
||||
:param cookies: a string of the path to a text file containing youtube authorization cookies
|
||||
:type cookies: str
|
||||
:param preserve_formatting: whether to keep select HTML text formatting
|
||||
:type preserve_formatting: bool
|
||||
:return: the list of available transcripts
|
||||
:rtype TranscriptList:
|
||||
"""
|
||||
|
@ -70,8 +68,7 @@ class YouTubeTranscriptApi(object):
|
|||
if cookies:
|
||||
http_client.cookies = cls._load_cookies(cookies, video_id)
|
||||
http_client.proxies = proxies if proxies else {}
|
||||
return TranscriptListFetcher(http_client).fetch(video_id,
|
||||
preserve_formatting=preserve_formatting)
|
||||
return TranscriptListFetcher(http_client).fetch(video_id)
|
||||
|
||||
@classmethod
|
||||
def get_transcripts(cls, video_ids, languages=('en',), continue_after_error=False, proxies=None,
|
||||
|
@ -137,8 +134,8 @@ class YouTubeTranscriptApi(object):
|
|||
:rtype [{'text': str, 'start': float, 'duration': float}]:
|
||||
"""
|
||||
assert isinstance(video_id, str), "`video_id` must be a string"
|
||||
return cls.list_transcripts(video_id, proxies, cookies, preserve_formatting=preserve_formatting).find_transcript(languages).fetch()
|
||||
|
||||
return cls.list_transcripts(video_id, proxies, cookies).find_transcript(languages).fetch(preserve_formatting=preserve_formatting)
|
||||
|
||||
@classmethod
|
||||
def _load_cookies(cls, cookies, video_id):
|
||||
try:
|
||||
|
|
|
@ -40,12 +40,12 @@ class TranscriptListFetcher(object):
|
|||
def __init__(self, http_client):
|
||||
self._http_client = http_client
|
||||
|
||||
def fetch(self, video_id, preserve_formatting=False):
|
||||
def fetch(self, video_id):
|
||||
|
||||
return TranscriptList.build(
|
||||
self._http_client,
|
||||
video_id,
|
||||
self._extract_captions_json(self._fetch_video_html(video_id), video_id),
|
||||
preserve_formatting=preserve_formatting,
|
||||
)
|
||||
|
||||
def _extract_captions_json(self, html, video_id):
|
||||
|
@ -114,7 +114,7 @@ class TranscriptList(object):
|
|||
self._translation_languages = translation_languages
|
||||
|
||||
@staticmethod
|
||||
def build(http_client, video_id, captions_json, preserve_formatting=False):
|
||||
def build(http_client, video_id, captions_json):
|
||||
"""
|
||||
Factory method for TranscriptList.
|
||||
|
||||
|
@ -124,8 +124,6 @@ class TranscriptList(object):
|
|||
:type video_id: str
|
||||
:param captions_json: the JSON parsed from the YouTube page's static HTML
|
||||
:type captions_json: dict
|
||||
:param preserve_formatting: whether to keep select HTML text formatting
|
||||
:type preserve_formatting: bool
|
||||
:return: the created TranscriptList
|
||||
:rtype TranscriptList:
|
||||
"""
|
||||
|
@ -153,7 +151,6 @@ class TranscriptList(object):
|
|||
caption['languageCode'],
|
||||
caption.get('kind', '') == 'asr',
|
||||
translation_languages if caption.get('isTranslatable', False) else [],
|
||||
preserve_formatting=preserve_formatting,
|
||||
)
|
||||
|
||||
return TranscriptList(
|
||||
|
@ -253,8 +250,7 @@ class TranscriptList(object):
|
|||
|
||||
|
||||
class Transcript(object):
|
||||
def __init__(self, http_client, video_id, url, language, language_code, is_generated, translation_languages,
|
||||
preserve_formatting=False):
|
||||
def __init__(self, http_client, video_id, url, language, language_code, is_generated, translation_languages):
|
||||
"""
|
||||
You probably don't want to initialize this directly. Usually you'll access Transcript objects using a
|
||||
TranscriptList.
|
||||
|
@ -268,8 +264,6 @@ class Transcript(object):
|
|||
:param language_code:
|
||||
:param is_generated:
|
||||
:param translation_languages:
|
||||
:param preserve_formatting: whether to keep select HTML text formatting
|
||||
:type preserve_formatting: bool
|
||||
"""
|
||||
self._http_client = http_client
|
||||
self.video_id = video_id
|
||||
|
@ -282,17 +276,17 @@ class Transcript(object):
|
|||
translation_language['language_code']: translation_language['language']
|
||||
for translation_language in translation_languages
|
||||
}
|
||||
self.preserve_formatting = preserve_formatting
|
||||
|
||||
def fetch(self):
|
||||
def fetch(self, preserve_formatting=False):
|
||||
"""
|
||||
Loads the actual transcript data.
|
||||
|
||||
:param preserve_formatting: whether to keep select HTML text formatting
|
||||
:type preserve_formatting: bool
|
||||
:return: a list of dictionaries containing the 'text', 'start' and 'duration' keys
|
||||
:rtype [{'text': str, 'start': float, 'duration': float}]:
|
||||
"""
|
||||
response = self._http_client.get(self._url)
|
||||
return _TranscriptParser(preserve_formatting=self.preserve_formatting).parse(
|
||||
return _TranscriptParser(preserve_formatting=preserve_formatting).parse(
|
||||
_raise_http_errors(response, self.video_id).text,)
|
||||
|
||||
def __str__(self):
|
||||
|
@ -321,7 +315,6 @@ class Transcript(object):
|
|||
language_code,
|
||||
True,
|
||||
[],
|
||||
preserve_formatting=self.preserve_formatting,
|
||||
)
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue