added check if any transcripts are actually available

This commit is contained in:
Jonas Depoix 2019-12-20 16:28:35 +01:00
parent 20612ea7a3
commit f1e4754ca4
5 changed files with 1394 additions and 7 deletions

View File

@ -1,3 +1,11 @@
from ._api import YouTubeTranscriptApi
from ._transcripts import TranscriptList, Transcript
from ._errors import TranscriptsDisabled, NoTranscriptFound, CouldNotRetrieveTranscript, VideoUnavailable
from ._errors import (
TranscriptsDisabled,
NoTranscriptFound,
CouldNotRetrieveTranscript,
VideoUnavailable,
NotTranslatable,
TranslationLanguageNotAvailable,
NoTranscriptAvailable,
)

View File

@ -43,6 +43,10 @@ class TranscriptsDisabled(CouldNotRetrieveTranscript):
CAUSE_MESSAGE = 'Subtitles are disabled for this video'
# Error for a video that offers no transcripts at all. The transcript list
# fetcher raises it when the parsed captions JSON has no 'captionTracks' key.
# CAUSE_MESSAGE is the human-readable reason attached to this error type.
class NoTranscriptAvailable(CouldNotRetrieveTranscript):
CAUSE_MESSAGE = 'No transcripts are available for this video'
class NotTranslatable(CouldNotRetrieveTranscript):
CAUSE_MESSAGE = 'The requested language is not translatable'

View File

@ -13,7 +13,12 @@ import re
from ._html_unescaping import unescape
from ._errors import (
VideoUnavailable, NoTranscriptFound, TranscriptsDisabled, NotTranslatable, TranslationLanguageNotAvailable
VideoUnavailable,
NoTranscriptFound,
TranscriptsDisabled,
NotTranslatable,
TranslationLanguageNotAvailable,
NoTranscriptAvailable,
)
from ._settings import WATCH_URL
@ -38,9 +43,14 @@ class TranscriptListFetcher():
raise TranscriptsDisabled(video_id)
return json.loads(splitted_html[1].split(',"videoDetails')[0].replace('\n', ''))[
'playerCaptionsTracklistRenderer'
]
captions_json = json.loads(
splitted_html[1].split(',"videoDetails')[0].replace('\n', '')
)['playerCaptionsTracklistRenderer']
if 'captionTracks' not in captions_json:
raise NoTranscriptAvailable(video_id)
return captions_json
def _fetch_html(self, video_id):
return self._http_client.get(WATCH_URL.format(video_id=video_id)).text.replace(

File diff suppressed because one or more lines are too long

View File

@ -5,7 +5,13 @@ import os
import httpretty
from youtube_transcript_api import YouTubeTranscriptApi, VideoUnavailable, NoTranscriptFound, TranscriptsDisabled
from youtube_transcript_api import (
YouTubeTranscriptApi,
TranscriptsDisabled,
NoTranscriptFound,
VideoUnavailable,
NoTranscriptAvailable,
)
def load_asset(filename):
@ -88,6 +94,16 @@ class TestYouTubeTranscriptApi(TestCase):
with self.assertRaises(NoTranscriptFound):
YouTubeTranscriptApi.get_transcript('GJLlxj_dtq8', languages=['cz'])
def test_get_transcript__exception_if_no_transcript_available(self):
    # Answer GET requests to the watch URL with the canned
    # "no transcript available" page instead of hitting the network.
    watch_page = load_asset('youtube_no_transcript_available.html.static')
    httpretty.register_uri(
        httpretty.GET,
        'https://www.youtube.com/watch',
        body=watch_page
    )

    # Fetching a transcript for that page must fail with NoTranscriptAvailable.
    self.assertRaises(
        NoTranscriptAvailable,
        YouTubeTranscriptApi.get_transcript,
        'MwBPvcYFY2E'
    )
def test_get_transcripts(self):
video_id_1 = 'video_id_1'
video_id_2 = 'video_id_2'