Added a check for whether any transcripts are actually available

This commit is contained in:
Jonas Depoix 2019-12-20 16:28:35 +01:00
parent 20612ea7a3
commit f1e4754ca4
5 changed files with 1394 additions and 7 deletions

View File

@ -1,3 +1,11 @@
from ._api import YouTubeTranscriptApi from ._api import YouTubeTranscriptApi
from ._transcripts import TranscriptList, Transcript from ._transcripts import TranscriptList, Transcript
from ._errors import TranscriptsDisabled, NoTranscriptFound, CouldNotRetrieveTranscript, VideoUnavailable from ._errors import (
TranscriptsDisabled,
NoTranscriptFound,
CouldNotRetrieveTranscript,
VideoUnavailable,
NotTranslatable,
TranslationLanguageNotAvailable,
NoTranscriptAvailable,
)

View File

@ -11,7 +11,7 @@ class CouldNotRetrieveTranscript(Exception):
GITHUB_REFERRAL = ( GITHUB_REFERRAL = (
'\n\nIf you are sure that the described cause is not responsible for this error ' '\n\nIf you are sure that the described cause is not responsible for this error '
'and that a transcript should be retrievable, please create an issue at ' 'and that a transcript should be retrievable, please create an issue at '
'https://github.com/jdepoix/youtube-transcript-api/issues.' 'https://github.com/jdepoix/youtube-transcript-api/issues. '
'Please add which version of youtube_transcript_api you are using ' 'Please add which version of youtube_transcript_api you are using '
'and provide the information needed to replicate the error. ' 'and provide the information needed to replicate the error. '
'Also make sure that there are no open issues which already describe your problem!' 'Also make sure that there are no open issues which already describe your problem!'
@ -43,6 +43,10 @@ class TranscriptsDisabled(CouldNotRetrieveTranscript):
CAUSE_MESSAGE = 'Subtitles are disabled for this video' CAUSE_MESSAGE = 'Subtitles are disabled for this video'
class NoTranscriptAvailable(CouldNotRetrieveTranscript):
CAUSE_MESSAGE = 'No transcripts are available for this video'
class NotTranslatable(CouldNotRetrieveTranscript): class NotTranslatable(CouldNotRetrieveTranscript):
CAUSE_MESSAGE = 'The requested language is not translatable' CAUSE_MESSAGE = 'The requested language is not translatable'

View File

@ -13,7 +13,12 @@ import re
from ._html_unescaping import unescape from ._html_unescaping import unescape
from ._errors import ( from ._errors import (
VideoUnavailable, NoTranscriptFound, TranscriptsDisabled, NotTranslatable, TranslationLanguageNotAvailable VideoUnavailable,
NoTranscriptFound,
TranscriptsDisabled,
NotTranslatable,
TranslationLanguageNotAvailable,
NoTranscriptAvailable,
) )
from ._settings import WATCH_URL from ._settings import WATCH_URL
@ -38,9 +43,14 @@ class TranscriptListFetcher():
raise TranscriptsDisabled(video_id) raise TranscriptsDisabled(video_id)
return json.loads(splitted_html[1].split(',"videoDetails')[0].replace('\n', ''))[ captions_json = json.loads(
'playerCaptionsTracklistRenderer' splitted_html[1].split(',"videoDetails')[0].replace('\n', '')
] )['playerCaptionsTracklistRenderer']
if 'captionTracks' not in captions_json:
raise NoTranscriptAvailable(video_id)
return captions_json
def _fetch_html(self, video_id): def _fetch_html(self, video_id):
return self._http_client.get(WATCH_URL.format(video_id=video_id)).text.replace( return self._http_client.get(WATCH_URL.format(video_id=video_id)).text.replace(

File diff suppressed because one or more lines are too long

View File

@ -5,7 +5,13 @@ import os
import httpretty import httpretty
from youtube_transcript_api import YouTubeTranscriptApi, VideoUnavailable, NoTranscriptFound, TranscriptsDisabled from youtube_transcript_api import (
YouTubeTranscriptApi,
TranscriptsDisabled,
NoTranscriptFound,
VideoUnavailable,
NoTranscriptAvailable,
)
def load_asset(filename): def load_asset(filename):
@ -88,6 +94,16 @@ class TestYouTubeTranscriptApi(TestCase):
with self.assertRaises(NoTranscriptFound): with self.assertRaises(NoTranscriptFound):
YouTubeTranscriptApi.get_transcript('GJLlxj_dtq8', languages=['cz']) YouTubeTranscriptApi.get_transcript('GJLlxj_dtq8', languages=['cz'])
def test_get_transcript__exception_if_no_transcript_available(self):
httpretty.register_uri(
httpretty.GET,
'https://www.youtube.com/watch',
body=load_asset('youtube_no_transcript_available.html.static')
)
with self.assertRaises(NoTranscriptAvailable):
YouTubeTranscriptApi.get_transcript('MwBPvcYFY2E')
def test_get_transcripts(self): def test_get_transcripts(self):
video_id_1 = 'video_id_1' video_id_1 = 'video_id_1'
video_id_2 = 'video_id_2' video_id_2 = 'video_id_2'