103 lines
3.7 KiB
Python
103 lines
3.7 KiB
Python
from unittest import TestCase
|
|
from mock import MagicMock
|
|
|
|
import os
|
|
|
|
import httpretty
|
|
|
|
from youtube_transcript_api._api import YouTubeTranscriptApi
|
|
|
|
|
|
def load_asset(filename):
|
|
with open('{dirname}/assets/{filename}'.format(dirname=os.path.dirname(__file__), filename=filename)) as file:
|
|
return file.read()
|
|
|
|
|
|
class TestYouTubeTranscriptApi(TestCase):
|
|
def setUp(self):
|
|
httpretty.enable()
|
|
httpretty.register_uri(
|
|
httpretty.GET,
|
|
'https://www.youtube.com/watch',
|
|
body=load_asset('youtube.html')
|
|
)
|
|
httpretty.register_uri(
|
|
httpretty.GET,
|
|
'https://www.youtube.com/api/timedtext',
|
|
body=load_asset('transcript.xml')
|
|
)
|
|
|
|
def tearDown(self):
|
|
httpretty.disable()
|
|
|
|
def test_get_transcript(self):
|
|
transcript = YouTubeTranscriptApi.get_transcript('GJLlxj_dtq8')
|
|
|
|
self.assertEqual(
|
|
transcript,
|
|
[
|
|
{'text': 'Hey, this is just a test', 'start': 0.0, 'duration': 1.54},
|
|
{'text': 'this is not the original transcript', 'start': 1.54, 'duration': 4.16},
|
|
{'text': 'just something shorter, I made up for testing', 'start': 5.7, 'duration': 3.239}
|
|
]
|
|
)
|
|
|
|
def test_get_transcript__correct_language_is_used(self):
|
|
YouTubeTranscriptApi.get_transcript('GJLlxj_dtq8', ['de', 'en'])
|
|
query_string = httpretty.last_request().querystring
|
|
|
|
self.assertIn('lang', query_string)
|
|
self.assertEqual(len(query_string['lang']), 1)
|
|
self.assertEqual(query_string['lang'][0], 'de')
|
|
|
|
def test_get_transcript__fallback_language_is_used(self):
|
|
httpretty.register_uri(
|
|
httpretty.GET,
|
|
'https://www.youtube.com/api/timedtext',
|
|
body=''
|
|
)
|
|
|
|
YouTubeTranscriptApi.get_transcript('GJLlxj_dtq8', ['de', 'en'])
|
|
query_string = httpretty.last_request().querystring
|
|
|
|
self.assertIn('lang', query_string)
|
|
self.assertEqual(len(query_string['lang']), 1)
|
|
self.assertEqual(query_string['lang'][0], 'en')
|
|
|
|
def test_get_transcript__exception_is_raised_when_not_available(self):
|
|
httpretty.register_uri(
|
|
httpretty.GET,
|
|
'https://www.youtube.com/api/timedtext',
|
|
body=''
|
|
)
|
|
|
|
with self.assertRaises(YouTubeTranscriptApi.CouldNotRetrieveTranscript):
|
|
YouTubeTranscriptApi.get_transcript('GJLlxj_dtq8')
|
|
|
|
def test_get_transcripts(self):
|
|
video_id_1 = 'video_id_1'
|
|
video_id_2 = 'video_id_2'
|
|
languages = ['de', 'en']
|
|
YouTubeTranscriptApi.get_transcript = MagicMock()
|
|
|
|
YouTubeTranscriptApi.get_transcripts([video_id_1, video_id_2], languages=languages)
|
|
|
|
YouTubeTranscriptApi.get_transcript.assert_any_call(video_id_1, languages)
|
|
YouTubeTranscriptApi.get_transcript.assert_any_call(video_id_2, languages)
|
|
self.assertEqual(YouTubeTranscriptApi.get_transcript.call_count, 2)
|
|
|
|
def test_get_transcripts__stop_on_error(self):
|
|
YouTubeTranscriptApi.get_transcript = MagicMock(side_effect=Exception('Error'))
|
|
|
|
with self.assertRaises(Exception):
|
|
YouTubeTranscriptApi.get_transcripts(['video_id_1', 'video_id_2'])
|
|
|
|
def test_get_transcripts__continue_on_error(self):
|
|
video_id_1 = 'video_id_1'
|
|
video_id_2 = 'video_id_2'
|
|
YouTubeTranscriptApi.get_transcript = MagicMock(side_effect=Exception('Error'))
|
|
|
|
YouTubeTranscriptApi.get_transcripts(['video_id_1', 'video_id_2'], continue_after_error=True)
|
|
|
|
YouTubeTranscriptApi.get_transcript.assert_any_call(video_id_1, None)
|
|
YouTubeTranscriptApi.get_transcript.assert_any_call(video_id_2, None) |