From cdb7b90b821ad0b012dee385f22512e3b2c40696 Mon Sep 17 00:00:00 2001 From: Jonas Depoix Date: Mon, 11 Mar 2019 14:41:26 +0100 Subject: [PATCH 1/2] improved param parsing and CLI support for choosing languages --- youtube_transcript_api/__main__.py | 49 +++++++++++++++++++++++++++--- 1 file changed, 44 insertions(+), 5 deletions(-) diff --git a/youtube_transcript_api/__main__.py b/youtube_transcript_api/__main__.py index 37bd7bb..205358a 100644 --- a/youtube_transcript_api/__main__.py +++ b/youtube_transcript_api/__main__.py @@ -6,18 +6,57 @@ from pprint import pprint import logging +import argparse + from ._api import YouTubeTranscriptApi +def parse_args(args): + parser = argparse.ArgumentParser( + description=( + 'This is an python API which allows you to get the transcripts/subtitles for a given YouTube video. ' + 'It also works for automatically generated subtitles and it does not require a headless browser, like ' + 'other selenium based solutions do!' + ) + ) + parser.add_argument('video_ids', nargs='*', type=str, help='List of YouTube video IDs.') + parser.add_argument( + '--languages', + nargs='*', + default=[], + type=str, + help=( + 'A list of language codes in a descending priority. For example, if this is set to "de en" it will first ' + 'try to fetch the german transcript (de) and then fetch the english transcipt (en) if it fails to do so. ' + 'As I can\'t provide a complete list of all working language codes with full certainty, you may have to ' + 'play around with the language codes a bit, to find the one which is working for you!' + ), + ) + parser.add_argument( + '--json', + action='store_const', + const=True, + default=False, + help='If this flag is set the output will be JSON formatted.', + ) + + return parser.parse_args(args) + + def main(): logging.basicConfig() - if len(sys.argv) <= 1: - print('No YouTube video id was found') - elif sys.argv[1] == '--json': - print(json.dumps(YouTubeTranscriptApi.get_transcripts(sys.argv[2:], continue_after_error=True)[0])) + parsed_args = parse_args(sys.argv) + transcripts, _ = YouTubeTranscriptApi.get_transcripts( + parsed_args.video_ids, + languages=parsed_args.languages, + continue_after_error=True + ) + + if parsed_args.json: + print(json.dumps(transcripts)) else: - pprint(YouTubeTranscriptApi.get_transcripts(sys.argv[1:], continue_after_error=True)[0]) + pprint(transcripts) if __name__ == '__main__': From 0390b822c5ab45d5184e46df83f19a68798471a6 Mon Sep 17 00:00:00 2001 From: Jonas Depoix Date: Mon, 11 Mar 2019 14:54:44 +0100 Subject: [PATCH 2/2] README updated --- README.md | 10 ++++++++-- setup.py | 2 +- youtube_transcript_api/__main__.py | 2 +- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 9718aed..454f44a 100644 --- a/README.md +++ b/README.md @@ -54,7 +54,7 @@ You can also add the `languages` param if you want to make sure the transcripts YouTubeTranscriptApi.get_transcripts(video_ids, languages=['de', 'en']) ``` -It's a list of language codes in a descending priority. In this example it will first try to fetch the german transcript (`'de'`) and then fetch the english transcipt (`'en'`) if it fails to do so. As I can't provide a complete list of all working language codes with full certainty, you may have to play around with the language codes a bit, to find the one which is working for you! +It's a list of language codes in a descending priority. In this example it will first try to fetch the german transcript (`'de'`) and then fetch the english transcript (`'en'`) if it fails to do so. As I can't provide a complete list of all working language codes with full certainty, you may have to play around with the language codes a bit, to find the one which is working for you! To get transcripts for a list fo video ids you can call: @@ -72,10 +72,16 @@ Execute the CLI script using the video ids as parameters and the results will be youtube_transcript_api ... ``` +The CLI also gives you the option to provide a list of preferred languages: + +``` +youtube_transcript_api ... --languages de en +``` + If you would prefer to write it into a file or pipe it into another application, you can also output the results as json using the following line: ``` -youtube_transcript_api --json ... > transcripts.json +youtube_transcript_api ... --languages de en --json > transcripts.json ``` ## Warning diff --git a/setup.py b/setup.py index f13e8e0..38c2a9e 100644 --- a/setup.py +++ b/setup.py @@ -11,7 +11,7 @@ def get_long_description(): setuptools.setup( name="youtube_transcript_api", - version="0.1.2", + version="0.1.3", author="Jonas Depoix", author_email="jonas.depoix@web.de", description="This is an python API which allows you to get the transcripts/subtitles for a given YouTube video. It also works for automatically generated subtitles and it does not require a headless browser, like other selenium based solutions do!", diff --git a/youtube_transcript_api/__main__.py b/youtube_transcript_api/__main__.py index 205358a..f011ff1 100644 --- a/youtube_transcript_api/__main__.py +++ b/youtube_transcript_api/__main__.py @@ -46,7 +46,7 @@ def parse_args(args): def main(): logging.basicConfig() - parsed_args = parse_args(sys.argv) + parsed_args = parse_args(sys.argv[1:]) transcripts, _ = YouTubeTranscriptApi.get_transcripts( parsed_args.video_ids, languages=parsed_args.languages,