commit
4e228d9978
10
README.md
10
README.md
|
@ -54,7 +54,7 @@ You can also add the `languages` param if you want to make sure the transcripts
|
|||
YouTubeTranscriptApi.get_transcripts(video_ids, languages=['de', 'en'])
|
||||
```
|
||||
|
||||
It's a list of language codes in a descending priority. In this example it will first try to fetch the german transcript (`'de'`) and then fetch the english transcipt (`'en'`) if it fails to do so. As I can't provide a complete list of all working language codes with full certainty, you may have to play around with the language codes a bit, to find the one which is working for you!
|
||||
It's a list of language codes in a descending priority. In this example it will first try to fetch the german transcript (`'de'`) and then fetch the english transcript (`'en'`) if it fails to do so. As I can't provide a complete list of all working language codes with full certainty, you may have to play around with the language codes a bit, to find the one which is working for you!
|
||||
|
||||
To get transcripts for a list of video ids you can call:
|
||||
|
||||
|
@ -72,10 +72,16 @@ Execute the CLI script using the video ids as parameters and the results will be
|
|||
youtube_transcript_api <first_video_id> <second_video_id> ...
|
||||
```
|
||||
|
||||
The CLI also gives you the option to provide a list of preferred languages:
|
||||
|
||||
```
|
||||
youtube_transcript_api <first_video_id> <second_video_id> ... --languages de en
|
||||
```
|
||||
|
||||
If you would prefer to write it into a file or pipe it into another application, you can also output the results as json using the following line:
|
||||
|
||||
```
|
||||
youtube_transcript_api --json <first_video_id> <second_video_id> ... > transcripts.json
|
||||
youtube_transcript_api <first_video_id> <second_video_id> ... --languages de en --json > transcripts.json
|
||||
```
|
||||
|
||||
## Warning
|
||||
|
|
2
setup.py
2
setup.py
|
@ -11,7 +11,7 @@ def get_long_description():
|
|||
|
||||
setuptools.setup(
|
||||
name="youtube_transcript_api",
|
||||
version="0.1.2",
|
||||
version="0.1.3",
|
||||
author="Jonas Depoix",
|
||||
author_email="jonas.depoix@web.de",
|
||||
description="This is an python API which allows you to get the transcripts/subtitles for a given YouTube video. It also works for automatically generated subtitles and it does not require a headless browser, like other selenium based solutions do!",
|
||||
|
|
|
@ -6,18 +6,57 @@ from pprint import pprint
|
|||
|
||||
import logging
|
||||
|
||||
import argparse
|
||||
|
||||
from ._api import YouTubeTranscriptApi
|
||||
|
||||
|
||||
def parse_args(args):
    """Parse the command line arguments of the CLI.

    :param args: the argument strings to parse, without the program name
        (e.g. ``sys.argv[1:]``)
    :return: an ``argparse.Namespace`` with the attributes ``video_ids``
        (list of str, possibly empty), ``languages`` (list of str, empty
        when the option is not given) and ``json`` (bool, ``False`` unless
        the flag is set)
    """
    parser = argparse.ArgumentParser(
        description=(
            'This is an python API which allows you to get the transcripts/subtitles for a given YouTube video. '
            'It also works for automatically generated subtitles and it does not require a headless browser, like '
            'other selenium based solutions do!'
        )
    )
    parser.add_argument('video_ids', nargs='*', type=str, help='List of YouTube video IDs.')
    parser.add_argument(
        '--languages',
        nargs='*',
        default=[],
        type=str,
        help=(
            'A list of language codes in a descending priority. For example, if this is set to "de en" it will first '
            'try to fetch the german transcript (de) and then fetch the english transcript (en) if it fails to do so. '
            'As I can\'t provide a complete list of all working language codes with full certainty, you may have to '
            'play around with the language codes a bit, to find the one which is working for you!'
        ),
    )
    parser.add_argument(
        '--json',
        # store_true is shorthand for store_const with const=True and default=False
        action='store_true',
        help='If this flag is set the output will be JSON formatted.',
    )

    return parser.parse_args(args)
|
||||
|
||||
|
||||
def main():
|
||||
logging.basicConfig()
|
||||
|
||||
if len(sys.argv) <= 1:
|
||||
print('No YouTube video id was found')
|
||||
elif sys.argv[1] == '--json':
|
||||
print(json.dumps(YouTubeTranscriptApi.get_transcripts(sys.argv[2:], continue_after_error=True)[0]))
|
||||
parsed_args = parse_args(sys.argv[1:])
|
||||
transcripts, _ = YouTubeTranscriptApi.get_transcripts(
|
||||
parsed_args.video_ids,
|
||||
languages=parsed_args.languages,
|
||||
continue_after_error=True
|
||||
)
|
||||
|
||||
if parsed_args.json:
|
||||
print(json.dumps(transcripts))
|
||||
else:
|
||||
pprint(YouTubeTranscriptApi.get_transcripts(sys.argv[1:], continue_after_error=True)[0])
|
||||
pprint(transcripts)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
|
Loading…
Reference in New Issue