_FORMATTING_TAGS is now a static property of _TranscriptParser; _get_html_regex is now private; removed preserve_formatting property of _TranscriptParser
This commit is contained in:
		
							parent
							
								
									ca93c48fa1
								
							
						
					
					
						commit
						8c62e5e276
					
				|  | @ -95,6 +95,7 @@ class TranscriptList(object): | ||||||
|     This object represents a list of transcripts. It can be iterated over to list all transcripts which are available |     This object represents a list of transcripts. It can be iterated over to list all transcripts which are available | ||||||
|     for a given YouTube video. Also it provides functionality to search for a transcript in a given language. |     for a given YouTube video. Also it provides functionality to search for a transcript in a given language. | ||||||
|     """ |     """ | ||||||
|  | 
 | ||||||
|     def __init__(self, video_id, manually_created_transcripts, generated_transcripts, translation_languages): |     def __init__(self, video_id, manually_created_transcripts, generated_transcripts, translation_languages): | ||||||
|         """ |         """ | ||||||
|         The constructor is only for internal use. Use the static build method instead. |         The constructor is only for internal use. Use the static build method instead. | ||||||
|  | @ -191,7 +192,7 @@ class TranscriptList(object): | ||||||
|         :rtype Transcript: |         :rtype Transcript: | ||||||
|         :raises: NoTranscriptFound |         :raises: NoTranscriptFound | ||||||
|         """ |         """ | ||||||
|         return self._find_transcript(language_codes, [self._generated_transcripts,]) |         return self._find_transcript(language_codes, [self._generated_transcripts]) | ||||||
| 
 | 
 | ||||||
|     def find_manually_created_transcript(self, language_codes): |     def find_manually_created_transcript(self, language_codes): | ||||||
|         """ |         """ | ||||||
|  | @ -205,7 +206,7 @@ class TranscriptList(object): | ||||||
|         :rtype Transcript: |         :rtype Transcript: | ||||||
|         :raises: NoTranscriptFound |         :raises: NoTranscriptFound | ||||||
|         """ |         """ | ||||||
|         return self._find_transcript(language_codes, [self._manually_created_transcripts,]) |         return self._find_transcript(language_codes, [self._manually_created_transcripts]) | ||||||
| 
 | 
 | ||||||
|     def _find_transcript(self, language_codes, transcript_dicts): |     def _find_transcript(self, language_codes, transcript_dicts): | ||||||
|         for language_code in language_codes: |         for language_code in language_codes: | ||||||
|  | @ -287,7 +288,8 @@ class Transcript(object): | ||||||
|         """ |         """ | ||||||
|         response = self._http_client.get(self._url) |         response = self._http_client.get(self._url) | ||||||
|         return _TranscriptParser(preserve_formatting=preserve_formatting).parse( |         return _TranscriptParser(preserve_formatting=preserve_formatting).parse( | ||||||
|             _raise_http_errors(response, self.video_id).text,) |             _raise_http_errors(response, self.video_id).text, | ||||||
|  |         ) | ||||||
| 
 | 
 | ||||||
|     def __str__(self): |     def __str__(self): | ||||||
|         return '{language_code} ("{language}"){translation_description}'.format( |         return '{language_code} ("{language}"){translation_description}'.format( | ||||||
|  | @ -319,9 +321,7 @@ class Transcript(object): | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class _TranscriptParser(object): | class _TranscriptParser(object): | ||||||
|     def __init__(self, preserve_formatting=False): |     _FORMATTING_TAGS = [ | ||||||
|         self.preserve_formatting = preserve_formatting |  | ||||||
|         self._FORMATTING_TAGS = [ |  | ||||||
|         'strong',  # important |         'strong',  # important | ||||||
|         'em',  # emphasized |         'em',  # emphasized | ||||||
|         'b',  # bold |         'b',  # bold | ||||||
|  | @ -333,10 +333,12 @@ class _TranscriptParser(object): | ||||||
|         'sub',  # subscript |         'sub',  # subscript | ||||||
|         'sup',  # superscript |         'sup',  # superscript | ||||||
|     ] |     ] | ||||||
|         self._html_regex = self.get_html_regex() |  | ||||||
| 
 | 
 | ||||||
|     def get_html_regex(self): |     def __init__(self, preserve_formatting=False): | ||||||
|         if self.preserve_formatting: |         self._html_regex = self._get_html_regex(preserve_formatting) | ||||||
|  | 
 | ||||||
|  |     def _get_html_regex(self, preserve_formatting): | ||||||
|  |         if preserve_formatting: | ||||||
|             formats_regex = '|'.join(self._FORMATTING_TAGS) |             formats_regex = '|'.join(self._FORMATTING_TAGS) | ||||||
|             formats_regex = r'<\/?(?!\/?(' + formats_regex + r')\b).*?\b>' |             formats_regex = r'<\/?(?!\/?(' + formats_regex + r')\b).*?\b>' | ||||||
|             html_regex = re.compile(formats_regex, re.IGNORECASE) |             html_regex = re.compile(formats_regex, re.IGNORECASE) | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue