_FORMATTING_TAGS is now a static property of _TranscriptParser; _get_html_regex is now private; removed preserve_formatting property of _TranscriptParser
This commit is contained in:
		
							parent
							
								
									ca93c48fa1
								
							
						
					
					
						commit
						8c62e5e276
					
				|  | @ -1,7 +1,7 @@ | ||||||
| import sys | import sys | ||||||
| 
 | 
 | ||||||
| # This can only be tested by using different python versions, therefore it is not covered by coverage.py | # This can only be tested by using different python versions, therefore it is not covered by coverage.py | ||||||
| if sys.version_info.major == 2: # pragma: no cover | if sys.version_info.major == 2:  # pragma: no cover | ||||||
|     reload(sys) |     reload(sys) | ||||||
|     sys.setdefaultencoding('utf-8') |     sys.setdefaultencoding('utf-8') | ||||||
| 
 | 
 | ||||||
|  | @ -95,6 +95,7 @@ class TranscriptList(object): | ||||||
|     This object represents a list of transcripts. It can be iterated over to list all transcripts which are available |     This object represents a list of transcripts. It can be iterated over to list all transcripts which are available | ||||||
|     for a given YouTube video. Also it provides functionality to search for a transcript in a given language. |     for a given YouTube video. Also it provides functionality to search for a transcript in a given language. | ||||||
|     """ |     """ | ||||||
|  | 
 | ||||||
|     def __init__(self, video_id, manually_created_transcripts, generated_transcripts, translation_languages): |     def __init__(self, video_id, manually_created_transcripts, generated_transcripts, translation_languages): | ||||||
|         """ |         """ | ||||||
|         The constructor is only for internal use. Use the static build method instead. |         The constructor is only for internal use. Use the static build method instead. | ||||||
|  | @ -191,7 +192,7 @@ class TranscriptList(object): | ||||||
|         :rtype Transcript: |         :rtype Transcript: | ||||||
|         :raises: NoTranscriptFound |         :raises: NoTranscriptFound | ||||||
|         """ |         """ | ||||||
|         return self._find_transcript(language_codes, [self._generated_transcripts,]) |         return self._find_transcript(language_codes, [self._generated_transcripts]) | ||||||
| 
 | 
 | ||||||
|     def find_manually_created_transcript(self, language_codes): |     def find_manually_created_transcript(self, language_codes): | ||||||
|         """ |         """ | ||||||
|  | @ -205,7 +206,7 @@ class TranscriptList(object): | ||||||
|         :rtype Transcript: |         :rtype Transcript: | ||||||
|         :raises: NoTranscriptFound |         :raises: NoTranscriptFound | ||||||
|         """ |         """ | ||||||
|         return self._find_transcript(language_codes, [self._manually_created_transcripts,]) |         return self._find_transcript(language_codes, [self._manually_created_transcripts]) | ||||||
| 
 | 
 | ||||||
|     def _find_transcript(self, language_codes, transcript_dicts): |     def _find_transcript(self, language_codes, transcript_dicts): | ||||||
|         for language_code in language_codes: |         for language_code in language_codes: | ||||||
|  | @ -287,7 +288,8 @@ class Transcript(object): | ||||||
|         """ |         """ | ||||||
|         response = self._http_client.get(self._url) |         response = self._http_client.get(self._url) | ||||||
|         return _TranscriptParser(preserve_formatting=preserve_formatting).parse( |         return _TranscriptParser(preserve_formatting=preserve_formatting).parse( | ||||||
|             _raise_http_errors(response, self.video_id).text,) |             _raise_http_errors(response, self.video_id).text, | ||||||
|  |         ) | ||||||
| 
 | 
 | ||||||
|     def __str__(self): |     def __str__(self): | ||||||
|         return '{language_code} ("{language}"){translation_description}'.format( |         return '{language_code} ("{language}"){translation_description}'.format( | ||||||
|  | @ -319,24 +321,24 @@ class Transcript(object): | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class _TranscriptParser(object): | class _TranscriptParser(object): | ||||||
|     def __init__(self, preserve_formatting=False): |     _FORMATTING_TAGS = [ | ||||||
|         self.preserve_formatting = preserve_formatting |         'strong',  # important | ||||||
|         self._FORMATTING_TAGS = [ |         'em',  # emphasized | ||||||
|             'strong',  # important |         'b',  # bold | ||||||
|             'em',  # emphasized |         'i',  # italic | ||||||
|             'b',  # bold |         'mark',  # marked | ||||||
|             'i',  # italic |         'small',  # smaller | ||||||
|             'mark',  # marked |         'del',  # deleted | ||||||
|             'small',  # smaller |         'ins',  # inserted | ||||||
|             'del',  # deleted |         'sub',  # subscript | ||||||
|             'ins',  # inserted |         'sup',  # superscript | ||||||
|             'sub',  # subscript |     ] | ||||||
|             'sup',  # superscript |  | ||||||
|             ] |  | ||||||
|         self._html_regex = self.get_html_regex() |  | ||||||
| 
 | 
 | ||||||
|     def get_html_regex(self): |     def __init__(self, preserve_formatting=False): | ||||||
|         if self.preserve_formatting: |         self._html_regex = self._get_html_regex(preserve_formatting) | ||||||
|  | 
 | ||||||
|  |     def _get_html_regex(self, preserve_formatting): | ||||||
|  |         if preserve_formatting: | ||||||
|             formats_regex = '|'.join(self._FORMATTING_TAGS) |             formats_regex = '|'.join(self._FORMATTING_TAGS) | ||||||
|             formats_regex = r'<\/?(?!\/?(' + formats_regex + r')\b).*?\b>' |             formats_regex = r'<\/?(?!\/?(' + formats_regex + r')\b).*?\b>' | ||||||
|             html_regex = re.compile(formats_regex, re.IGNORECASE) |             html_regex = re.compile(formats_regex, re.IGNORECASE) | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue