Make _html_regex an instance attribute of _TranscriptParser, computed once in __init__
also rename TEXT_FORMATS -> _FORMATTING_TAGS
This commit is contained in:
		
							parent
							
								
									72e9781528
								
							
						
					
					
						commit
						eda8ddb38f
					
				|  | @ -27,7 +27,7 @@ from ._errors import ( | ||||||
| ) | ) | ||||||
| from ._settings import WATCH_URL | from ._settings import WATCH_URL | ||||||
| 
 | 
 | ||||||
| TEXT_FORMATS = [ | _FORMATTING_TAGS = [ | ||||||
|     'strong',  # important |     'strong',  # important | ||||||
|     'em',  # emphasized |     'em',  # emphasized | ||||||
|     'b',  # bold |     'b',  # bold | ||||||
|  | @ -341,11 +341,11 @@ class Transcript(object): | ||||||
| class _TranscriptParser(object): | class _TranscriptParser(object): | ||||||
|     def __init__(self, preserve_formatting=False): |     def __init__(self, preserve_formatting=False): | ||||||
|         self.preserve_formatting = preserve_formatting |         self.preserve_formatting = preserve_formatting | ||||||
|  |         self._html_regex = self.get_html_regex() | ||||||
| 
 | 
 | ||||||
|     @property |     def get_html_regex(self): | ||||||
|     def html_regex(self): |  | ||||||
|         if self.preserve_formatting: |         if self.preserve_formatting: | ||||||
|             formats_regex = '|'.join(TEXT_FORMATS) |             formats_regex = '|'.join(_FORMATTING_TAGS) | ||||||
|             formats_regex = r'<\/?(?!\/?(' + formats_regex + r')\b).*?\b>' |             formats_regex = r'<\/?(?!\/?(' + formats_regex + r')\b).*?\b>' | ||||||
|             html_regex = re.compile(formats_regex, re.IGNORECASE) |             html_regex = re.compile(formats_regex, re.IGNORECASE) | ||||||
|         else: |         else: | ||||||
|  | @ -355,7 +355,7 @@ class _TranscriptParser(object): | ||||||
|     def parse(self, plain_data): |     def parse(self, plain_data): | ||||||
|         return [ |         return [ | ||||||
|             { |             { | ||||||
|                 'text': re.sub(self.html_regex, '', unescape(xml_element.text)), |                 'text': re.sub(self._html_regex, '', unescape(xml_element.text)), | ||||||
|                 'start': float(xml_element.attrib['start']), |                 'start': float(xml_element.attrib['start']), | ||||||
|                 'duration': float(xml_element.attrib.get('dur', '0.0')), |                 'duration': float(xml_element.attrib.get('dur', '0.0')), | ||||||
|             } |             } | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue