Added support for HTML unescaping on all Python versions
This commit is contained in:
parent
fe1783688e
commit
2b3a6f3a71
|
@ -0,0 +1,19 @@
|
|||
import sys
|
||||
|
||||
# Pick an ``unescape`` implementation that exists on the running interpreter.
# ``sys.version_info`` is compared as a tuple rather than through the named
# ``.major`` / ``.minor`` attributes: those attributes are missing on
# Python 2.6 and 3.0, and an ``== 3`` check on the major version would push
# any later major release into the legacy fallback below.
if sys.version_info >= (3, 4):
    # Python 3.4+
    from html import unescape
else:
    if sys.version_info < (3,):
        # Python 2
        import HTMLParser

        html_parser = HTMLParser.HTMLParser()
    else:
        # Python 3.0 - 3.3
        import html.parser

        html_parser = html.parser.HTMLParser()

    def unescape(string):
        """Return ``string`` passed through the legacy parser's ``unescape``.

        Delegates to the module-level ``html_parser`` instance created above
        so that callers can use the same ``unescape(string)`` call on every
        supported Python version.
        """
        return html_parser.unescape(string)
|
|
@ -6,6 +6,8 @@ import logging
|
|||
|
||||
import requests
|
||||
|
||||
from .html_unescaping import unescape
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
@ -112,7 +114,7 @@ class _TranscriptParser():
|
|||
def parse(self):
|
||||
return [
|
||||
{
|
||||
'text': re.sub(self.HTML_TAG_REGEX, '', xml_element.text),
|
||||
'text': re.sub(self.HTML_TAG_REGEX, '', unescape(xml_element.text)),
|
||||
'start': float(xml_element.attrib['start']),
|
||||
'duration': float(xml_element.attrib['dur']),
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue