Add new formatters.py module
This commit is contained in:
		
							parent
							
								
									7a47fc83ad
								
							
						
					
					
						commit
						f3dc6f508f
					
				|  | @ -0,0 +1,91 @@ | ||||||
|  | import json | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
class Formatter(object):
    """Base class intended to be subclassed by concrete formatters.

    A subclass provides its own ``format()`` method, which should return
    a string. The transcript handed to the constructor is a list of
    dictionary items and is stored on ``self._transcript``.

    :param transcript: list representing 1 or more transcripts
    :type transcript: list
    :raises TypeError: if ``transcript`` is not a ``list`` instance
    """

    def __init__(self, transcript):
        is_list = isinstance(transcript, list)
        if not is_list:
            raise TypeError("'transcript' must be of type: List")
        self._transcript = transcript

    def format(self, **kwargs):
        """Placeholder that every concrete formatter has to override.

        :raises NotImplementedError: always, when called on this class
        """
        message = (
            'A subclass of Formatter must implement '
            'their own .format() method.'
        )
        raise NotImplementedError(message)
|  | 
 | ||||||
|  | 
 | ||||||
class JSONFormatter(Formatter):
    def format(self, **kwargs):
        """Serialize the stored transcript as a JSON string.

        :param kwargs: keyword arguments forwarded unchanged to
            ``json.dumps`` (for example ``indent``).
        :return: a JSON string representation of the transcript.
        :rtype: str
        """
        serialized = json.dumps(self._transcript, **kwargs)
        return serialized
|  | 
 | ||||||
|  | 
 | ||||||
class TextFormatter(Formatter):
    def format(self, **kwargs):
        """Render the transcript as plain text without timestamps.

        Only the ``'text'`` value of each transcript item is used.

        :param kwargs: accepted but not used by this formatter.
        :return: all transcript text lines separated by newline breaks.
        :rtype: str
        """
        texts = [entry['text'] for entry in self._transcript]
        return "\n".join(texts)
|  | 
 | ||||||
|  | 
 | ||||||
class WebVTTFormatter(Formatter):
    def _seconds_to_timestamp(self, time):
        """Helper that converts `time` into a transcript cue timestamp.

        :reference: https://www.w3.org/TR/webvtt1/#webvtt-timestamp

        :param time: time in seconds; anything accepted by ``float()``.
        :type time: float
        :return: a string formatted as a cue timestamp, 'HH:MM:SS.mmm',
            with minutes and seconds in the range 00-59 and a three
            digit millisecond field.
        :rtype: str
        :example:
        >>> self._seconds_to_timestamp(6.93)
        '00:00:06.930'
        >>> self._seconds_to_timestamp(3661.5)
        '01:01:01.500'
        """
        # Round once to whole milliseconds and split from there, so a
        # fraction that rounds up to 1000 ms carries into the seconds
        # (and onward) instead of producing a four digit ms field.
        total_ms = int(round(float(time) * 1000))
        total_secs, ms = divmod(total_ms, 1000)
        total_mins, secs = divmod(total_secs, 60)
        # Minutes must wrap at 60; the overflow belongs to the hours.
        hours, mins = divmod(total_mins, 60)
        return "{:02d}:{:02d}:{:02d}.{:03d}".format(hours, mins, secs, ms)

    def format(self, **kwargs):
        """A basic implementation of WEBVTT formatting.

        Each transcript item becomes one cue made of a timing line and
        the item's ``'text'``. Items are read through their ``'start'``,
        ``'duration'`` and ``'text'`` keys.

        :reference: https://www.w3.org/TR/webvtt1/#introduction-caption

        :param kwargs: accepted but not used by this formatter.
        :return: the ``WEBVTT`` header followed by the cues, separated
            by blank lines and terminated by a newline.
        :rtype: str
        """
        transcript = self._transcript
        last_index = len(transcript) - 1
        cues = []
        for i, line in enumerate(transcript):
            if i < last_index:
                # Look ahead and end this cue where the next one starts;
                # using the duration value would create an overlap
                # between cues.
                end = transcript[i + 1]['start']
            else:
                # Last item: nothing to look ahead to, use its duration.
                end = line['start'] + line['duration']
            cues.append("{} --> {}\n{}".format(
                self._seconds_to_timestamp(line['start']),
                self._seconds_to_timestamp(end),
                line['text'],
            ))

        return "WEBVTT\n\n" + "\n\n".join(cues) + "\n"
		Loading…
	
		Reference in New Issue