updated WebVTT and SRT formatters
This commit is contained in:
		
							parent
							
								
									3b2e6e253d
								
							
						
					
					
						commit
						68ca703ae0
					
				|  | @ -79,8 +79,19 @@ class TextFormatter(Formatter): | |||
|         """ | ||||
|         return '\n\n\n'.join([self.format_transcript(transcript, **kwargs) for transcript in transcripts]) | ||||
| 
 | ||||
| class _TextBasedFormatter(TextFormatter): | ||||
|     def _format_timestamp(self, hours, mins, secs, ms): | ||||
|         raise NotImplementedError('A subclass of _TextBasedFormatter must implement ' \ | ||||
|             'their own .format_timestamp() method.') | ||||
| 
 | ||||
|     def _format_transcript_header(self, lines): | ||||
|         raise NotImplementedError('A subclass of _TextBasedFormatter must implement ' \ | ||||
|             'their own _format_transcript_header method.') | ||||
| 
 | ||||
|     def _format_transcript_helper(self, i, time_text, line): | ||||
|         raise NotImplementedError('A subclass of _TextBasedFormatter must implement ' \ | ||||
|             'their own _format_transcript_helper method.')     | ||||
|              | ||||
| class WebVTTFormatter(Formatter): | ||||
|     def _seconds_to_timestamp(self, time): | ||||
|         """Helper that converts `time` into a transcript cue timestamp. | ||||
| 
 | ||||
|  | @ -95,96 +106,55 @@ class WebVTTFormatter(Formatter): | |||
|         '00:00:06.930' | ||||
|         """ | ||||
|         time = float(time) | ||||
|         hours, remainder = divmod(time, 3600) | ||||
|         mins, secs = divmod(remainder, 60) | ||||
|         hours_float, remainder = divmod(time, 3600) | ||||
|         mins_float, secs_float = divmod(remainder, 60) | ||||
|         hours, mins, secs = int(hours_float), int(mins_float), int(secs_float) | ||||
|         ms = int(round((time - int(time))*1000, 2)) | ||||
|         return "{:02.0f}:{:02.0f}:{:02.0f}.{:03d}".format(hours, mins, secs, ms) | ||||
|         return self._format_timestamp(hours, mins, secs, ms) | ||||
| 
 | ||||
|     def format_transcript(self, transcript, **kwargs): | ||||
|         """A basic implementation of WEBVTT formatting. | ||||
|         """A basic implementation of WEBVTT/SRT formatting. | ||||
| 
 | ||||
|         :param transcript: | ||||
|         :reference: https://www.w3.org/TR/webvtt1/#introduction-caption | ||||
|         :reference:  | ||||
|         https://www.w3.org/TR/webvtt1/#introduction-caption | ||||
|         https://www.3playmedia.com/blog/create-srt-file/ | ||||
|         """ | ||||
|         lines = [] | ||||
|         for i, line in enumerate(transcript): | ||||
|             if i < len(transcript) - 1: | ||||
|                 # Looks ahead, use next start time since duration value | ||||
|                 # would create an overlap between start times. | ||||
|             end = line['start'] + line['duration'] | ||||
|             time_text = "{} --> {}".format( | ||||
|                 self._seconds_to_timestamp(line['start']), | ||||
|                     self._seconds_to_timestamp(transcript[i + 1]['start']) | ||||
|                 self._seconds_to_timestamp( | ||||
|                     transcript[i + 1]['start']  | ||||
|                     if i < len(transcript) - 1 and transcript[i + 1]['start'] < end else end | ||||
|                 ) | ||||
|             else: | ||||
|                 # Reached the end, cannot look ahead, use duration now. | ||||
|                 duration = line['start'] + line['duration'] | ||||
|                 time_text = "{} --> {}".format( | ||||
|                     self._seconds_to_timestamp(line['start']), | ||||
|                     self._seconds_to_timestamp(duration) | ||||
|             ) | ||||
|             lines.append("{}\n{}".format(time_text, line['text'])) | ||||
|             lines.append(self._format_transcript_helper(i, time_text, line)) | ||||
| 
 | ||||
|         return "WEBVTT\n\n" + "\n\n".join(lines) + "\n" | ||||
|         return self._format_transcript_header(lines) | ||||
| 
 | ||||
|     def format_transcripts(self, transcripts, **kwargs): | ||||
|         """A basic implementation of WEBVTT formatting for a list of transcripts. | ||||
| 
 | ||||
|         :param transcripts: | ||||
|         :reference: https://www.w3.org/TR/webvtt1/#introduction-caption | ||||
|         """ | ||||
|         return '\n\n\n'.join([self.format_transcript(transcript, **kwargs) for transcript in transcripts]) | ||||
| 
 | ||||
| class SRTFormatter(Formatter): | ||||
|     def _seconds_to_timestamp(self, time): | ||||
|         """Helper that converts `time` into a transcript cue timestamp for SRT. | ||||
| 
 | ||||
|         :param time: a float representing time in seconds. | ||||
|         :type time: float | ||||
|         :return: a string formatted as a cue timestamp, 'HH:MM:SS,MS' | ||||
|         :rtype str | ||||
|         :example: | ||||
|         >>> self._seconds_to_timestamp(6.93) | ||||
|         '00:00:06,930' | ||||
|         """ | ||||
|         time = float(time) | ||||
|         hours, remainder = divmod(time, 3600) | ||||
|         mins, secs = divmod(remainder, 60) | ||||
|         ms = int(round((time - int(time))*1000, 2)) | ||||
|         return "{:02.0f}:{:02.0f}:{:02.0f},{:03d}".format(hours, mins, secs, ms) | ||||
| 
 | ||||
|     def format_transcript(self, transcript, **kwargs): | ||||
|         """Converts a transcript into SRT formatting. | ||||
| 
 | ||||
|         :param transcript: | ||||
|         :reference: https://www.3playmedia.com/blog/create-srt-file/ | ||||
|         """ | ||||
|         lines = [] | ||||
|         for i, line in enumerate(transcript): | ||||
|             if i < len(transcript) - 1: | ||||
|                 # Looks ahead, use next start time since duration value | ||||
|                 # would create an overlap between start times. | ||||
|                 time_text = "{} --> {}".format( | ||||
|                     self._seconds_to_timestamp(line['start']), | ||||
|                     self._seconds_to_timestamp(transcript[i + 1]['start']) | ||||
|                 ) | ||||
|             else: | ||||
|                 # Reached the end, cannot look ahead, use duration now. | ||||
|                 duration = line['start'] + line['duration'] | ||||
|                 time_text = "{} --> {}".format( | ||||
|                     self._seconds_to_timestamp(line['start']), | ||||
|                     self._seconds_to_timestamp(duration) | ||||
|                 ) | ||||
|             lines.append("{}\n{}\n{}".format(i + 1, time_text, line['text'])) | ||||
| class SRTFormatter(_TextBasedFormatter): | ||||
|     def _format_timestamp(self, hours, mins, secs, ms): | ||||
|         return "{:02d}:{:02d}:{:02d},{:03d}".format(hours, mins, secs, ms) | ||||
|      | ||||
|     def _format_transcript_header(self, lines): | ||||
|         return "\n\n".join(lines) + "\n" | ||||
| 
 | ||||
|     def format_transcripts(self, transcripts, **kwargs): | ||||
|         """Converts a list of transcripts into SRT formatting. | ||||
|     def _format_transcript_helper(self, i, time_text, line): | ||||
|         return "{}\n{}\n{}".format(i + 1, time_text, line['text']) | ||||
| 
 | ||||
|         :param transcript: | ||||
|         :reference: https://www.3playmedia.com/blog/create-srt-file/ | ||||
|         """ | ||||
|         return '\n\n\n'.join([self.format_transcript(transcript, **kwargs) for transcript in transcripts]) | ||||
| 
 | ||||
| class WebVTTFormatter(_TextBasedFormatter): | ||||
|     def _format_timestamp(self, hours, mins, secs, ms): | ||||
|         return "{:02d}:{:02d}:{:02d}.{:03d}".format(hours, mins, secs, ms) | ||||
| 
 | ||||
|     def _format_transcript_header(self, lines): | ||||
|         return "WEBVTT\n\n" + "\n\n".join(lines) + "\n" | ||||
| 
 | ||||
|     def _format_transcript_helper(self, i, time_text, line): | ||||
|         return "{}\n{}".format(time_text, line['text']) | ||||
| 
 | ||||
| 
 | ||||
| class FormatterLoader(object): | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue