我试图合并两个时间戳之间的句子,并以列表的形式返回。 输入的数据是。
'00:00:00.000 --> 00:00:00.740', '<v Davis, Tres>Hi, Tuim.</v>', '00:00:10.000 --> 00:01:00.581', '<v Crook, Tim. J.>Yeah. Hi, Tres.</v>', '00:00:10.000 --> 00:01:00.581', '<v Davis, Tres>On the deck. We will go back. I'm watching so not to what I'm thinking. </v>', '00:00:12.056 --> 00:00:00.721', '<v Davis, Tres>Ofcourse.</v>', '00:00:25.643 --> 00:00:00.775', '<v Davis, Tres>Thanks, Bye.</v>'Expected output is:
'00:00:00.000 --> 00:00:00.740', '<v Davis, Tres>Hi, Tuim.</v>', '00:00:10.000 --> 00:01:00.581', '<v Crook, Tim. J.>Yeah. Hi, Tres.</v>', '00:00:10.000 --> 00:01:00.581', '<v Davis, Tres>On the deck. We will go back. I'm watching so not to what I'm thinking. Ofcourse. Thanks, Bye.</v>'以下是我正在尝试的代码-
import itertools
lines = [
'00:00:00.000 --> 00:00:00.740', '<v Davis, Tres>Hi, Tuim.</v>',
'00:00:10.000 --> 00:01:00.581', '<v Crook, Tim. J.>Yeah. Hi, Tres.</v>',
'00:00:10.000 --> 00:01:00.581', '<v Davis, Tres>On the deck. We will go back. Iam watching so not to what Iam thinking. </v>',
'00:00:12.056 --> 00:00:00.721', '<v Davis, Tres>Ofcourse.</v>',
'00:00:25.643 --> 00:00:00.775', '<v Davis, Tres>Thanks, Bye.</v>'
def iterate_over_lines(lines):
lines_iter = iter(lines) # create an iterator for the list
while True:
timestamp = next(lines_iter)
speech = next(lines_iter)
yield (timestamp, speech) # Each "speech group" contains the timestamp, and what was said
except StopIteration:
break
def get_speaker(speech_group):
line = speech_group[1] # What was said is the second element of the group
speaker_out = line.split('<v ')[1].split('>')[0] # Extract speaker from line
return speaker_out
line_groups = iterate_over_lines(lines)
for speaker, lines_spoken in itertools.groupby(line_groups, key=get_speaker):
print(speaker)
print(*lines_spoken, sep="\n")
print("")
def get_speech(line):
speech_out = line.split('<v ')[1].split('>')[1].split('</v')[0] # Extract speech from line
return speech_out
def merge_group(group):
timestamp = None
speech = []
for ts, sp in group:
if timestamp is None:
timestamp = ts
speech.append(get_speech(sp))
line_groups = iterate_over_lines(lines)
result = []
for speaker, lines_spoken in itertools.groupby(line_groups, key=get_speaker):
timestamp, speech = merge_group(lines_spoken)
result.append(timestamp)
result.append(f"<v {speaker}>{speech}</v>")
print(result)
我得到的错误是。
Traceback (most recent call last):
File "<stdin>", line 2, in <module>
TypeError: cannot unpack non-iterable NoneType object