forked from janakiram180/Video-summarizer-tool
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathchuck.py
36 lines (27 loc) · 1.02 KB
/
chuck.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import nltk
def re_chunks(input, tokenizer, max_tokens=500):
    """Split text into sentence-aligned chunks that fit a token budget.

    The text is split into sentences with ``nltk.tokenize.sent_tokenize``
    and the sentences are packed greedily, in order, into chunks whose
    total token count (as measured by ``tokenizer.tokenize``) does not
    exceed ``max_tokens``. Sentences are never split.

    Args:
        input: The text to split. An empty value yields an empty list.
        tokenizer: Any object exposing ``tokenize(str)`` that returns a
            sized sequence of tokens; only ``len()`` of the result is used.
        max_tokens: Maximum number of tokens per chunk. Defaults to 500,
            the limit that was previously hard-coded.

    Returns:
        A list of chunk strings. Sentences inside a chunk are joined by a
        single space and the chunk is stripped of surrounding whitespace.
        A single sentence that is longer than ``max_tokens`` on its own is
        emitted as its own (oversized) chunk rather than being dropped.
    """
    # Nothing to split; also avoids invoking the sentence splitter at all.
    if not input:
        return []

    chunks = []
    current = []       # sentences collected for the chunk being built
    current_len = 0    # token count of the sentences in ``current``

    for sentence in nltk.tokenize.sent_tokenize(input):
        # Tokenize once per sentence and reuse the count below.
        n_tokens = len(tokenizer.tokenize(sentence))

        # Flush only when there is something to flush: this prevents an
        # empty chunk when the first sentence alone exceeds the budget.
        if current and current_len + n_tokens > max_tokens:
            chunks.append(" ".join(current).strip())
            current = []
            current_len = 0

        current.append(sentence)
        current_len += n_tokens

    # Always emit the trailing chunk. Flushing here (instead of checking
    # for "last sentence" inside one branch of the loop) guarantees that a
    # final sentence which overflowed into a new chunk is not lost.
    if current:
        chunks.append(" ".join(current).strip())

    return chunks