Commit

feat(rapidocr_onnxruntime): support en char rec (#272)
* feat: sup en char rec

* test: add en char rec test
Joker1212 authored Dec 3, 2024
1 parent 1905451 commit 01a5a65
Showing 2 changed files with 41 additions and 28 deletions.
65 changes: 37 additions & 28 deletions python/rapidocr_onnxruntime/cal_rec_boxes/main.py
@@ -59,7 +59,7 @@ def get_box_direction(box: np.ndarray) -> str:
 
     @staticmethod
     def cal_ocr_word_box(
-        rec_txt: str, box: np.ndarray, rec_word_info: List[Tuple[str, List[int]]]
+        rec_txt: str, box: np.ndarray, rec_word_info: List[Tuple[str, List[int]]]
     ) -> Tuple[List[str], List[List[int]]]:
         """Calculate the detection frame for each word based on the results of recognition and detection of ocr
         The coordinates of Chinese characters are per single character
@@ -77,47 +77,52 @@ def cal_ocr_word_box(
         word_box_list = []
         word_box_content_list = []
         cn_width_list = []
+        en_width_list = []
         cn_col_list = []
-        for word, word_col, state in zip(word_list, word_col_list, state_list):
-            if state == "cn":
-                if len(word_col) != 1:
-                    char_seq_length = (word_col[-1] - word_col[0] + 1) * cell_width
-                    char_width = char_seq_length / (len(word_col) - 1)
-                    cn_width_list.append(char_width)
-                cn_col_list += word_col
-                word_box_content_list += word
-            else:
-                cell_x_start = bbox_x_start + int(word_col[0] * cell_width)
-                cell_x_end = bbox_x_start + int((word_col[-1] + 1) * cell_width)
-                cell = [
-                    [cell_x_start, bbox_y_start],
-                    [cell_x_end, bbox_y_start],
-                    [cell_x_end, bbox_y_end],
-                    [cell_x_start, bbox_y_end],
-                ]
-                word_box_list.append(cell)
-                word_box_content_list.append("".join(word))
+        en_col_list = []
 
-        if len(cn_col_list) != 0:
-            if len(cn_width_list) != 0:
-                avg_char_width = np.mean(cn_width_list)
+        def cal_char_width(width_list, word_col_):
+            if len(word_col) == 1:
+                return
+            char_total_length = (word_col_[-1] - word_col_[0] + 1) * cell_width
+            char_width = char_total_length / (len(word_col_) - 1)
+            width_list.append(char_width)
+
+        def cal_box(col_list, width_list, word_box_list_):
+            if len(col_list) == 0:
+                return
+            if len(width_list) != 0:
+                avg_char_width = np.mean(width_list)
             else:
                 avg_char_width = (bbox_x_end - bbox_x_start) / len(rec_txt)
 
-            for center_idx in cn_col_list:
+            for center_idx in col_list:
                 center_x = (center_idx + 0.5) * cell_width
                 cell_x_start = max(int(center_x - avg_char_width / 2), 0) + bbox_x_start
                 cell_x_end = (
-                    min(int(center_x + avg_char_width / 2), bbox_x_end - bbox_x_start)
-                    + bbox_x_start
+                    min(int(center_x + avg_char_width / 2), bbox_x_end - bbox_x_start)
+                    + bbox_x_start
                 )
                 cell = [
                     [cell_x_start, bbox_y_start],
                     [cell_x_end, bbox_y_start],
                     [cell_x_end, bbox_y_end],
                     [cell_x_start, bbox_y_end],
                 ]
-                word_box_list.append(cell)
+                word_box_list_.append(cell)
 
+        for word, word_col, state in zip(word_list, word_col_list, state_list):
+            if state == "cn":
+                cal_char_width(cn_width_list, word_col)
+                cn_col_list += word_col
+                word_box_content_list += word
+            else:
+                cal_char_width(en_width_list, word_col)
+                en_col_list += word_col
+                word_box_content_list += word
+
+        cal_box(cn_col_list, cn_width_list, word_box_list)
+        cal_box(en_col_list, en_width_list, word_box_list)
         sorted_word_box_list = sorted(word_box_list, key=lambda box: box[0][0])
        return word_box_content_list, sorted_word_box_list
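
For readers skimming the diff, the sketch below replays the refactored logic outside the class: cal_char_width derives an average character width per script, and cal_box turns column centers into corner boxes, which is what lets English characters get their own per-character boxes instead of one rectangle per word. All inputs here (box bounds, cell width, column indices) are invented illustration values, not data from the library.

# Standalone sketch of the refactored word-box logic above; not the library API.
# All inputs (box bounds, cell width, column indices) are made-up illustration values.
import numpy as np

bbox_x_start, bbox_x_end = 10, 210   # hypothetical x-range of the detected text line
bbox_y_start, bbox_y_end = 5, 35     # hypothetical y-range of the detected text line
cell_width = 10.0                    # hypothetical width of one recognizer output column
rec_txt = "你好TEST"                  # hypothetical recognized text

# (characters, column indices, script state) as the recognizer might report them
rec_word_info = [
    (["你", "好"], [1, 3], "cn"),
    (["T", "E", "S", "T"], [8, 10, 12, 14], "en"),
]

def cal_char_width(width_list, word_col):
    # Average horizontal spacing of the characters in one word; skip 1-column words.
    if len(word_col) == 1:
        return
    total = (word_col[-1] - word_col[0] + 1) * cell_width
    width_list.append(total / (len(word_col) - 1))

def cal_box(col_list, width_list, box_list):
    # Turn per-character column centers into axis-aligned 4-point boxes.
    if not col_list:
        return
    avg_w = np.mean(width_list) if width_list else (bbox_x_end - bbox_x_start) / len(rec_txt)
    for center_idx in col_list:
        center_x = (center_idx + 0.5) * cell_width
        x0 = max(int(center_x - avg_w / 2), 0) + bbox_x_start
        x1 = min(int(center_x + avg_w / 2), bbox_x_end - bbox_x_start) + bbox_x_start
        box_list.append([[x0, bbox_y_start], [x1, bbox_y_start],
                         [x1, bbox_y_end], [x0, bbox_y_end]])

cn_cols, cn_widths, en_cols, en_widths, boxes = [], [], [], [], []
for word, cols, state in rec_word_info:
    if state == "cn":
        cal_char_width(cn_widths, cols)
        cn_cols += cols
    else:
        cal_char_width(en_widths, cols)
        en_cols += cols

cal_box(cn_cols, cn_widths, boxes)  # Chinese characters use the cn average width
cal_box(en_cols, en_widths, boxes)  # English characters now get their own average width
print(sorted(boxes, key=lambda b: b[0][0]))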

@@ -256,5 +261,9 @@ def order_points(box: List[List[int]]) -> List[List[int]]:
             p23[np.where(p23[:, 1] == np.min(p23[:, 1]))],
             p23[np.where(p23[:, 1] == np.max(p23[:, 1]))],
         )
-
+        # Handle duplicate boxes caused by overlapping single-character rectangles
+        p1 = p1[:1, :]
+        p2 = p2[:1, :]
+        p3 = p3[:1, :]
+        p4 = p4[:1, :]
         return np.array([p1, p2, p3, p4]).reshape((-1, 2)).tolist()
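
The order_points change is easier to see in isolation: for a degenerate single-character box, two corner candidates can share the same extreme coordinate, so np.where returns both rows and the corner arrays carry duplicate points. A toy numpy check (values invented):

import numpy as np

# Two candidate corner points that happen to be identical (toy values).
p23 = np.array([[50, 20], [50, 20]])

p2 = p23[np.where(p23[:, 1] == np.min(p23[:, 1]))]
print(p2.shape)   # (2, 2) -- the corner is duplicated before the fix

p2 = p2[:1, :]    # keep only the first match, as the commit now does
print(p2.shape)   # (1, 2) -- exactly one point per corner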
4 changes: 4 additions & 0 deletions python/tests/test_ort.py
@@ -236,6 +236,10 @@ def test_input_three_ndim_one_channel():
             "text_vertical_words.png",
             ["已", "取", "之", "時", "不", "參", "一", "人", "見", "而"],
         ),
+        (
+            "issue_170.png",
+            ["T", "E", "S", "T"],
+        ),
     ],
 )
 def test_word_ocr(img_name: str, words: List[str]):
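
The body of test_word_ocr is collapsed in this view. A plausible shape for the new case, assuming the engine is a module-level RapidOCR() instance, the image lives under a tests/test_files directory, and return_word_box=True makes each result entry carry the per-word contents produced by cal_ocr_word_box (the exact result layout is an assumption):

from pathlib import Path
from typing import List

import pytest
from rapidocr_onnxruntime import RapidOCR

engine = RapidOCR()
tests_dir = Path(__file__).resolve().parent / "test_files"   # assumed layout


@pytest.mark.parametrize(
    "img_name,words",
    [("issue_170.png", ["T", "E", "S", "T"])],
)
def test_word_ocr(img_name: str, words: List[str]):
    result, _ = engine(str(tests_dir / img_name), return_word_box=True)
    # Assumed entry layout when return_word_box=True:
    # (box, text, score, word_contents, word_boxes)
    recognized = [w for entry in result for w in entry[3]]
    assert all(w in recognized for w in words)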
