Skip to content

Commit 7a98f82

Browse files
committed
Split leptonica out of tesseract-sys
This is done because `cargo` can only specify one library to link to per crate (see https://doc.rust-lang.org/cargo/reference/build-scripts.html#the-links-manifest-key). I've had to change the image used, because Tesseract had started reading the apostrophe as a double quote. I could have changed the tests to adapt, but thought it better not to test an incorrect OCR result. #7
1 parent 47b6683 commit 7a98f82

File tree

9 files changed

+98
-67
lines changed

9 files changed

+98
-67
lines changed

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ license = "MIT"
99

1010
[dependencies]
1111
libc = "0.1"
12+
leptonica-sys = "0.1.0"
1213

1314
[dependencies.tesseract-sys]
1415
version = "0.2"

img.png

22 KB
Loading

img.txt

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,4 @@
1-
Rust is a systems programming language
2-
that runs blazingly fast, prevents almost all
3-
crashes*, and eliminates data races.
4-
5-
Show me more!
6-
1+
Hundreds of companies around the world are using Rust in production today for fast, low-
2+
resource, cross-platform solutions. Software you know and love, like Firefox, Dropbox, and
3+
Cloudflare, uses Rust. From startups to large corporations, from embedded devices to
4+
scalable web services, Rust is a great fit.

src/lib.rs

Lines changed: 56 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -1,82 +1,83 @@
1-
extern crate tesseract_sys;
1+
extern crate leptonica_sys;
22
extern crate libc;
3+
extern crate tesseract_sys;
34

4-
use tesseract_sys::*;
5+
use leptonica_sys::pixRead;
6+
use std::ffi::CStr;
57
use std::ffi::CString;
68
use std::ptr;
79
use std::str;
8-
use std::ffi::CStr;
9-
10+
use tesseract_sys::{
11+
TessBaseAPI, TessBaseAPICreate, TessBaseAPIDelete, TessBaseAPIGetUTF8Text, TessBaseAPIInit3,
12+
TessBaseAPIRecognize, TessBaseAPISetImage2, TessBaseAPISetVariable,
13+
};
1014

1115
pub struct Tesseract {
12-
raw: *mut TessBaseAPI
16+
raw: *mut TessBaseAPI,
1317
}
1418

1519
impl Drop for Tesseract {
16-
fn drop(&mut self) {
17-
println!("Ave Imperator! Nos morituri te salutamus.");
18-
unsafe { TessBaseAPIDelete(self.raw) }
19-
}
20+
fn drop(&mut self) {
21+
println!("Ave Imperator! Nos morituri te salutamus.");
22+
unsafe { TessBaseAPIDelete(self.raw) }
23+
}
2024
}
2125

2226
fn cs(string: &str) -> CString {
23-
// do not call as_ptr yet, since the data will be freed before we return
24-
CString::new(string).unwrap()
27+
// do not call as_ptr yet, since the data will be freed before we return
28+
CString::new(string).unwrap()
2529
}
2630

2731
impl Tesseract {
28-
pub fn new() -> Tesseract {
29-
Tesseract {
30-
raw: unsafe { TessBaseAPICreate() }
31-
}
32-
}
33-
pub fn set_lang(&self, language: &str) -> i32 {
34-
let cs_language = cs(language);
35-
unsafe { TessBaseAPIInit3(self.raw, ptr::null(), cs_language.as_ptr()) }
36-
}
37-
pub fn set_image(&self, filename: &str) {
38-
let cs_filename = cs(filename);
39-
unsafe {
40-
let img = pixRead(cs_filename.as_ptr());
41-
TessBaseAPISetImage2(self.raw, img);
42-
}
43-
}
44-
pub fn set_variable(&self, name: &str, value: &str) -> i32 {
45-
let cs_name = cs(name);
46-
let cs_value = cs(value);
47-
unsafe { TessBaseAPISetVariable(self.raw, cs_name.as_ptr(), cs_value.as_ptr()) }
48-
}
49-
pub fn recognize(&self) -> i32 {
50-
unsafe {
51-
TessBaseAPIRecognize(self.raw, ptr::null_mut())
52-
}
53-
}
54-
pub fn get_text(&self) -> &str {
55-
unsafe {
56-
str::from_utf8(CStr::from_ptr(TessBaseAPIGetUTF8Text(self.raw)).to_bytes()).unwrap()
57-
}
58-
}
32+
pub fn new() -> Tesseract {
33+
Tesseract {
34+
raw: unsafe { TessBaseAPICreate() },
35+
}
36+
}
37+
pub fn set_lang(&self, language: &str) -> i32 {
38+
let cs_language = cs(language);
39+
unsafe { TessBaseAPIInit3(self.raw, ptr::null(), cs_language.as_ptr()) }
40+
}
41+
pub fn set_image(&self, filename: &str) {
42+
let cs_filename = cs(filename);
43+
unsafe {
44+
let img = pixRead(cs_filename.as_ptr());
45+
TessBaseAPISetImage2(self.raw, img);
46+
}
47+
}
48+
pub fn set_variable(&self, name: &str, value: &str) -> i32 {
49+
let cs_name = cs(name);
50+
let cs_value = cs(value);
51+
unsafe { TessBaseAPISetVariable(self.raw, cs_name.as_ptr(), cs_value.as_ptr()) }
52+
}
53+
pub fn recognize(&self) -> i32 {
54+
unsafe { TessBaseAPIRecognize(self.raw, ptr::null_mut()) }
55+
}
56+
pub fn get_text(&self) -> &str {
57+
unsafe {
58+
str::from_utf8(CStr::from_ptr(TessBaseAPIGetUTF8Text(self.raw)).to_bytes()).unwrap()
59+
}
60+
}
5961
}
6062

6163
pub fn ocr(filename: &str, language: &str) -> String {
62-
let cube = Tesseract::new();
63-
cube.set_lang(language);
64-
cube.set_image(filename);
65-
cube.recognize();
66-
return cube.get_text().to_string()
64+
let cube = Tesseract::new();
65+
cube.set_lang(language);
66+
cube.set_image(filename);
67+
cube.recognize();
68+
return cube.get_text().to_string();
6769
}
6870

69-
7071
#[test]
71-
fn blah(){
72-
ocr("img.png", "eng");
72+
fn blah() {
73+
ocr("img.png", "eng");
7374
}
7475

7576
#[test]
7677
fn it_works() {
77-
let cube = Tesseract::new();
78-
cube.set_lang("eng");
79-
cube.set_image("img.png");
80-
cube.recognize();
81-
println!("{:?}", cube.get_text());
78+
let cube = Tesseract::new();
79+
cube.set_lang("eng");
80+
cube.set_image("img.png");
81+
cube.recognize();
82+
println!("{:?}", cube.get_text());
8283
}

tesseract-sys/Cargo.toml

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,15 @@ version = "0.2.0"
44
authors = ["Kevin Kwok <[email protected]>", "Chris Couzens <[email protected]>"]
55
description = "Rust Bindings for Tesseract OCR"
66
license = "MIT"
7+
edition = '2018'
8+
repository = "https://github.com/antimatter15/tesseract-rs"
9+
keywords = ["OCR", "tesseract"]
10+
categories = ["external-ffi-bindings", "multimedia::images"]
11+
links = "tesseract"
12+
build = "build.rs"
713

814
[dependencies]
15+
leptonica-sys = "0.1.0"
916

1017
[build-dependencies]
11-
bindgen = "0.45.0"
18+
bindgen = "0.49.3"

tesseract-sys/README.md

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# tesseract-sys
2+
Rust bindings for [Tesseract](https://github.com/tesseract-ocr/tesseract)
3+
4+
## Building
5+
6+
This links to the C libraries [leptonica](https://github.com/danbloomberg/leptonica) and tesseract.
7+
8+
On Fedora 30 the additional dependencies can be installed by running:
9+
10+
```bash
11+
sudo dnf install leptonica-devel tesseract-devel clang
12+
```
13+
14+
On Termux 2019 (Android, Android on Chromebooks) the additional dependencies can be installed by running:
15+
16+
```bash
17+
pkg install libclang leptonica-dev tesseract-dev
18+
```

tesseract-sys/build.rs

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ fn main() {
77
// Tell cargo to tell rustc to link the system tesseract
88
// and leptonica shared libraries.
99
println!("cargo:rustc-link-lib=tesseract");
10-
println!("cargo:rustc-link-lib=lept");
1110

1211
// The bindgen::Builder is the main entry point
1312
// to bindgen, and lets you build up options for
@@ -17,8 +16,13 @@ fn main() {
1716
// bindings for.
1817
.header("wrapper.h")
1918
.whitelist_function("^Tess.*")
20-
.whitelist_function("^pixRead.*")
21-
.whitelist_function("pixFreeData")
19+
.blacklist_type("Boxa")
20+
.blacklist_type("Pix")
21+
.blacklist_type("Pixa")
22+
.blacklist_type("_IO_FILE")
23+
.blacklist_type("_IO_codecvt")
24+
.blacklist_type("_IO_marker")
25+
.blacklist_type("_IO_wide_data")
2226
// Finish the builder and generate the bindings.
2327
.generate()
2428
// Unwrap the Result and panic on failure.
@@ -29,4 +33,4 @@ fn main() {
2933
bindings
3034
.write_to_file(out_path.join("bindings.rs"))
3135
.expect("Couldn't write bindings!");
32-
}
36+
}

tesseract-sys/src/lib.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,14 @@
22
#![allow(non_camel_case_types)]
33
#![allow(non_snake_case)]
44

5+
use leptonica_sys::{Boxa, Pix, Pixa, _IO_FILE};
6+
57
include!(concat!(env!("OUT_DIR"), "/bindings.rs"));
68

79
#[cfg(test)]
810
mod tests {
911
use super::*;
12+
use leptonica_sys::{pixFreeData, pixRead};
1013
use std::ffi::CStr;
1114
use std::ffi::CString;
1215
use std::ptr;

tesseract-sys/wrapper.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1 @@
1-
#include <leptonica/allheaders.h>
21
#include <tesseract/capi.h>

0 commit comments

Comments
 (0)