Skip to content

Commit 4b10e53

Browse files
Merge pull request #95 from datalogics-cgreen/add-go
Add complex flow sample program written in Go
2 parents b62f963 + 4601173 commit 4b10e53

File tree

2 files changed

+127
-0
lines changed

2 files changed

+127
-0
lines changed
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
module github.com/datalogics/pdf-rest-api-samples/go-ocr-extract
2+
3+
go 1.24.2
Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
package main
2+
3+
import (
4+
"bytes"
5+
"encoding/json"
6+
"fmt"
7+
"io"
8+
"mime/multipart"
9+
"net/http"
10+
"os"
11+
)
12+
13+
// In this sample, we will show how to convert a scanned document into a PDF with
14+
// searchable and extractable text using Optical Character Recognition (OCR), and then
15+
// extract that text from the newly created document.
16+
//
17+
// First, we will upload a scanned PDF to the /pdf-with-ocr-text route and capture the
18+
// output ID. Then, we will send the output ID to the /extracted-text route, which will
19+
// return the newly added text.
20+
21+
// main runs the two-step OCR flow: it uploads the input file to the
// /pdf-with-ocr-text route, reads "outputId" from the JSON response, posts
// that ID to the /extracted-text route, and prints the "fullText" field of
// the second response.
//
// If either response contains an "error" field, its value is printed with an
// "ERR:" prefix and the program stops. Any other failure (file access, request
// construction, transport, or an undecodable response) panics.
func main() {
	baseURL := "https://api.pdfrest.com/"

	// Replace the values below with your input file's location and name
	inputFilePath := "/path/to/input.pdf"
	inputFileName := "input.pdf"

	// Replace with your API key
	apiKey := "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"

	// Begin request to /pdf-with-ocr-text
	// Create a buffer and a multipart writer
	var ocrReqBody bytes.Buffer
	ocrReqWriter := multipart.NewWriter(&ocrReqBody)

	// Open the input file
	fileField := "file"
	file, err := os.Open(inputFilePath)
	if err != nil {
		panic(err)
	}
	defer file.Close()

	// Stream the file contents into the "file" form part
	filePart, err := ocrReqWriter.CreateFormFile(fileField, inputFileName)
	if err != nil {
		panic(err)
	}
	if _, err = io.Copy(filePart, file); err != nil {
		panic(err)
	}

	// Close must happen before sending: it writes the trailing multipart boundary
	if err = ocrReqWriter.Close(); err != nil {
		panic(err)
	}

	// Create the HTTP request
	ocrReq, err := http.NewRequest("POST", baseURL+"pdf-with-ocr-text", &ocrReqBody)
	if err != nil {
		panic(err)
	}

	// Set the headers
	ocrReq.Header.Set("Content-Type", ocrReqWriter.FormDataContentType())
	ocrReq.Header.Set("Api-Key", apiKey)

	// Send the request; the same client is reused for the second call
	client := &http.Client{}
	ocrResp, err := client.Do(ocrReq)
	if err != nil {
		panic(err)
	}
	defer ocrResp.Body.Close()

	var ocrRespData map[string]any
	if err = json.NewDecoder(ocrResp.Body).Decode(&ocrRespData); err != nil {
		// Include the HTTP status so a non-JSON body (e.g. a gateway error page)
		// is diagnosable from the panic message alone.
		panic(fmt.Errorf("decoding pdf-with-ocr-text response (HTTP status %s): %w", ocrResp.Status, err))
	}
	if errorMessage, hasErrorMessage := ocrRespData["error"]; hasErrorMessage {
		fmt.Println("ERR:", errorMessage)
		return
	}

	// Checked assertion: a missing or non-string "outputId" would otherwise
	// surface as an opaque interface-conversion panic.
	outputID, ok := ocrRespData["outputId"].(string)
	if !ok {
		panic(fmt.Errorf("pdf-with-ocr-text response has no string \"outputId\": %v", ocrRespData))
	}

	// Begin request to /extracted-text
	var extractReqBody bytes.Buffer
	extractReqWriter := multipart.NewWriter(&extractReqBody)

	// Add the "id" form field
	if err = extractReqWriter.WriteField("id", outputID); err != nil {
		panic(err)
	}
	if err = extractReqWriter.Close(); err != nil {
		panic(err)
	}

	extractReq, err := http.NewRequest("POST", baseURL+"extracted-text", &extractReqBody)
	if err != nil {
		panic(err)
	}

	extractReq.Header.Set("Content-Type", extractReqWriter.FormDataContentType())
	extractReq.Header.Set("Api-Key", apiKey)

	extractResp, err := client.Do(extractReq)
	if err != nil {
		panic(err)
	}
	defer extractResp.Body.Close()

	var extractRespData map[string]any
	if err = json.NewDecoder(extractResp.Body).Decode(&extractRespData); err != nil {
		panic(fmt.Errorf("decoding extracted-text response (HTTP status %s): %w", extractResp.Status, err))
	}
	if errorMessage, hasErrorMessage := extractRespData["error"]; hasErrorMessage {
		fmt.Println("ERR:", errorMessage)
		return
	}

	fmt.Println(extractRespData["fullText"])
}

0 commit comments

Comments
 (0)