This repository has been archived by the owner on Jan 15, 2019. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
ushahidi2pdf.rb
executable file
·207 lines (179 loc) · 5.33 KB
/
ushahidi2pdf.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
require 'rubygems'
$LOAD_PATH.unshift("/Users/chris/git/prawn/lib")
require 'prawn'
require 'json'
require "rubygems"
require "httparty"
require 'crack'
require 'ostruct'
require 'ap'
# Usage (two steps):
#   1. `ruby ushahidi2pdf.rb cache` -- crawl the instance and fill cache.json with its incident data.
#   2. `ruby ushahidi2pdf.rb print` -- typeset the cached incidents and output the PDF.

# API endpoint of the Ushahidi instance; the crawler appends an incident id to the end of it.
INSTANCE_URL = "http://haiti.ushahidi.com/api?task=incidents&by=sinceid&resp=json&id="
# INSTANCE_URL = "http://roguegenius.com/africa/api?task=incidents&by=sinceid&resp=json&id="

# Crawling continues until the last incident id fetched is greater than this value.
# Adjust it to reflect the total number of incidents on the instance.
READ_LIMIT = 4040

# Printing stops at the first incident whose id is greater than this value.
# Lower it for test printing.
PRINT_LIMIT = 100_000

# Incidents whose description is longer than this are skipped (not trimmed) when printing.
MAX_INCIDENT_SIZE = 1500
module PrintingPress
# ========
# = book =
# ========
# Typesets incidents into a Prawn document, one incident per page, and
# renders the result to book.pdf.
class Book
  # Creates the backing Prawn document, turns on Prawn's debug flag and
  # derives the top-left corner of the text box from the page bounds.
  def initialize
    @book = Prawn::Document.new
    Prawn.debug = true
    bounds = @book.bounds
    @x_pos = (bounds.width / 2) - 150
    @y_pos = (bounds.height / 2) + 200
  end

  # Font settings for the incident title: bold Helvetica at size 10.
  def typeset_header
    @book.font("Helvetica", :style => :bold)
    @book.font_size 10
  end

  # Font settings for the incident description: normal Courier at size 10.
  def typeset_body
    @book.font_size 10
    @book.font("Courier", :style => :normal)
  end

  # Font size for the date / location footer (the font itself is left as is).
  def typeset_timestamp
    @book.font_size 6
  end

  # Lays out every printable incident on its own page and writes book.pdf.
  #
  # incidents - enumerable of hashes shaped like {"incident" => {...}}.
  #
  # Skipped with a console message: empty hashes, incidents whose
  # description is longer than MAX_INCIDENT_SIZE, and incidents whose id
  # equals the id of the previously printed incident.
  #
  # Returns true when an incident id above PRINT_LIMIT ended the run early;
  # otherwise returns whatever the `open book.pdf` shell call returns.
  def print(incidents)
    incidents.each do |entry|
      if entry.empty?
        p "empty incident, skipping ..."
        next
      end

      incident = entry['incident']
      if incident['incidentdescription'].length > MAX_INCIDENT_SIZE
        p "incident too big: #{incident['incidentdescription']}"
        next
      end
      if incident['incidentid'] == @previous_id
        p "duplicate incident, skipping ..."
        next
      end

      @current_id = incident['incidentid']
      if @current_id.to_i > PRINT_LIMIT
        # Past the print limit: publish what has been typeset so far and stop.
        publish
        return true
      end

      p "printing! This incident: #{@current_id}"
      @previous_id ||= "unset"
      p "previous incident: #{@previous_id}"
      lay_out(incident)
      @book.start_new_page
    end

    p "done rendering, now printing ..."
    publish
  end

  private

  # Writes title, cleaned-up description and optional date / location of one
  # incident into a 300x500 box, then remembers its id as the previous one.
  def lay_out(incident)
    @book.bounding_box([@x_pos, @y_pos], :width => 300, :height => 500) do
      typeset_header
      @book.text(incident['incidenttitle'])
      @book.text("\n")

      typeset_body
      # Drop the "IDUshahidi: <number>" marker and collapse runs of newlines.
      description = incident['incidentdescription']
      description = description.gsub(/IDUshahidi:\W+\d+/, '')
      @book.text(description.gsub(/[\n]+/, "\n"))

      typeset_timestamp
      @book.text("\n")
      date = incident['incidentdate']
      @book.text(date) unless date.nil?
      place = incident['locationname']
      @book.text(place) unless place.nil?

      @previous_id = @current_id
    end
  end

  # Renders the document to book.pdf and hands it to the `open` command.
  def publish
    @book.render_file("book.pdf")
    `open book.pdf`
  end
end
# =========
# = cache =
# =========
# Reads and writes the JSON cache file (cache.json by default) that sits
# between the crawl step and the print step.
class Cache
  # Returns true when cache.json already exists in the working directory.
  def full?
    File.exist?("cache.json")
  end

  # Loads and parses the cache file.
  #
  # filename - path of the cache file to read.
  #
  # Returns the value stored under the "incidents" key of the parsed document.
  # Raises Errno::ENOENT when the file does not exist.
  def read(filename="cache.json")
    p "... reading cache"
    # File.read closes the handle itself, even when a later step raises.
    results = File.read(filename)
    p "about to parse!"
    parsed_results = Crack::JSON.parse(results)
    p "... incidents loaded"
    parsed_results['incidents']
  end

  # Opens `filename` for appending and returns the open File; the caller is
  # responsible for closing it.
  #
  # Append mode creates the file when it is missing, so no existence check is
  # needed. The previous check used File.exists?, which was removed in
  # Ruby 3.2 and made every write fail there.
  def discover_file(filename)
    File.open(filename, "a")
  end

  #Worker.fill_cache relies on these two writers.

  # Appends `data` as pretty-printed JSON followed by a "," separator.
  #
  # data     - any object JSON.pretty_generate accepts.
  # filename - path of the cache file to append to.
  def write_json(data, filename= "cache.json")
    append_to(filename) do |jsonfile|
      jsonfile.write(JSON.pretty_generate(data))
      jsonfile.write(",")
    end
    p "... cache written"
  end

  # Appends `text` verbatim (used for the opening and closing JSON fragments).
  #
  # text     - string to append.
  # filename - path of the cache file to append to.
  def write_text(text, filename= "cache.json")
    append_to(filename) { |jsonfile| jsonfile.write(text) }
  end

  private

  # Yields the append handle for `filename` and always closes it afterwards,
  # so a failing write does not leak the file descriptor.
  def append_to(filename)
    jsonfile = discover_file(filename)
    begin
      yield jsonfile
    ensure
      jsonfile.close
    end
  end
end
# ===========
# = crawler =
# ===========
# Downloads one page of incidents from an Ushahidi API URL.
class Crawler
  # Fetches `url`, parses the response body as JSON and returns the value
  # found at payload -> incidents. The same value is kept in @incidents.
  #
  # url - full API URL to request.
  def crawl(url)
    p "starting to crawl: #{url}"
    response = HTTParty.get(url)
    payload = Crack::JSON.parse(response.body)["payload"]
    @incidents = payload["incidents"]
  end
end
# ==========
# = worker =
# ==========
# Drives the crawl: pages through the incident API and streams every record
# into the cache as one JSON document.
class Worker
  # Crawls the instance batch by batch and writes the result to the cache.
  #
  # The cache receives '{"incidents":[', then each incident followed by a
  # comma, then '{}]}' (the empty object absorbs the trailing comma).
  #
  # Crawling stops when
  #   * the last incident id fetched is greater than the read limit,
  #   * the API returns no incidents (nil or empty batch), or
  #   * the last incident id of a batch does not advance past the id that was
  #     requested (this would otherwise loop forever).
  # In every case the closing fragment is still written, so the cache is not
  # left truncated by an early end of data.
  #
  # A record identical to the one written just before it is skipped; the
  # comparison carries across batches.
  #
  # options - optional Hash; every key falls back to the script-level default:
  #           :crawler    - object responding to crawl(url)
  #                         (default: PrintingPress::Crawler.new)
  #           :cache      - object responding to write_text / write_json
  #                         (default: PrintingPress::Cache.new)
  #           :base_url   - URL prefix the incident id is appended to
  #                         (default: INSTANCE_URL)
  #           :read_limit - highest incident id to crawl up to
  #                         (default: READ_LIMIT)
  #           :pause      - seconds to sleep between requests (default: 1)
  def fill_cache(options = {})
    crawler    = options.fetch(:crawler) { PrintingPress::Crawler.new }
    cache      = options.fetch(:cache) { PrintingPress::Cache.new }
    base_url   = options.fetch(:base_url) { INSTANCE_URL }
    read_limit = options.fetch(:read_limit) { READ_LIMIT }
    pause      = options.fetch(:pause) { 1 }

    sinceid = 0
    # Declared outside both loops so the duplicate filter survives from one
    # record (and one batch) to the next.
    prev_json = nil

    cache.write_text('{"incidents":[')
    until sinceid > read_limit
      # incrementing sinceid to work around API limits
      theurl = "#{base_url}#{sinceid}"
      incidents = crawler.crawl(theurl)
      if incidents.nil? || incidents.empty?
        p "no incidents returned after id #{sinceid}, stopping ..."
        break
      end

      incidents.each do |json|
        # write the json incident record unless it's the same as the last one
        next if json == prev_json
        cache.write_json(json)
        prev_json = json
        p prev_json
      end

      last_id = incidents.last["incident"]["incidentid"].to_i
      if last_id <= sinceid
        p "incident id did not advance past #{sinceid}, stopping ..."
        break
      end

      #be nice
      sleep pause
      sinceid = last_id
    end
    p "writing the closing bit"
    cache.write_text('{}]}')
  end
end
end
# ===========
# = routine =
# ===========
# Command-line entry point: the first argument selects the step to run.
case ARGV[0]
when "cache"
  # Crawl step: refuse to touch a cache file that already exists.
  worker = PrintingPress::Worker.new
  cache = PrintingPress::Cache.new
  if cache.full?
    p "looks like your cache has data in it. Delete it or print."
  else
    worker.fill_cache
  end
when "print"
  # Print step: typeset everything found in the cache.
  book = PrintingPress::Book.new
  cache = PrintingPress::Cache.new
  book.print(cache.read)
else
  p "usage: ruby ushahidi2pdf.rb [cache|print]"
end