-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path4-calculate_dist_UC.R
210 lines (179 loc) · 7.37 KB
/
4-calculate_dist_UC.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
#!Rscript
# Calculates distances between pairs of polygons in a shape file. Results are
# written to a tab-separated file.
#
# Run this script from the command line with no arguments to see a usage
# message.
library(sp, quietly = TRUE, verbose = FALSE)
library(rgdal, quietly = TRUE, verbose = FALSE)
library(rgeos, quietly = TRUE, verbose = FALSE)
library(data.table, quietly = TRUE, verbose = FALSE)
# Loading shp file ####
fed1 <- readOGR(dsn = "./outputs/clipped_shp", layer = "crop_fed1_bhrd")
fed2 <- readOGR(dsn = "./outputs/clipped_shp", layer = "crop_fed2_bhrd")
# Sources a file in the same directory as this script.
sourceFromPkg <- function(file) {
# Modified from stackoverflow answer https://stackoverflow.com/a/15373917
pkgDir <- function() {
cmdArgs <- commandArgs(trailingOnly = FALSE)
fopt <- "--file="
match <- grep(fopt, cmdArgs)
if (length(match) > 0) {
dirname(sub(fopt, "", cmdArgs[match]))
} else if ("ofile" %in% names(sys.frames()[[1]])) {
dirname(sys.frames()[[1]]$ofile)
} else {
# Run selection in RStudio
"." # this is wrong, but at least it doesn't crash
}
}
source(file.path(pkgDir(), file))
}
sourceFromPkg("functions.R")
# Writes distances between pairs of polygons to a tab-separated file
#
# Output is a CSV file (tab-separated by default) with 3 columns: ID of the 1st
# shape, ID of the 2nd shape, distance between the 2 shapes. The first shape is
# the sequence of shapes from index `firstRow` to index `lastRow` inclusive.
# Each shape is measured against all shapes which come after it in `shapes`.
#
# @param shapes The spatial objects to measure.
# @param fileName Name of the file to write to.
# @param firstRow Index of first row to measure.
# @param lastRow Index of last row to measure.
# @param idCol Name of the ID column. This is the value written to the output
# file to identify the shapes.
# @param append If TRUE, rows will be added to the existing file. If FALSE, an
# existing file will be overwritten.
# @param sep Separator for CSV file.
#
CalcBlockPairwiseDistances <- function(shapes, fileName, firstRow, lastRow, idCol, append = FALSE, sep = "\t", verbose = TRUE) {
nr <- nrow(shapes)
.workForRow <- function(n) nr - n
.workForRows <- function(from, to) sum(.workForRow(from:to))
pb <- function(n) NULL
if (verbose) {
pb <- ElapsedTimeProgressBarFn(.workForRows(firstRow, lastRow),
buildTxtReportFn(sprintf("Calculating distances to polygons %d:%d, block %d of %d\n", firstRow, lastRow, thisBlock, totalBlocks)))
}
# Treat warnings as errors inside the loop
oldOpt <- options(warn = 2)
workDone <- 1
# For each polygon in the block...
for (i in firstRow:lastRow) {
# Calculate distance to all polygons with ID > this polygon's id
# Save this set of results in a data frame then write them all out at once.
# This speeds up processing and simplifies handling of incomplete output files.
js <- (i + 1):nr
distances <- data.frame(nrow = length(js), ncol = 3)
idx <- 1
pb(workDone)
for (j in js) {
workDone <- workDone + 1
distances[idx, 1] <- shapes@data[i, idCol]
distances[idx, 2] <- shapes@data[j, idCol]
distances[idx, 3] <- gDistance(shapes[i, ], shapes[j, ])
idx <- idx + 1
}
# Add the distances to the file
fwrite(distances, fileName, sep = sep, row.names = FALSE, col.names = FALSE, append = append)
append <- TRUE
}
pb(close = TRUE)
# Restore warnings
options(oldOpt)
}
# Calculates the rows remaining to be measured from an incomplete file.
CalcRemainingRange <- function(block, nPolygons, outFile, sep = "\t") {
# Read the file
old <- fread(file = outFile, sep = sep, header = FALSE)
# Sanity check
if (ncol(old) != 3 || class(old[[1]]) != "integer" || class(old[[2]]) != "integer" || class(old[[3]]) != "numeric")
stop(sprintf("Invalid data file, unexpected format: %s", outFile))
# Check the first line
if (old[[1, 1]] != block[1])
stop(sprintf("Invalid data file, expected row 1 column 1 to be %d, was %d: %s", block[1], old[[1, 1]], outFile))
if (old[[1, 2]] != block[1] + 1)
stop(sprintf("Invalid data file, expected row 1 column 2 to be %d, was %d: %s", block[1] + 1, old[[1, 2]], outFile))
# Check the last line
old <- old[nrow(old), ]
if (old[[2]] != nPolygons || old[[1]] >= old[[2]])
stop(sprintf("Data file '%s' doesn't seem to match input or is corrupt,\n expected the last row to look like \"<n> %d <distance>\"", outFile, nPolygons))
firstRow <- old[[1]] + 1
lastRow <- block[2]
if (firstRow == nPolygons)
stop(sprintf("Data file '%s' appears to be complete already", outFile))
# Return the new range
c(firstRow = firstRow, lastRow = lastRow)
}
CalcPairwiseDistances <- function(dsn, layer, idCol, outFile, thisBlock, totalBlocks, continue = FALSE) {
shp <- readOGR(dsn = dsn, layer = layer, verbose = FALSE)
colNames <- names(shp@data)
if (!idCol %in% colNames) {
if (length(colNames) == 0)
msg <- "There are no columns in this layer."
else if(length(colNames) == 1)
msg <- sprintf("The only column is named '%s'.", colNames)
else
msg <- sprintf("Available columns are: %s\n", paste(names(shp@data), collapse = ", "))
stop(paste(sprintf("'%s' is not a valid column name.", idCol), msg, collapse = " "))
}
# Check for duplicate IDs
ti <- table(shp@data[[idCol]])
if (sum(ti > 1) > 0)
stop(sprintf("Shape file contains duplicate %s values", idCol))
nPolygons <- nrow(shp)
block <- BlockRange(thisBlock, totalBlocks, nPolygons)
if (continue && file.exists(outFile)) {
newblock <- CalcRemainingRange(block, nPolygons, outFile)
cat(sprintf("Adding to existing file, skipping polygons %d to %d\n", block[1], newblock[1] - 1))
block <- newblock
} else if (file.exists(outFile)) {
stop(sprintf("File %s already exists, skipping", outFile))
}
cat(sprintf("Writing %s, id column '%s'\n", outFile, idCol))
CalcBlockPairwiseDistances(shp, outFile, block[1], block[2], idCol, append = continue)
}
####################################################################################
# Test whether running under a GUI such as RStudio
# Automatically detect if running inside RStudio
isGUI <- isRStudio()
# Uncomment this line if you are running in another GUI
#isGUI <- TRUE
if (isGUI) {
####
# EDIT THIS SECTION if you are running in a GUI/IDE
# This is just an example
continue <- FALSE
dsn <- "Outputs/clipped_shp"
layer <- "crop_fed1_bhrd"
idCol <- "ID_UC0"
thisBlock <- 1
totalBlocks <- 1
# End of section to edit
####
} else {
# Not running in a GUI, so no need for this file to be edited.
# Parameters are passed in on the command line
# Process command line arguments
args <- commandArgs(TRUE)
cat("\n")
continue <- length(args) >= 1 && args[1] == "--continue"
if (continue)
args <- tail(args, -1)
if (!(length(args) == 3 || length(args) == 5))
stop("Usage: [-continue] map layer idColumn [thisBlock totalBlocks]")
dsn <- args[1]
layer <- args[2]
idCol <- args[3]
thisBlock <- 1
totalBlocks <- 1
if (length(args) == 5) {
thisBlock <- as.integer(args[4])
totalBlocks <- as.integer(args[5])
}
}
st <- proc.time()
outFile <- sprintf("%s-%03d-%03d.txt", layer, thisBlock, totalBlocks)
CalcPairwiseDistances(dsn, layer, idCol, outFile, thisBlock, totalBlocks, continue)
cat(sprintf("Elapsed time %g secs\n", (proc.time() - st)[3]))