Skip to content

Commit 0720b3a

Browse files
committed
added inputs and outputs to create_alternative_choice
1 parent 8006601 commit 0720b3a

File tree

2 files changed

+44
-54
lines changed

2 files changed

+44
-54
lines changed

R/calc_exp.R

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,6 @@ calc_exp <- function(dataset,
4444
# TODO: when revenue col exists, either automatically create col of ones (currently user must do this)
4545
# or allow catch arg to also be revenue. Use generic name (e.g. value)
4646

47-
# TODO: Add warning if the number of alternatives is high (or/also include in create_alternative_choice())
48-
4947
catch_name <- catch # save variable name for settings
5048

5149
dataZoneTrue <- Alt[["dataZoneTrue"]] # used for catch and other variables
@@ -62,7 +60,7 @@ calc_exp <- function(dataset,
6260
fleet <- as.integer(as.factor(dataset[[defineGroup]]))
6361
}
6462

65-
z_ind <- which(dataZoneTrue == 1)
63+
z_ind <- which(dataZoneTrue == 1) # index for which observations are included in the model
6664
fleet <- fleet[z_ind]
6765

6866
areas <- choice[z_ind] # mapping to to the map file (zones)
@@ -90,7 +88,6 @@ calc_exp <- function(dataset,
9088
catch <- as.numeric(dataset[[catch]][z_ind])
9189

9290
if (price != "none" && !is_value_empty(price)) {
93-
9491
price <- as.numeric(dataset[[price]][z_ind])
9592
catch <- catch * price
9693
}
@@ -255,7 +252,7 @@ calc_exp <- function(dataset,
255252
# Get the average for each date-area combination prior to calculating the window average
256253
tmp_df <- aggregate(catch ~ areas + fleet + dateFloor, data = new_df,
257254
FUN = mean, na.action = stats::na.pass, na.rm = TRUE)
258-
255+
259256
aggregate(catch ~ areas + fleet, data = tmp_df,
260257
FUN = mean, na.action = stats::na.pass, na.rm = TRUE)
261258
} else {

R/create_alternative_choice.R

Lines changed: 42 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,11 @@
8383
#' in the FishSET database.
8484
#' @param spatID Required when `alt_var = 'nearest point'`. Variable in `spat`
8585
#' that identifies the individual zones or areas.
86+
#' @param grid_sample Logical, TRUE if randomly sampling from all alternatives or
87+
#' from a separate point-grid table. If TRUE, then also need to specify the
88+
#' sample size (i.e., number of alternatives for each observation).
89+
#' @param grid_sample_n Integer, indicating the sample size for grid sampling
90+
#' (i.e., number of alternative to sample).
8691
#' @param outsample Logical, indicating whether this is for primary data or out-of
8792
#' sample data.
8893
#' @importFrom DBI dbExecute
@@ -137,110 +142,91 @@ create_alternative_choice <-
137142
fish.cent.name = NULL,
138143
spatname = NULL,
139144
spatID = NULL,
145+
grid_sample = FALSE,
146+
grid_sample_n = NULL,
140147
outsample = FALSE) {
141148

142149
# Call in datasets
143150
out <- data_pull(dat, project)
144151
dataset <- out$dataset
145152

153+
# Parse main data table
146154
if(outsample){ # save dat as out-of-sample dataset
147155
outsample_dat <- dat
148156
dat <- parse_data_name(outsample_dat, "outsample", project)
149157
} else {
150158
dat <- parse_data_name(dat, "main", project)
151159
}
152160

161+
# Parse spatial data object
153162
spat_out <- data_pull(spatname, project)
154163
spatdat <- spat_out$dataset
155164
spat <- parse_data_name(spatname, "spat", project)
156165

157-
column_check(dataset, cols = c(zoneID, occasion_var))
166+
# Check args ----
158167

168+
# Run a series of checks for various input args
169+
# Stop executing code in this function if anything here fails
170+
column_check(dataset, cols = c(zoneID, occasion_var))
159171
o_len <- length(occasion_var)
160-
161-
162172
if (occasion == "zonal centroid") {
163-
164173
if (o_len != 2 & is_value_empty(zone.cent.name)) {
165-
166174
stop("'zone.cent.name' is required.", call. = FALSE)
167175
}
168176
}
169177

170178
if (occasion == "fishing centroid") {
171-
172179
if (o_len != 2 & is_value_empty(fish.cent.name)) {
173-
174180
stop("'fish.cent.name' is required.", call. = FALSE)
175181
}
176182
}
177183

178184
if (alt_var == "zonal centroid" & is_value_empty(zone.cent.name)) {
179-
180185
stop("'zone.cent.name' is required.", call. = FALSE)
181186
}
182187

183188
if (alt_var == "fishing centroid" & is_value_empty(fish.cent.name)) {
184-
185189
stop("'fish.cent.name' is required.", call. = FALSE)
186190
}
187191

188-
zone_cent <- NULL
189-
fish_cent <- NULL
190-
191-
# check args ----
192-
193192
occasion_opts <- c("zonal centroid", "fishing centroid", "port", "lon-lat")
194193
alt_opts <- c("zonal centroid", "fishing centroid", "nearest point")
195194

196195
if (o_len > 2) stop("Invalid values for 'occasion_var'.", call. = FALSE)
197196

198197
if (!occasion %in% occasion_opts) {
199-
200198
stop("Invalid option for 'occasion'. Options are ",
201199
paste0(occasion_opts, collapse = ", "), ".", call. = FALSE)
202200
}
203201

204202
if (!alt_var %in% alt_opts) {
205-
206203
stop("Invalid option for 'alt_var'. Options are ",
207204
paste0(alt_opts, collapse = ", "), ".", call. = FALSE)
208205
}
209206

210207
# check occasion_var for lon-lat string match
211208
ll_occ_check <- function(occ) {
212-
213209
ll_check <- grepl("lon|lat", occ, ignore.case = TRUE)
214-
215210
if (!any(ll_check)) {
216-
217211
warning("Check that 'occasion_var' contains longitude and latitude ",
218-
"variables: ", paste0(occ, collapse = ", "),
219-
call. = FALSE)
212+
"variables: ", paste0(occ, collapse = ", "), call. = FALSE)
220213
}
221214
}
222215

223216
cent_check <- function(project, cent.tab, type = "zone") {
224-
225217
cent_type <- switch(type, zone = "Zonal", fish = "Fishing")
226-
227218
cent_exists <- table_exists(cent.tab, project)
228-
229219
if (!cent_exists) {
230-
231220
stop(cent_type, " centroid table must be saved to FishSET Database. Run ",
232221
"create_centroid().", call. = FALSE)
233222
}
234223

235224
cent_tab <- table_view(cent.tab, project)
236-
237225
if (!any(cent_tab$ZoneID %in% unique(dataset[[zoneID]]))) {
238-
239226
stop('Zones do not match between centroid table and zonal assignments ',
240227
'in primary data table. Rerun find_centroid() using same spatial data file ',
241228
'as was using with the assignment_column() function.', call. = FALSE)
242229
}
243-
244230
cent_tab
245231
}
246232

@@ -249,37 +235,33 @@ create_alternative_choice <-
249235
'mile', 'miles', 'nmile', 'nmiles')
250236

251237
if (!dist.unit %in% valid_units) {
252-
253238
stop(dist.unit, " is not an available unit. Unit options are: ",
254239
paste0(valid_units, collapse = ", "), call. = FALSE)
255240
}
256241

257242
# alt_var ----
258-
## zonal centroid ----
243+
# Initialize empty centroid vars
244+
zone_cent <- NULL
245+
fish_cent <- NULL
259246

247+
## zonal centroid ----
260248
if (alt_var == "zonal centroid") {
261-
262249
zone_cent <- cent_check(project, zone.cent.name, "zone")
263250
}
264251

265252
## fishing centroid ----
266-
267253
if (alt_var == "fishing centroid") {
268-
269254
fish_cent <- cent_check(project, fish.cent.name, "fish")
270255
}
271256

272257
## nearest point ----
273258
if (alt_var == "nearest point") {
274-
275259
if (is_value_empty(spatdat) | is_value_empty(spatID)) {
276-
277260
stop("'spat' and 'spatID' are required for alt_var = 'nearest point'",
278261
call. = FALSE)
279262
}
280263

281264
if (!any(unique(spatdat[[spatID]]) %in% unique(dataset[[zoneID]]))) {
282-
283265
stop("There are no shared zones between dat and spat. Check that 'spatID' ",
284266
"and 'zoneID' are correct, or rerun assignment_column().", call. = FALSE)
285267
}
@@ -292,53 +274,48 @@ create_alternative_choice <-
292274
if (is_value_empty(occasion_var) | o_len == 1) {
293275

294276
if (is.null(zone_cent)) {
295-
296277
zone_cent <- cent_check(project, zone.cent.name, "zone")
297278
}
298279

299280
} else if (o_len == 2) {
300-
301281
ll_occ_check(occasion_var)
302282

303283
} else {
304-
305284
stop("Invalid 'occasion_var'.", call. = FALSE)
306285
}
286+
307287
## fishing centroid ----
308288
} else if (occasion == "fishing centroid") {
309289

310290
if (is_value_empty(occasion_var) | o_len == 1) {
311291

312292
if (is.null(fish_cent)) {
313-
314293
fish_cent <- cent_check(project, fish.cent.name, "fish")
315294
}
316295

317296
} else if (o_len == 2) {
318-
319297
ll_occ_check(occasion_var)
320298

321299
} else {
322300

323301
stop("Invalid 'occasion_var'.", call. = FALSE)
324-
} ## port ----
325-
302+
}
303+
304+
## port ----
326305
} else if (occasion == "port") {
327306

328307
if (is_value_empty(occasion_var)) {
329-
330308
stop("Port column name required for 'occasion = port'.", call. = FALSE)
331309
}
332310

333311
if (o_len == 2) {
334-
335312
ll_occ_check(occasion_var)
336313
}
314+
337315
## lon-lat ----
338316
} else if (occasion == "lon-lat") {
339317

340318
if (o_len != 2) {
341-
342319
stop("'occasion_var' must contain a longitude and latitude column.",
343320
call. = FALSE)
344321
}
@@ -354,7 +331,6 @@ create_alternative_choice <-
354331
choice <- dataset[[zoneID]]
355332

356333
if (anyNA(choice) == TRUE) {
357-
358334
warning("No zone identified for ", sum(is.na(choice)), " observations. These ",
359335
"observations will be removed in future analyses.", call. = FALSE)
360336
}
@@ -366,7 +342,6 @@ create_alternative_choice <-
366342
zoneCount[zoneCount$n < min.haul, zoneID] <- NA
367343

368344
if (all(is.na(zoneCount[[zoneID]]))) {
369-
370345
stop("No zones meet criteria. No data will be included in further analyses.",
371346
" Check the 'min.haul' parameter or zone identification.", call. = FALSE)
372347
}
@@ -390,6 +365,21 @@ create_alternative_choice <-
390365
spat_out <- spat
391366
}
392367

368+
# Grid-sampling ----
369+
if(grid_sample){
370+
# Create a matrix with nrows = observations, and columns = alts. The first column = choice.
371+
rand_alts_mat <- matrix(choice, nrow = length(choice), ncol = 1)
372+
373+
draw_alts <- function(choice, all_alts, numAlts){
374+
c(choice, sample(all_alts[all_alts != choice], numAlts))
375+
}
376+
377+
rand_alts_mat <- do.call(rbind, lapply(rand_alts_mat, draw_alts, all_alts = zone_cent$ZoneID, numAlts = grid_sample_n))
378+
379+
} else {
380+
rand_alts_mat <- NULL
381+
}
382+
393383
# alt choice list ----
394384
Alt <- list(
395385
dataZoneTrue = dataZoneTrue, # index to identify which obs to use in model
@@ -410,7 +400,10 @@ create_alternative_choice <-
410400
fish_cent_name = fish.cent.name,
411401
spat = spatdat,
412402
spatID = spatID,
413-
spatname = spat
403+
spatname = spat,
404+
grid_sample = grid_sample,
405+
grid_sample_n = grid_sample_n,
406+
rand_alts_mat = rand_alts_mat
414407
)
415408

416409
# write Alt to datafile ----

0 commit comments

Comments
 (0)