-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathObserver troubles.Rmd
executable file
·344 lines (278 loc) · 16.7 KB
/
Observer troubles.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
---
title: "Observer troubles"
output: html_document
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```
##
To make this thing run, you need these files:
"Ole_Squash.RDS"
"Chinooky_Rates_merged.RDS"
"Unique_trips_Observed.RDS"
I keep things in different directories for my own sanity. Change the first chunk to match your directories to make things work.
```{r echo=F,warning=FALSE,include=F}
library(tidyverse)
library(here)
library(gmodels)
library(gplots)
library(readr)
```
```{r }
base.dir <- "/Users/ole.shelton/Github"
# this is where Unique_trips, Chinooky live.
data.dir <- paste0(base.dir,"/Orca_Salmon_DATA/Effort info/Trawl-ALASKA/")
# This is where Squash lives.
data.rec.dir <- paste0(base.dir,"/Orca_Salmon_DATA/Recoveries/Alaska Trawl/")
write.dir <- paste0(base.dir,"/spring-chinook-distribution/Processed Data/Effort Data/")
```
```{r warning=FALSE, echo=F,include=F }
##########################################################
####################################################################
####################################################################
####################################################################
####################################################################
# If there is an observer on board- how much of the catch are they surveying?
options(digits = 21)
setwd(data.dir)
observed.raw <- readRDS("Unique_trips_Observed.RDS") %>% mutate(haul_date2 =haul_date) %>%
separate(haul_date2, into = c("year", "month", "day"), sep = "-")
observed.summary<- observed.raw %>%
mutate(year=as.numeric(substr(haul_date,1,4))) %>%
mutate(trip_target_code2=ifelse(akr_gear_code=="NPT","Not-P",trip_target_code)) %>%
dplyr::select(year,
haul_join,
cruise,
#haul_date,
obs_vessel_id,
nmfs_area=reporting_area_code,
akr_gear_code,
trip_target_code2,
pscnq_processing_sector) %>%
mutate(nmfs_area=as.numeric(nmfs_area)) %>%
# only keep observations targeting pollock
#filter(trip_target_code %in% c("B","P")|akr_gear_code=="PTR") %>%
mutate(trip_target_code2 = ifelse(trip_target_code2 == "B","P",trip_target_code2)) %>%
group_by(year,
haul_join,
cruise,
#haul_date,
obs_vessel_id,
nmfs_area,
#akr_gear_code,
#trip_target_code,
trip_target_code2,
pscnq_processing_sector) %>%
dplyr::summarise(n=length(year)) %>% dplyr::select(-n)
# get rid of catcher processors right off the bat.
##### SUMMARY Here is a table for Pollock in the GOA. The vast, vast majority of fishing is
##### from Shoreside in the Gulf of Alaska. CP and motherships are ignorable.
observed.raw %>%
group_by(reporting_area_code,pscnq_processing_sector, trip_target_code) %>%
tally() %>% filter(trip_target_code %in% c("P","B")) %>% as.data.frame()
# how many observations are we looking for
observed.S.P <- observed.summary %>% filter(trip_target_code2=="P",pscnq_processing_sector=="S")
count.ob.S.P <- observed.S.P %>% group_by(cruise,year,haul_join) %>% summarise(N=length(year))
dups <- count.ob.S.P %>% filter(N>1) # these are are the observations that are duplicates.
A <- observed.S.P %>% filter(cruise %in% dups$cruise, haul_join %in% dups$haul_join)
dim(A); sum(dups$N) # got em
# why are they duplicated?
# because the observers had multiple regions associated with each trip
head(A %>% as.data.frame())
# dat.squash is a list of all of the chinook sampled in the observer program.
# we need to merge in the observed data from above to assign target and managment group to this
# unfortunately, dat.squash includes all of the Bering and lots of fleets, so we need to filter those out.
# here is a table of where and when they have samples from:
options(digits = 21)
setwd(data.rec.dir)
dat.squash.raw <- readRDS("Ole_Squash.RDS") %>%
mutate(haul_date_sq =HAUL_OFFLOAD_DATE) %>%
separate(HAUL_OFFLOAD_DATE, into = c("year", "month", "day"), sep = "-")
dat.squash.raw %>% arrange(NMFS_AREA) %>% group_by(year,NMFS_AREA) %>% summarise(N=length(year)) %>%
pivot_wider(.,values_from = "N",names_from = c("NMFS_AREA")) %>% as.data.frame()
# We assume that if they made in into this database they were checked for an adipose
# fin and therefore for CWT.
# We are going to use this database to figure out what fraction of the total Chinook
# captured during the fishery.
# First we have to pull out the trips that can be ascribed to the shoreside pollock fleet
# in the Gulf of Alaska.
dat.squash.goa <- dat.squash.raw %>%
dplyr::select(year,month,day,haul_date_sq,
cdq_code=CDQ_CODE,gear=GEAR,permit=PERMIT,
cruise=CRUISE,
port_join =PORT_JOIN,
haul_join=HAUL_JOIN,
vessel_code =VESSEL_CODE,
lat_end=LATDD_END,lat_start=LATDD_START,
lon_end=LONDD_END,lon_start=LONDD_START,
nmfs_area=NMFS_AREA,
TYPE_1_OTOLITH, TYPE_2_SCALE, TYPE_3_SEX_LENGTH_WEIGHT,
TYPE_4_FIN_CLIPS, TYPE_5_VERTEBRAE, TYPE_6_SPINES,
TYPE_7_MATURITY_SCAN, TYPE_8_MATURITY, TYPE_9_STOMACHS,
TYPE_10_ISOTOPES,TYPE_11_OTHER_TISSUE, TYPE_12_SNOUT,
TYPE_13_ADIPOSE_PRESENT,
weight=WEIGHT,
sex=SEX,
ad_pres = ADIPOSE_PRESENT,
SAMPLE_SYSTEM) %>%
filter(nmfs_area>600,nmfs_area<670) %>% # include only GOA
mutate(year=as.numeric(year))
dat.squash.goa.obs.shore.early <- left_join(dat.squash.goa,observed.S.P %>% filter(year<=2007,year>=1996),
by = c("haul_join"= "haul_join",
"cruise"="cruise",
"nmfs_area"="nmfs_area",
"year"="year")) %>%
filter(pscnq_processing_sector=="S")
dim(dat.squash.goa %>% filter(year<=2007,year>=1996)) # 4724
dim(dat.squash.goa.obs.shore.early) # 3706
# 78.4% from the trawl shoreside.
# join based on two different fields for 2008 and on.
dat.squash.goa.obs.shore.late1 <- left_join(dat.squash.goa,observed.S.P %>% filter(year>2007),
by = c("port_join"= "haul_join",
"cruise"="cruise",
"nmfs_area"="nmfs_area",
"year"="year")) %>%
filter(pscnq_processing_sector=="S")
dat.squash.goa.obs.shore.late2 <- left_join(dat.squash.goa,observed.S.P %>% filter(year>2007)) %>%
filter(pscnq_processing_sector=="S")
dat.squash.goa.obs.shore.late <- bind_rows(dat.squash.goa.obs.shore.late1,dat.squash.goa.obs.shore.late2)
dim(dat.squash.goa %>% filter(year>2007)) # 24979
dim(dat.squash.goa.obs.shore.late) # 19616
# 78.5% from the trawl shoreside.
dat.squash.goa.obs.shore <- bind_rows(dat.squash.goa.obs.shore.early,dat.squash.goa.obs.shore.late)
dat.squash.goa.obs.shore %>% distinct() %>% dim()
# Let's do a little checking. First are there a bunch of rows with duplicates?
dat.squash.goa.obs.shore %>% distinct() %>% dim()
# 19272.... so there are about 4000 potential duplicates.
# Let's find them.
A <- dat.squash.goa.obs.shore %>% group_by(year,cruise,haul_join,weight,sex) %>% tally() %>% filter(n>1) %>% as.data.frame()
# Let's just see if there are multiple entries that are identical in dat.squash.goa first
dat.squash.goa %>% filter(cruise%in%A$cruise,
year%in%A$year,
haul_join%in%A$haul_join,
weight%in%A$weight,
sex%in%A$sex) %>% as.data.frame()
# It sure looks like it worked ok.
# and it sure looks like those are just fish multiple fish that happen to be the same size and sex in a single observer trip.
# So we're done.
# Let's assign our regions to this data file.
dat.squash.goa.obs.shore <- dat.squash.goa.obs.shore %>%
mutate(region = case_when(nmfs_area == "610" ~ "W.APEN",
nmfs_area == "620" ~ "E.APEN",
nmfs_area %in% c("630") ~ "NW.GOA",
nmfs_area %in% c("640", "640 ") ~ "NE.GOA",
nmfs_area == "649" ~ "PWS",
#reporting_area_code == "649" & adfg_stat_area_code %in% c("466032","466033","466003") ~ "NE.GOA",
#reporting_area_code == "649" & adfg_stat_area_code %in% c("476006","486001","476035","476005","476007","476003","476004","476034", "475933","476031","476008","485932") ~ "NW.GOA", #there are about 600 records from NMFS AREA 649, which is the most northern part of PWS stat area- going to divide these based on in the 630/640 line continued into PWS and assign via ADFG #'s
TRUE ~ "MISSING_LOCATION_INFO"))
```
```{r echo=F,include=F }
### OK. toy example
############### OK. NOW LET'S CONNECT THESE TO THE CHINOOKY DATA FRAME which contains all of the
############### information that allows you to expand to total bycatch.
###############
#options(digits=15)
setwd(data.dir)
chinooky_rates <- readRDS("Chinooky_Rates_merged.RDS") %>%
filter(fmp_sub_area_code == "GOA", target =="pollock") %>% # 23438 observations
mutate(region = case_when(reporting_area_code == "610" ~ "W.APEN",
reporting_area_code == "620" ~ "E.APEN",
reporting_area_code %in% c("630") ~ "NW.GOA",
reporting_area_code %in% c("640", "640 ") ~ "NE.GOA",
reporting_area_code == "649" ~ "PWS",
#reporting_area_code == "649" & adfg_stat_area_code %in% c("466032","466033","466003") ~ "NE.GOA",
#reporting_area_code == "649" & adfg_stat_area_code %in% c("476006","486001","476035","476005","476007","476003","476004","476034", "475933","476031","476008","485932") ~ "NW.GOA", #there are about 600 records from NMFS AREA 649, which is the most northern part of PWS stat area- going to divide these based on in the 630/640 line continued into PWS and assign via ADFG #'s
TRUE ~ "MISSING_LOCATION_INFO")) %>% #there are 14 of these because they are in area 649 but have missing ADFG #'s, all from a few days in 2003
separate(trip_target_date, into = c("year", "month", "day"), sep = "-") %>%
rename(nmfs_area = reporting_area_code) %>% # 23438 observations
filter(processing_sector=="S") %>% #23363 observations
filter(!management_program_code %in% c("SMO"),!nmfs_area==649)
# this gets the data down to about 22940 observations.
# includes a small number of RPP and AFA management codes.
```
## Here are the nuts and bolts
There is a bunch of code above. Here is the short version. I think I have most things sorted.
I used "Unique_trips_Observed.RDS" to determine which trips had observer aboard and to include only shoreside pollock in the GOA (nmfs areas 610, 620, 630, 640).
I used "Ole_Squash.RDS" as the list of Chinook salmon ever examined by the observer program. It does not have info about fleet or target species, though. So I've been trying to merge the unique trips and squash so I can ID the fish sampled from the pollock shoreside trawl in the GOA.
It turns out this is complicated, but I got it to largely to work. I joined using "cruise" and "haul_join" for 2007 and earlier. For 2008 on, I performed two joins using: First using "port_join" from Squash and "haul_join" from Unique_trips (gets 90+% of the values), second using "haul_join" from both (gets a few stragglers). But misses some values that I can't seem to find.
```{r }
# summary tables of the merge for Squash and Unique_trips
dat.squash.goa.obs.shore %>% group_by(year,nmfs_area) %>% tally() %>%
pivot_wider(.,values_from = n,names_from = nmfs_area) %>% as.data.frame()
#Something is wrong from 2008-2013.
# Try and figure out if it is in the observed trips or in the squash data.
dat.squash.goa.obs.shore %>% group_by(year,nmfs_area) %>% tally() %>%
pivot_wider(.,values_from = n,names_from = nmfs_area) %>% filter(year>=2007,year<=2014) %>%
as.data.frame()
#Pull out dat.squash from GOA in 2009
this <- dat.squash.goa %>% filter(year==2009)
obs.trips <- unique(this$port_join)
nrow(this); length(obs.trips)
# there are 369 observed fish in 2009 in the squash data from 34 unique trips
temp <- observed.raw %>% filter(year==2009, haul_join %in% obs.trips)
length(unique(temp$haul_join))
# but only exactly one of the observed trips shows up in the observer data (all fleets).
# That is awful strange. There are fish observed and sampled, but they come from trips that were not observed... according to the Unique Trips database.
#Repeat for 2011 Pull out dat.squash from GOA in 2011
this <- dat.squash.goa %>% filter(year==2011)
obs.trips <- unique(this$port_join)
nrow(this); length(obs.trips)
# there are 286 observed fish in 2011 in the squash data from 59 unique trips
temp <- observed.raw %>% filter(year==2011, haul_join %in% obs.trips)
length(unique(temp$haul_join))
# but there are only of the observed trips shows up in the observer data (all fleets).
```
Let's check this against the chinooky database to see if that suggests there were observers on boats in the pollock fleet during 2008-2013
```{r }
chinooky_rates %>% filter(year%in%c(2008:2013),nmfs_area>600,nmfs_area<670,
observers_onboard>0,
target=="pollock",
psc_fishery_code=="S") %>%
group_by(year) %>%
tally %>% as.data.frame()
# looks like there were a bunch.
```
So the question is why aren't they in the Unique_Trips data. I went rooting around for specific trips that have observed fish e.g. (you'll have to uncomment and run locally to make these work).
```{r }
# If I run these two lines,
# dat.squash.goa %>% filter(year==2009,month=="02",day==28,nmfs_area==620)
# observed.raw %>% filter(year==2009,month=="02",day==28,reporting_area_code==620)
````
I get a single boat and cruise numbers but different haul_join codes. Ugh.
```{r }
# Same thing here.
# dat.squash.goa %>% filter(year==2011,month=="03",day=11,nmfs_area==630)
# observed.raw %>% filter(year==2011,month=="03",reporting_area_code==630,trip_target_code=="P",day==11)
```
So I think the haul_joins and port_joins are f*ed for those years for some reason. So my question is if there is a extra field in AKFIN that might play this role to join things (e.g. is there a "port_join" field or something else in AKFIN that could be queried and used?). If not, I can run through the other fields and make my own join that correctly identifies these records.
Here is the query you wrote to pull from AKFIN to make Unique Trips if that helps:
```{r }
# con <- dbConnect(odbc::odbc(), "akfin", UID="jwatson", PWD= rstudioapi::askForPassword("Enter AKFIN Password"))
#
# dbFetch(dbSendQuery(con,"select distinct a.cruise,
# a.obs_vessel_id,
# a.performance,
# a.haul_date,
# a.haul_join,
# a.catcher_boat_adfg,
# a.official_total_catch,
# a.fishing_start_date,
# a.akr_gear_code,
# a.reporting_area_code,
# a.retained_groundfish_weight,
# a.year,
# a.trip_target_code,
# a.pscnq_processing_sector,
# a.ves_cfec_length,
# a.duration_in_min,
# b.management_program_code
# from council.comprehensive_obs a
# left join council.comprehensive_blend_ca b
# on to_char(a.catch_report_source_pk)=to_char(b.catch_report_source_pk)
# where a.year<=2017
# and a.akr_gear_code IN ('PTR','NPT','TRW')
# and a.FMP_AREA='GOA'
# order by a.cruise,a.haul_join")) %>%
# rename_all(tolower) %>%
# saveRDS("Unique_trips_Observed.RDS")
```