###############################################################################
#
# Author: Matthew H. Grinnell
# Affiliation: Pacific Biological Station, Fisheries and Oceans Canada (DFO)
# Group: Quantitative Assessment Methods Section, Science
# Address: 3190 Hammond Bay Road, Nanaimo, BC, Canada, V9T 6N7
# Contact: e-mail: [email protected] | tel: 250.756.7055
# Project: Herring
# Code name: Summary.R
# Version: 2.0
# Date started: Jun 03, 2016
# Date edited: Sep 07, 2017
#
# Overview:
# Generate tables and figures for annual Pacific herring preliminary data
# summaries by region(s), and generate the input file for the Pacific herring
# stock assessment, which uses ADMB.
#
# Requirements:
# Access to the main herring databases on the shared network drive, or local
# copies: catch, biosample, and spawn. In addition, look-up tables are used to
# convert numeric codes in the databases into reader-friendly text.
#
# Notes:
# Usually only one region is considered at a time, but this script can analyse
# multiple regions if requested. Output (e.g., .RData, pdf figures, latex
# tables) can be used by the Knitr file 'DataReport.Rnw' to generate a dynamic
# and reproducible document (http://yihui.name/knitr/).
#
# References:
# This script is based on several R scripts written by Jaclyn Cleary and Ashleen
# Benson which wrangle herring catch, biosample, and spawn data.
#
# Versions: Version 1 used the spawn index calculated in the database. Version
# 2 sources the script 'SpawnIndex.R', and does not rely on the database.
#
###############################################################################
# TODO:
# 1. Get better section shapefile polygons (i.e., contiguous with no holes or
# overlapping borders).
# 2. Label extra sampling in Central Coast Area 8 as non-representative (i.e.,
# for years when additional samples were collected)? This isn't really
# necessary since we have a work-around using the historic sample ratio.
# 3. Differentiate between representative, unrepresentative, and unknown
# samples. The default should be unknown (e.g., for old samples).
# 4. Update column and variable names in the look-up tables so that I don't have
# to make any manual changes in the R script (e.g., Period1=Gear1). Better
# yet, omit the need for look-up tables by having the actual variable names
# in the main tables! Why isn't this the case?
# 5. Calculate tide/datum corrected spawn depths.
# 6. Get better location information (i.e., X, and Y for locations).
# 11. Fix spawn metric figures (e.g., length, width, layers, index) so that the
# y-axis CIs don't go negative (i.e., log scale?). Or, omit the CIs?
# 12. Add SOK harvest (spawning biomass in tonnes) to the time series of landed
# catch? That is to say, add the data from Table 2 to Figure 3 in the data
# summary report.
########################
##### Housekeeping #####
########################
# General options
rm( list=ls( ) ) # Clear the workspace
sTime <- Sys.time( ) # Start the timer
graphics.off( ) # Turn graphics off
# Install missing packages and load required packages (if required)
UsePackages <- function( pkgs, locn="https://cran.rstudio.com/" ) {
# Reverse the list
rPkgs <- rev( pkgs )
# Identify missing (i.e., not yet installed) packages
newPkgs <- rPkgs[!(rPkgs %in% installed.packages( )[, "Package"])]
# Install missing packages if required
if( length(newPkgs) ) install.packages( newPkgs, repos=locn )
# Loop over all packages
for( i in 1:length(rPkgs) ) {
# Load required packages using 'library'
eval( parse(text=paste("suppressPackageStartupMessages(library(", rPkgs[i],
"))", sep="")) )
} # End i loop over package names
} # End UsePackages function
# Make packages available
UsePackages( pkgs=c("tidyverse", "RODBC", "zoo", "Hmisc", "scales", "sp",
"maptools", "rgdal", "rgeos", "raster", "xtable", "cowplot", "grid",
"colorRamps", "RColorBrewer", "stringr", "lubridate", "readxl",
"plyr", "lettercase") )
####################
##### Controls #####
####################
# Select region(s): major (HG, PRD, CC, SoG, WCVI); or minor (A27, A2W, JS)
region <- "JS"
# Select a subset of sections (or NA for all)
sectionSub <- NA # c( 67, 70:79 )
# Include test fishery catch
inclTestCatch <- TRUE
# Include test seine biological data
inclTestSNBio <- TRUE
# Include test gillnet biological data
inclTestGNBio <- FALSE
# Include spawn on kelp biological data
inclSOKBio <- TRUE
# Location of the shared network drive
dirShare <- file.path( "\\\\dcbcpbsna01a", "hdata$" )
# Databases: remote or local
dbLoc <- "Remote"
# Database name
dbName <- "HSA_Program_v6.2.mdb"
# Input coordinate reference system
inCRS <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
# Coordinate reference system (http://spatialreference.org/ref/sr-org/82/)
outCRS <- paste( "+proj=aea +lat_1=50 +lat_2=58.5 +lat_0=45 +lon_0=-126",
"+x_0=1000000 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=m +no_defs" )
# Geographic projection
geoProj <- "Projection: BC Albers (NAD 1983)"
######################
##### Parameters #####
######################
# Year range: include data
yrRange <- 1951:2017
# Age range: omit below, plus group above
ageRange <- 2:10
# Age to highlight in figure
ageShow <- 3
# Number of years to calculate running mean
nRoll <- 5
# Conversion factors: short tons to tonnes, feet to metres
convFac <- list( st2t=0.90718474, ft2m=0.3048 )
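# For example, a catch of 1000 short tons converts to
# 1000 * convFac$st2t = 907.2 t, and a depth of 6 ft converts to
# 6 * convFac$ft2m = 1.8 m (illustrative values only)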
# First year of data to include in summary tables
firstYrTab <- min( yrRange )
# First year of data to include in summary figures
firstYrFig <- min( yrRange )
# Age schedule and population parameters for model input
parsADMB <- list(
"Natural mortality (m)"=0.334,
"Growth (linf, k, to)"=c(27, 0.48, 0),
"Length Weight (a, b)"=c(4.5e-6, 3.127),
"Maturity at age"=c(2.055, 0.05),
"Maturity vector"=c(0.24974, 0.9, 1, 1, 1, 1, 1, 1, 1) )
# Spawn survey method changed from surface (1951--1987) to dive (1988--present)
newSurvYr <- 1988
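# A hedged sketch of how this cutoff is typically applied downstream, e.g.,
# to label the survey period for each observation:
#   mutate( Survey=ifelse(Year < newSurvYr, "Surface", "Dive") )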
# Figure width
figWidth <- 6
# Type of smoothing line
smLine <- "loess"
# Level of confidence interval for smoothed conditional mean
ciLevel <- 0.9
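# These smoothing controls are presumably passed to ggplot2 in the figure
# functions, e.g., geom_smooth( method=smLine, level=ciLevel )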
# Get ylimits (e.g., weight in g) for the weight-at-age plot
wtRange <- c( 35, 130 )
# TODO: Omit spawn data prior to this year, or draw a vertical line (minor stock areas)
firstYrSpawnMinor <- 1978
# Year range for historic ratio of biosample effort (Central Coast)
yrsRatioHist <- 1994:2013
# Years to fix using historic ratio of biosample effort (Central Coast)
yrsRatioFix <- c( 2014, 2015 )
# Expand the transect length
doExpand <- FALSE
# Years to expand the transect length (i.e., footrope)
expYrs <- 2003:2014
# Proportion to expand the transect length (i.e., footrope)
expFac <- 0.075
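# A hedged sketch: when doExpand is TRUE, transect lengths in 'expYrs' are
# presumably inflated by something like Length * (1 + expFac) to account for
# the footrope; the actual adjustment is implemented in 'SpawnIndex.R'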
# Years where intensity is used to determine egg layers
intenseYrs <- yrRange[yrRange < 1979]
# Years where intensity needs to be re-scaled
rescaleYrs <- intenseYrs[intenseYrs < 1951]
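# Note: with yrRange starting in 1951, 'rescaleYrs' evaluates to an empty
# vector (integer(0)), so no intensities are re-scaled under current settings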
# Transect width (m; used for macrocystis spawn)
TransectW <- 2
# Buffer distance (m; to include locations that are outside the region polygon)
maxBuff <- 5000
##################
#### Sources #####
##################
# File name for dive transect XY
diveLoc <- list(
loc=file.path("..", "Data"),
fn="dive_transects_with_lat_long_June2_2017.xlsx" )
# Location and names of lookup tables with catch codes
codesLoc <- list(
loc=file.path("..", "Data"),
fns=list(tDisposal="tDisposal.csv", tGear="tGear.csv",
tSource="tSource.csv") )
# Location and name of the location database and tables
areaLoc <- list(
loc=file.path("..", "Data", dbLoc),
db=dbName,
fns=list(sections="Sections", locations="Location") )
# Location(s) and names of the Sections and land shapefiles
shapesLoc <- list(
locSec=file.path(dirShare, "Kristen", "Herring_Shapefiles"),
locLand=file.path("..", "Data", "Polygons"),
fns=list(sections="SectionsIntegrated", land="GSHHS_h_L1_Alb") )
# Location and name of the catch database and tables
catchLoc <- list(
loc=file.path("..", "Data", dbLoc),
db=dbName,
fns=list(tCatch="tCatchData", hCatch="HailCatch", sokCatch="SpawnOnKelp") )
# Location and name of the biological database and tables
bioLoc <- list(
loc=file.path("..", "Data", dbLoc),
db=dbName,
fns=list(samples="sample", fish="fish", bmc="BMcCarter") )
# Location and name of the surface database and tables
surfLoc <- list(
loc=file.path("..", "Data", dbLoc),
db=dbName,
fns=list(regionStd="RegionStd", sectionStd="SectionStd", poolStd="PoolStd",
surface="tSSSurface", intensity="Intensity", allSpawn="tSSAllspawn") )
# Location and name of the macrocystis database and tables
macroLoc <- list(
loc=file.path("..", "Data", dbLoc),
db=dbName,
fns=list(allSpawn="tSSAllspawn", plants="tSSMacPlant",
transects="tSSMacTrans") )
# Location and name of the macrocystis database and tables
underLoc <- list(
loc=file.path("..", "Data", dbLoc),
db=dbName,
fns=list(allSpawn="tSSAllspawn", algTrans="tSSVegTrans",
stations="tSSStations", algae="tSSVegetation",
typeAlg="tSSTypeVegetation") )
#####################
##### Functions #####
#####################
# Load helper functions
source( file="Functions.R" )
# Load spawn index functions
source( file="SpawnIndex.R" )
################
##### Data #####
################
# Breaks for years
yrBreaks <- seq( from=round_any(x=min(yrRange), accuracy=10, f=floor),
to=round_any(x=max(yrRange), accuracy=10, f=ceiling), by=10 )
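# With yrRange 1951:2017 this evaluates to decade breaks from 1950 to 2020,
# i.e., c(1950, 1960, 1970, 1980, 1990, 2000, 2010, 2020)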
# Function to load transect spatial info
LoadTransectXY <- function( loc ) {
# Load the data and wrangle
dat <- read_excel( path=file.path(loc$loc, loc$fn), sheet=1 ) %>%
rename( LocationCode=LOC_CODE ) %>%
mutate( LocationCode=as.integer(LocationCode) ) %>%
group_by( LocationCode ) %>%
summarise(
Longitude=MeanNA(c(StartLong, MidLong, EndLong)),
Latitude=MeanNA(c(StartLat, MidLat, EndLat)) ) %>%
ungroup( ) %>%
filter( !is.na(Longitude), !is.na(Latitude), LocationCode!=0 )
# Grab the spatial info (X and Y)
locSP <- dat %>%
transmute( X=Longitude, Y=Latitude )
# Put X and Y into a spatial points object
locPts <- SpatialPointsDataFrame( coords=locSP,
data=data.frame(LocationCode=dat$LocationCode), proj4string=CRS(inCRS) )
# Convert X and Y from WGS to Albers
locPtsAlb <- spTransform( x=locPts, CRSobj=CRS(outCRS) )
# Extract spatial info
dfAlb <- as_tibble( locPtsAlb ) %>%
rename( Eastings=X, Northings=Y )
# Return the data
return( dfAlb )
} # End LoadTransectXY function
# Load 'auxiliary' dive transect spatial data
transectXY <- LoadTransectXY( loc=diveLoc )
# If region is a vector, collapse region names for output; otherwise region
regName <- paste( region, collapse="." )
# If old directory exists
if( regName %in% list.files() ) {
# Remove the old directory
unlink( regName, recursive=TRUE )
# Warning: remove previous summary output
warning( "Removed existing directory '", regName, "'", call.=FALSE )
} # End if directory exists
# Create the main directory for output
dir.create( regName )
# Load disposal codes
tDisposal <- read_csv( file=file.path(codesLoc$loc, codesLoc$fns$tDisposal),
col_types=cols("i", "c", "c", "i", "i", "c") )
# Load gear codes
tGear <- read_csv( file=file.path(codesLoc$loc, codesLoc$fns$tGear),
col_types=cols("i", "c", "i", "i", "i") )
# Load source codes
tSource <- read_csv( file=file.path(codesLoc$loc, codesLoc$fns$tSource),
col_types=cols("i", "c", "c") )
# Load herring areas
areas <- LoadAreaData( where=areaLoc )
# Get BC land data etc (for plots)
shapes <- LoadShapefiles( where=shapesLoc, a=areas )
# Load raw catch data, and some light wrangling
LoadCatchData <- function( where ) {
# This function loads the three types of herring catch data, drops unnecessary
# rows and columns, and combines the data frames for the region(s) in question.
# Progress message
cat( "Loading catch data... " )
# Establish connection with access
accessDB <- odbcConnectAccess( access.file=file.path(where$loc, where$db) )
# Access the tCatch worksheet and wrangle
tCatch <- sqlFetch( channel=accessDB, sqtable=where$fns$tCatch ) %>%
mutate( Year=Season2Year(Season), Source=rep("Tab", times=n()) ) %>%
left_join( y=areas, by="LocationCode" ) %>%
filter( Section %in% areas$Section ) %>%
group_by( Year, Source, Section, GearCode, DisposalCode ) %>%
summarise( Catch=SumNA(Catch) ) %>%
ungroup( )
# Access the hail worksheet and wrangle
hCatch <- sqlFetch( channel=accessDB, sqtable=where$fns$hCatch ) %>%
filter( Active == 1, Section %in% areas$Section ) %>%
mutate( Year=Season2Year(Season), Catch=CatchTons*convFac$st2t,
Source=rep("Hail", times=n()) ) %>%
group_by( Year, Source, Section, GearCode, DisposalCode ) %>%
summarise( Catch=SumNA(Catch) ) %>%
ungroup( )
# Access the sok worksheet
sokCatch <- sqlFetch( channel=accessDB, sqtable=where$fns$sokCatch ) %>%
mutate( Year=Season2Year(Season), Source=rep("SOK", times=n()) ) %>%
rename( Catch=ProductLanded ) %>%
filter( Section %in% areas$Section ) %>%
group_by( Year, Source, Section, GearCode, DisposalCode ) %>%
summarise( Catch=SumNA(Catch) ) %>%
ungroup( )
# Combine the three tables
allCatch <- bind_rows( tCatch, hCatch, sokCatch )
# Smaller subset of area information
areasSm <- areas %>%
select( Region, RegionName, StatArea, Section, Group ) %>%
distinct( )
# Merge with area information
res <- allCatch %>%
left_join( y=areasSm, by="Section" ) %>%
select( Year, Source, Region, StatArea, Section, GearCode, DisposalCode,
Catch ) %>%
arrange( Year, Source, Region, StatArea, Section, GearCode, DisposalCode )
# Warning if more recent data is available
if( max(res$Year, na.rm=TRUE) > max(yrRange) )
warning( "Recent catch data exists; update 'yrRange' to include ",
paste(unique(res$Year[which(res$Year > max(yrRange))]), collapse=", "),
call.=FALSE )
# Close the connection
odbcClose( accessDB )
# Update progress message
cat( "done\n" )
# Return the data
return( res )
} # End LoadCatchData function
# Load raw catch data
catchRaw <- LoadCatchData( where=catchLoc )
# Load biological data, and some light wrangling
LoadBioData <- function( where, XY ) {
# This function loads the herring biosample data: one table with general
# sample information, and the other with fish measurements. Unnecessary
# rows and columns are dropped. The output is a data frame of the two merged
# tables for the region(s) in question.
# Progress message
cat( "Loading biosample data... " )
# Establish connection with access
accessDB <- odbcConnectAccess( access.file=file.path(where$loc, where$db) )
# Access the sample worksheet
sampleDat <- sqlFetch( channel=accessDB, sqtable=where$fns$samples )
# Grab the spatial info and process
sampleSP <- sampleDat %>%
transmute( X=ifelse(is.na(Set_Longitude), 0, Set_Longitude),
Y=ifelse(is.na(Set_Latitude), 0, Set_Latitude))
# Put X and Y into a spatial points object
sPts <- SpatialPoints( coords=sampleSP, proj4string=CRS(inCRS) )
# Convert X and Y from WGS to Albers
sPtsAlb <- spTransform( x=sPts, CRSobj=CRS(outCRS) )
# Extract spatial info
dfAlb <- as_tibble( sPtsAlb )
# Extract relevant sample data
samples <- sampleDat %>%
cbind( dfAlb ) %>%
rename( LocationCode=loc_code, Sample=isamp, Month=month,
Representative=Representative_Set, SourceCode=source_code,
GearCode=gear_code ) %>%
mutate( Year=Season2Year(season),
Eastings=ifelse(is.na(Set_Longitude), Set_Longitude, X),
Northings=ifelse(is.na(Set_Latitude), Set_Latitude, Y) ) %>%
select( Year, Month, Sample, Representative, LocationCode, Eastings,
Northings, SourceCode, GearCode ) %>%
as_tibble( )
# Access the fish worksheet and wrangle
fish <- sqlFetch( channel=accessDB, sqtable=where$fns$fish ) %>%
rename( Sample=isamp, Fish=fish, Length=len, Weight=wgt, Sex=sex_alpha,
MaturityCode=mat_code, DualAge=dual_age,
GonadLength=gonad_len, GonadWeight=gonad_wgt ) %>%
mutate( Year=Season2Year(season),
Age=ifelse(age<=max(ageRange), age, max(ageRange)) ) %>%
filter( Age >= min(ageRange) ) %>%
select( Year, Sample, Fish, Length, Weight, Sex, MaturityCode, Age,
DualAge, GonadLength, GonadWeight ) %>%
as_tibble( )
# Combine the two tables (note that Sample re-starts at 1 each Year)
fishSamples <- full_join( x=samples, y=fish, by=c("Year", "Sample") )
# More wrangling: filter to region(s)
raw <- fishSamples %>%
filter( LocationCode %in% areas$LocationCode ) %>%
left_join( y=areas, by="LocationCode" ) %>%
mutate( Eastings=ifelse(is.na(Eastings.x), Eastings.y, Eastings.x),
Northings=ifelse(is.na(Northings.x), Northings.y, Northings.x)) %>%
select( Year, Month, Region, StatArea, Group, Section, LocationCode,
LocationName, Eastings, Northings, Sample, Representative, SourceCode, GearCode,
Fish, Length, Weight, Sex, MaturityCode, Age, DualAge, GonadLength,
GonadWeight ) %>%
arrange( Year, Month, Region, StatArea, Group, Section, LocationCode,
Sample, Fish )
# Clip the extent
df <- ClipExtent( dat=raw, spObj=shapes$regSPDF, bufDist=maxBuff,
silent=TRUE )
# Subset data with 'good' X and Y
dfNotNA <- df %>%
filter( !is.na(Eastings) & !is.na(Northings) )
# Subset data with 'bad' X or Y, and try to fill in using transect X and Y
dfNA <- df %>%
filter( is.na(Eastings) | is.na(Northings) ) %>%
select( -Eastings, -Northings ) %>%
left_join( y=XY, by="LocationCode" )
# Re-combine the two subsets
df2 <- bind_rows( dfNotNA, dfNA )
# Clip the extent (again)
res <- ClipExtent( dat=df2, spObj=shapes$regSPDF, bufDist=maxBuff,
silent=TRUE )
# Get locations with missing X or Y
noXY <- res %>%
filter( is.na(Eastings) | is.na(Northings) ) %>%
select( Region, StatArea, Group, Section, LocationCode, LocationName ) %>%
distinct( )
# Message re missing X and Y, if any
if( nrow(noXY) >=1 ) warning( "There are ", nrow(noXY),
" biological sample location(s) with missing or incorrect spatial info",
call.=FALSE )
# Stop if we're missing rows
if( nrow(raw) != nrow(res) ) stop( "Missing rows!", call.=FALSE )
# Warning if more recent data is available
if( max(res$Year, na.rm=TRUE) > max(yrRange) )
warning( "Recent biological data exists; update 'yrRange' to include ",
paste(unique(res$Year[which(res$Year > max(yrRange))]), collapse=", "),
call.=FALSE )
# Close the connection
odbcClose( accessDB )
# Update progress message
cat( "done\n" )
# Return the data
return( res )
} # End LoadBioData function
# Load raw biological data
bioRaw <- LoadBioData( where=bioLoc, XY=transectXY )
# Load spawn data, and some light wrangling
LoadSpawnData <- function( whereSurf, whereMacro, whereUnder, XY ) {
# This function loads the herring spawn data, and drops unnecessary rows and
# columns. The output is a data frame for the region(s) in question.
# Progress message
cat( "Calculating spawn index: " )
# Fecundity conversion factor
ECF <- CalcEggConversion( fecundity=200, pFemale=0.5 )
# Access and calculate surface spawn
surface <- CalcSurfSpawn( where=whereSurf, a=areas, f=ECF )
# Access and calculate macrocystis spawn
macrocystis <- CalcMacroSpawn( where=whereMacro, a=areas, f=ECF )
# Access and calculate understory spawn
understory <- CalcUnderSpawn( where=whereUnder, a=areas, f=ECF )
# Load the all spawn data
allSpawn <- GetAllSpawn( where=underLoc, a=areas )
# Combine the spawn types (by spawn number)
raw <- surface$biomassSpawn %>%
full_join( y=macrocystis$biomassSpawn, by=c("Year", "Region", "StatArea",
"Section", "LocationCode", "SpawnNumber") ) %>%
full_join( y=understory$biomassSpawn, by=c("Year", "Region", "StatArea",
"Section", "LocationCode", "SpawnNumber") ) %>%
full_join( y=allSpawn, by=c("Year", "Region", "StatArea", "Section",
"LocationCode", "SpawnNumber") ) %>%
select( Year, Region, StatArea, Group, Section, LocationCode,
LocationName, SpawnNumber, Eastings, Northings, Start, End, Length,
Width, Depth, Method, SurfLyrs, SurfSI, MacroLyrs, MacroSI, UnderLyrs,
UnderSI ) %>%
mutate( Year=as.integer(Year) ) %>%
arrange( Year, Region, StatArea, Section, LocationCode, SpawnNumber,
Start, End )
# Clip the extent
df <- ClipExtent( dat=raw, spObj=shapes$regSPDF, bufDist=maxBuff,
silent=TRUE )
# Subset data with 'good' X and Y
dfNotNA <- df %>%
filter( !is.na(Eastings) & !is.na(Northings) )
# Subset data with 'bad' X or Y, and replace using transect X and Y
dfNA <- df %>%
filter( is.na(Eastings) | is.na(Northings) ) %>%
select( -Eastings, -Northings ) %>%
left_join( y=XY, by="LocationCode" )
# Re-combine the two subsets
df2 <- bind_rows( dfNotNA, dfNA )
# Clip the extent (again)
res <- ClipExtent( dat=df2, spObj=shapes$regSPDF, bufDist=maxBuff,
silent=TRUE )
# Get locations with missing X or Y
noXY <- res %>%
filter( is.na(Eastings) | is.na(Northings) ) %>%
select( Region, StatArea, Group, Section, LocationCode, LocationName ) %>%
distinct( )
# Message re missing X and Y, if any
if( nrow(noXY) >=1 ) warning( "There are ", nrow(noXY),
" spawn location(s) with missing or incorrect spatial info",
call.=FALSE )
# Stop if we're missing rows
if( nrow(raw) != nrow(res) ) stop( "Missing rows!", call.=FALSE )
# Warning if more recent data is available
if( max(res$Year, na.rm=TRUE) > max(yrRange) )
warning( "Recent spawn data exists; update 'yrRange' to include ",
paste(unique(res$Year[which(res$Year > max(yrRange))]), collapse=", "),
call.=FALSE )
# Return the data
return( res )
} # End LoadSpawnData function
# Load spawn data
spawnRaw <- LoadSpawnData( whereSurf=surfLoc, whereMacro=macroLoc,
whereUnder=underLoc, XY=transectXY )
##################
##### Update #####
##################
# Update catch data (more wrangling)
UpdateCatchData <- function( dat, a ) {
# Get a short list of area information
areasSm <- a %>%
select( Region, StatArea, Group, Section ) %>%
distinct( )
# Wrangle catch data
WrangleCatch <- function( df ) {
# This function combines the three types of herring catch information into
# the three 'gear' types, which are representative of the three main periods
# of herring catch. The output is a data frame with total catch (scaled as
# specified) by year and gear type (i.e., period).
# Update tCatch for Period 1
tc1 <- df %>%
filter( Source == "Tab", DisposalCode %in% c(1, 3, 4, 5, 6) ) %>%
select( Year, Catch )
# Update hCatch for Period 1
hc1 <- df %>%
filter( Source == "Hail", DisposalCode %in% c(3, 6) ) %>%
select( Year, Catch )
# Combine catches for period 1
dat1 <- bind_rows( tc1, hc1 )
# Period 1 catch
pd1 <- dat1 %>%
group_by( Year ) %>%
summarise( Gear1=SumNA(Catch) ) %>%
ungroup( )
# Updated tCatch for Period 2
tc2 <- df %>%
filter( Source == "Tab", GearCode == 29 )
# If including test fishery catch
if( inclTestCatch ){
# Include
tc2 <- filter( .data=tc2, DisposalCode %in% c(7, 8) )
} else { # End if including, otherwise
# Exclude
tc2 <- filter( .data=tc2, DisposalCode == 7 )
} # End if exclude
# Period 2 catch
pd2 <- tc2 %>%
group_by( Year ) %>%
summarise( Gear2=SumNA(Catch) ) %>%
ungroup( )
# Updated tCatch for Period 3
tc3 <- df %>%
filter( Source == "Tab", GearCode == 19 )
# If including test fishery catch
if( inclTestCatch ) {
# Include
tc3 <- filter(.data=tc3, DisposalCode %in% c(7, 8))
} else { # End if include, otherwise
# Exclude
tc3 <- filter(.data=tc3, DisposalCode == 7)
} # End if exclude
# Period 3 catch
pd3 <- tc3 %>%
group_by( Year ) %>%
summarise( Gear3=SumNA(Catch) ) %>%
ungroup( )
# Combine the data frames
dfList <- list( pd1, pd2, pd3 )
# Merge the tables and wrangle
pd123 <- Reduce( function(...) merge(..., all=TRUE), dfList ) %>%
complete( Year=yrRange, fill=list(Gear1=0, Gear2=0, Gear3=0) )
# Select years of interest and arrange by year
res <- pd123 %>%
filter( Year %in% yrRange ) %>%
gather( key=Period, value=Catch, Gear1, Gear2, Gear3 ) %>%
arrange( Period, Year ) %>%
select( Period, Year, Catch ) %>%
as_tibble( )
# Return catch by year and period
return( res )
} # End WrangleCatch function
# Start a list to hold dataframes
rList <- list( )
# Get the unique sections
uSection <- unique( dat$Section )
# Loop over sections
for( i in 1:length(uSection) ) {
# Get the ith section
iSec <- uSection[i]
# Get the catch
rList[[i]] <- catchRaw %>%
filter( Section == iSec ) %>%
WrangleCatch( ) %>%
group_by( Period, Year ) %>%
summarise( Catch=SumNA(Catch) ) %>%
ungroup( ) %>%
#mutate( Catch=ifelse(Catch == 0, NA, Catch) ) %>%
mutate( Section=iSec ) %>%
#filter( !is.na(Catch) )
filter( Catch>0 ) %>%
select( Period, Year, Section, Catch )
} # End i loop over sections
# Combine the data frames
res <- bind_rows( rList) %>%
left_join( y=areasSm, by="Section" ) %>%
select( Period, Year, Region, StatArea, Group, Section, Catch ) %>%
arrange( Period, Year, Region, StatArea, Group, Section )
# Return the results
return( res )
} # End UpdateCatchData function
# Update catch data
catch <- UpdateCatchData( dat=catchRaw, a=areas )
# Update biological data (more wrangling)
UpdateBioData <- function( dat, rYr ) {
# This function determines the three 'gear' types, which are representative
# of the three main periods of herring biological data. The output is a data
# frame with a new column indicating the period (i.e., gear type), and a new
# column indicating the weights (to be used in special cases only).
# Get data for period 1
pd1 <- dat %>%
filter( GearCode == 29, SourceCode %in% c(1, 6, 7) ) %>%
mutate( Period=rep(1, times=n()) )
# Get data for period 2
pd2 <- dat %>%
filter( GearCode == 29 ) %>%
mutate( Period=rep(2, times=n()) )
# If including test seine biological data
if( inclTestSNBio ) {
# Include
pd2 <- filter(.data=pd2, SourceCode %in% c(0, 5))
} else { # End if include, otherwise
# Exclude
pd2 <- filter(.data=pd2, SourceCode == 0)
} # End if exclude
# If including sok biological data
if( inclSOKBio ) {
# Get SOK data
SOK <- dat %>%
filter( GearCode == 29, SourceCode == 4, Month %in% c(3, 4) ) %>%
mutate( Period=rep(2, times=n()) )
# Combine with non-SOK dat
pd2 <- bind_rows( pd2, SOK )
} # End if including SOK data
# Get data for period 3
pd3 <- dat %>%
filter( GearCode == 19 ) %>%
mutate( Period=rep(3, times=n()) )
# If including test gillnet biological data
if( inclTestGNBio ) {
# Include
pd3 <- filter(.data=pd3, SourceCode %in% c(0, 5))
} else { # End if include, otherwise
# Exclude
pd3 <- filter(.data=pd3, SourceCode == 0)
} # End if exclude
# Combine the three tables
p123 <- bind_rows( pd1, pd2, pd3 )
# Warning re representative samples
warning( "Biosamples: keep all samples from ", min(yrRange), " to ", rYr-1,
", and 'representative' samples from ", rYr, " to ", max(yrRange), sep="",
call.=FALSE )
# Include only representative samples (ish)
res <- p123 %>%
filter( Year < rYr | Representative == 1 ) %>%
filter( Year %in% yrRange ) %>%
select( -Representative ) %>%
mutate( SampWt=1 )
# Return the data
return( res )
} # End UpdateBioData function
# Update biological data
bio <- UpdateBioData( dat=bioRaw, rYr=2014 )
################
##### Main #####
################
# Calculate commercial catch in recent years
CalcCommCatch <- function( dat ) {
# This function calculates total catch by gear (fishery) and use (disposal)
# for recent years, and returns a data frame.
# Get catch in current year by gear and disposal
cat <- dat %>%
filter( Year >= firstYrFig ) %>%
group_by( Year, GearCode, DisposalCode ) %>%
summarise( Catch=SumNA(Catch) ) %>%
ungroup( )
# Combine with gear names
catGear <- left_join( x=cat, y=select(.data=tGear, Gear, GearCode),
by="GearCode" )
# Combine with disposal names
catGearDisp <- left_join( x=catGear,
y=select(.data=tDisposal, DisposalCode, Disposal), by="DisposalCode" )
# Wrangle
res <- catGearDisp %>%
select( Year, Gear, Disposal, Catch ) %>%
rename( Fishery=Gear, Use=Disposal ) %>%
filter( Use != "SoK" ) %>%
arrange( Year, Fishery, Use )
# If there are no rows
if( nrow(res) == 0 ) {
# Ensure there are 3 columns
if( ncol(res) != 4 ) warning( "Result must have 4 columns", call.=FALSE )
# Add a dummy row
res[1, ] <- c( NA, NA, NA, as.integer(0) )
} # End if there are no rows
# Return the catch
return( res )
} # End CalcCommCatch function
# Calculate commercial catch in recent years
catchCommUse <- CalcCommCatch( dat=catchRaw )
# Get commercial catch in current year
GetCatchYear <- function( dat ) {
# Subset the commercial catch for the current year and ensure there is at
# least 1 row for the table. Returns a table
# Subset to get most recent year
res <- dat %>%
filter( Year == max(yrRange) ) %>%
select( -Year ) %>%
arrange( Fishery, Use )
# If there are no rows
if( nrow(res) == 0 ) {
# Ensure there are 3 columns
if( ncol(res) != 3 ) warning( "Result must have 3 columns", call.=FALSE )
# Add a dummy row
res[1, ] <- c( NA, NA, as.integer(0) )
} # End if there are no rows
# Return the data
return( res )
} # End GetCatchYear function
# Calculate commercial catch in current year
catchCommUseYr <- GetCatchYear( dat=catchCommUse )
# Calculate commercial SoK harvest and biomass in recent year
harvestSOK <- catchRaw %>%
filter( Year >= firstYrTab, DisposalCode == 2 ) %>%
group_by( Year ) %>%
summarise( Harvest=SumNA(Catch),
SoG=SumNA(Catch[Section%in%c(132, 135)])*100/Harvest ) %>%
ungroup( ) %>%
# TODO: Reference? Note: convert harvest (lb) to spawning biomass (t).
mutate( Biomass=Harvest * 831932.773 / 100000000 ) %>%
complete( Year=firstYrTab:max(yrRange),
fill=list(Harvest=0, Biomass=0, SoG=0) ) %>%
arrange( Year )
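# Note: the factor 831932.773/1e8 (about 0.00832) implies roughly 120 lb of
# SOK product per tonne of spawning biomass; the source of this value is
# still to be referenced (see TODO above)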
# Count the number of biological samples per year
CountBiosamplesYear <- function( dat ) {
# This function counts the number of biosamples collected in the current year
# and the previous few years from commercial vessels as well as test/
# research vessels. It returns a data frame that includes the total number of
# samples collected each year.
# Count the number of samples by year and gear: commercial fisheries
numComm <- dat %>%
filter( Year >= firstYrTab, !SourceCode %in% c(2, 3, 5) ) %>%
select( Year, Sample ) %>%
group_by( Year ) %>%
summarise( Commercial=n_distinct(Sample) ) %>%
ungroup( )
# Count the number of samples by year and gear: test and research
numTest <- dat %>%
filter( Year >= firstYrTab, SourceCode %in% c(2, 3, 5) ) %>%
select( Year, Sample ) %>%
group_by( Year ) %>%
summarise( Test=n_distinct(Sample) ) %>%
ungroup( )
# Count the number of samples in SoG
numSoG <- dat %>%
filter( Year >= firstYrTab, Section %in% c(132, 135) ) %>%
select( Year, Sample ) %>%
group_by( Year ) %>%
summarise( SoG=n_distinct(Sample) ) %>%
ungroup( )
# Merge the two tables and wrangle
res <- full_join( x=numComm, y=numTest, by="Year" ) %>%
full_join( y=numSoG, by="Year" ) %>%
complete( Year=firstYrTab:max(yrRange),
fill=list(Commercial=0, Test=0, SoG=0) ) %>%
mutate( Total=as.integer(Commercial+Test),
SoG=SoG*100/Total) %>%
replace_na( replace=list(SoG=0) ) %>%
arrange( Year ) %>%
select( Year, Commercial, Test, Total, SoG )
# If there are no rows
if( nrow(res) == 0 ) {
# Ensure there are 5 columns
if( ncol(res) != 5 ) warning( "Result must have 5 columns", call.=FALSE )
# Add a dummy row
res[1, ] <- c( NA, as.integer(0), as.integer(0), as.integer(0), as.integer(0) )
} # End if there are no rows
# Return the table
return( res )
} # End CountBiosamplesYear function
# Count the number of biological samples per year
# TODO: Update this to be only 'Representative' samples (i.e., dat=bio?)
bioNum <- CountBiosamplesYear( dat=bioRaw )
# Determine number of biosample by type in current year
GetSampleNumType <- function( dat ) {
# This function determines biosample type by gear (fishery) and use (source)
# for the current year, and returns a data frame.
# Get catch in current year by gear and disposal
samp <- dat %>%
filter( Year == max(yrRange) ) %>%
group_by( SourceCode, GearCode ) %>%
summarise( Number=n_distinct(Sample) ) %>%
ungroup( ) %>%
mutate( Type=ifelse(SourceCode %in% c(2, 3, 5), "Test", "Commercial") )
# Combine with gear names
sampGear <- left_join( x=samp, y=select(.data=tGear, Gear, GearCode),
by="GearCode" )
# Combine with disposal names
sampGearSource <- left_join( x=sampGear, y=tSource, by="SourceCode" )
# A bit more wrangling
res <- sampGearSource %>%
rename( Use=SampleSource ) %>%
select( Type, Gear, Use, Number ) %>%
arrange( Type, Gear, Use )
# Replace NA with 0
res[is.na(res)] <- 0
# If there are no rows
if( nrow(res) == 0 ) {
# Ensure there are 4 columns
if( ncol(res) != 4 ) warning( "Result must have 4 columns", call.=FALSE )
# Add a dummy row
res[1, ] <- c( NA, NA, NA, as.integer(0) )
} # End if there are no rows
# Warning if number of biosamples don't match
if( bioNum$Total[bioNum$Year==max(yrRange)] != sum(res$Number) )
warning( "Number of biosamples differ in most recent year", call.=FALSE )
# Return the catch
return( res )
} # End GetSampleNumType function
# Determine biosample types in current year
# TODO: Update this to be only 'Representative' samples (i.e., dat=bio?)
bioTypeNum <- GetSampleNumType( dat=bioRaw )
# If region is Central Coast
if( region == "CC" ) {
# Ratio of number of biological samples between groups
propNumBioHist <- bio %>%
filter( GearCode == 29, Year %in% yrsRatioHist ) %>%
group_by( Year, Group ) %>%
summarise( Number=n_distinct(Sample) ) %>%
mutate( Proportion=Number/SumNA(Number) ) %>%
group_by( Group ) %>%
summarise( SampWt=MeanNA(Proportion) ) %>%
ungroup( )
# Merge weights into the main bio table (i.e., to fix unbalanced sampling among
# groups in identified years)
bio <- bio %>%
select( -SampWt ) %>%
left_join( y=propNumBioHist, by="Group" ) %>%
mutate( SampWt=ifelse(Year %in% yrsRatioFix & Period == 2, SampWt, 1) )
} # End if region is Central Coast
# Count the number of fish aged by year and gear (and as a proportion): use the
# 'SampWt' column to fix unrepresentative sampling if identified
numAgedYearGear <- bio %>%
select( Period, Year, Age, SampWt ) %>%
na.omit( ) %>%
group_by( Period, Year, Age ) %>%
summarise( Number=SumNA(SampWt) ) %>%
mutate( Proportion=Number/SumNA(Number) ) %>%
ungroup( ) %>%
arrange( Period, Year, Age )
# Count the number of fish aged by year (and as a proportion) by seine gear:
# use the 'SampWt' column to fix unrepresentative sampling if identified
numAgedYear <- bio %>%
filter( GearCode == 29 ) %>% # %in% c(19, 29) (originally == 29)
select( Year, Age, SampWt ) %>%
na.omit( ) %>%
group_by( Year, Age ) %>%
summarise( Number=SumNA(SampWt) ) %>%
mutate( Proportion=Number/SumNA(Number) ) %>%
ungroup( ) %>%
arrange( Year, Age )
# Proportion in SoG per year
SoGYr <- bio %>%
filter( GearCode == 29 ) %>%
select( Year, Age, SampWt, Section ) %>%
na.omit( ) %>%
group_by( Year ) %>%
summarise( SoG=SumNA(SampWt[Section %in% c(132, 135)])*100/
SumNA(SampWt) ) %>%
ungroup( )
# Reshape and format proportion-at-age
FormatPropAtAge <- function( dat ) {
# This function reshapes the proportion-at-age data from long to wide,
# replaces NAs with zeros, ensures that all years are present, and returns a
# data frame
# Make a table and reshape to wide: proportion-at-age
wide <- dat %>%
select( Year, Age, Proportion ) %>%
spread( key=Age, value=Proportion, drop=FALSE )
# Replace NAs with 0
wide[is.na(wide)] <- 0
# Fill in missing years
res <- wide %>%
filter( Year >= firstYrTab ) %>%