# packages
source("../functions/required_packages.R")
# functions
source("../functions/extract.R") # predictor categories
source("../functions/categories.R") # predictor categories
source("../functions/plot.R") # predictor categories
Example use of the Cephaloplot plot functions
Data
# path to the data
<- "../data/" # note that the output folder needs to be included in the project_wd path if that's the arborescence of your files
project_wd
# observations
<- extract_observations(project_wd,FOLDER_NAME = "zooscan_grey_varfactor",
df_obs SUBFOLDER_NAME="all")
# performance
<- extract_performance(project_wd,FOLDER_NAME = "zooscan_grey_varfactor",
all_perf SUBFOLDER_NAME="all")
# predictions
<- extract_prediction("../data/",FOLDER_NAME = "zooscan_grey_varfactor",SUBFOLDER_NAME="all",
df_day ensemble=TRUE,model=c("BRT","RF"),m=1:12,f=2)
Retrieving data for models: BRT RF
Processing model: BRT
Processing months/factor ensemble per model
Processing model: RF
Processing months/factor ensemble per model
Processing model ensemble
Normalized standard deviation for the ensemble model was calculated with: 191.9999
<- extract_prediction(project_wd,FOLDER_NAME = "zooscan_grey_varfactor",SUBFOLDER_NAME="all",
df_night ensemble=TRUE,model=c("BRT","RF"),m=1:12,f=1)
Retrieving data for models: BRT RF
Processing model: BRT
Processing months/factor ensemble per model
Processing model: RF
Processing months/factor ensemble per model
Processing model ensemble
Normalized standard deviation for the ensemble model was calculated with: 188.6976
# land
<- extract_land(project_wd,FOLDER_NAME="zooscan_grey_varfactor") df_land
Observations
If you have a varfactor, the observations give you the order of the levels, here Night correspond to level 1 and Day to level 2:
unique(df_obs$varfactor)
[1] "Night" "Day"
You can plot the original observations with plot_observations
.
plot_observations(df_obs,colors=c("#2D2B63","#FB3C44","#F7FF84"),
correct.lightness=FALSE,lim=NULL,
lab="Zooplankton \ncommunity \ntransparency",
title="Observations of zooplankton community transparency",
land=df_land)
# custom limits for colorbar (note that values outside of bounds are assigned to the extreme colors), corrected color lightness, world map instead of land mask, custom tet size, legend at the bottom
<- round(quantile(df_obs$measurementvalue,c(0.025,0.975),na.rm=TRUE)/5)*5
lim plot_observations(df_obs,colors=c("#2D2B63","#FB3C44","#F7FF84"), alpha=0.7,
correct.lightness=TRUE,lim=lim,legend_position="bottom",
lab="Zooplankton community transparency",
title="Observations of zooplankton community transparency",
land=NULL,landcolor="grey85",landfill="#E8E8E8") +
theme(legend.text=element_text(size=10))
# add a latitude profile (note that it returns a patchwork so it cannot be as easily customized as a ggplot)
plot_observations(df_obs,colors=c("#2D2B63","#FB3C44","#F7FF84"), alpha=0.7,
correct.lightness=TRUE,lim=lim,legend_position="bottom",
lab="Zooplankton community \ntransparency",
land=NULL,landcolor="grey85",landfill="#E8E8E8",
latitude_profile=TRUE)
# only plot the average, no legend and a different span for loess
plot_observations(df_obs,colors=c("#2D2B63","#FB3C44","#F7FF84"), alpha=0.7,
correct.lightness=TRUE,lim=lim,legend_position="bottom",
lab="Zooplankton community \ntransparency",
land=NULL,landcolor="grey85",landfill="#E8E8E8",
latitude_profile=TRUE,latitude_curves="average",
latitude_legend=FALSE,latitude_span=0.5,
latitude_step=10)
# only plot 2 seasons
plot_observations(df_obs,colors=c("#2D2B63","#FB3C44","#F7FF84"), alpha=0.7,
correct.lightness=TRUE,lim=lim,legend_position="bottom",
lab="Zooplankton community \ntransparency",
land=NULL,landcolor="grey85",landfill="#E8E8E8",
latitude_profile=TRUE,latitude_curves=c("summer","winter"),
latitude_legend=TRUE,latitude_span=0.75)
Model performance
You can plot the different performance metrics used in the QC with the plot_performance
function.
# plot performance for all models - separate metrics
plot_performance(all_perf,plot_type="separate",metrics=NULL,
models=NULL,threshold=TRUE)
# plot_performance for all models - specific metrics (ordered by the first one)
plot_performance(all_perf,plot_type="separate",metrics=c("NSD","CUM_VIP"),
models=c("GLM","MLP","ENSEMBLE"),threshold=FALSE,
dot_size=1,dot_color="red")
# plot performance for all models - combined metrics (threshold_size=0 removes it)
plot_performance(all_perf,plot_type="combined",models=c("GLM","GAM","BRT","RF","ENSEMBLE"),threshold_color="darkgreen",threshold_size=2)
# custom layouts
plot_performance(all_perf,plot_type="combined",models=c("GLM","GAM","BRT","RF","ENSEMBLE"),threshold_color="darkgreen",threshold_size=2) +
scale_color_chroma() +
theme(text=element_text(size=15))
Predictions
You can use plot_prediction
to plot the output prediction of a model or ensemble model, and plot_anomaly
if you want to plot the difference between two models or two runs from Cephalopod.
Note that for this function, your input dataframe should only contain one set of prediction values you want to plot (not for example the output from 2 different models).
Single map
For each, you can choose to have the uncertainty (standard deviation, normalized standard deviation, coefficient of variation or another custom column) to be either not displayed (plot_uncertainty="none"
), added through a bivariate colormap (plot_uncertainty="bivarmap"
) or added through stippling (plot_uncertainty="stippling"
).
# regular map of ensemble model (default: plot the predicted values with no uncertainty)
plot_prediction(output=df_day,
ylim=NULL,ylab="Zooplankton \ncommunity \ntransparency",
ycolors=c("#2D2B63","#FB3C44","#F7FF84"),
title="Day - Zooplankton community transparency",
land=df_land,landfill="beige")
# with lightness correction and a different scale bar (based on quantiles, rounded to closest multiple of 5)
# note that indicating a custom scale forces all outbound values to have the min and max color (oob_squish)
<- round(quantile(df_day$predictedvalue,c(0.025,0.975),na.rm=TRUE)/5)*5
ylim plot_prediction(output=df_day,
ylim=ylim,ylab="Zooplankton \ncommunity \ntransparency",
ycolors=c("#2D2B63","#FB3C44","#F7FF84"),
correct.lightness=TRUE,
title="Day - Zooplankton community transparency",
land=NULL,landcolor="grey85",landfill="#E8E8E8")
# add observations (only for the measurement values and not uncertainty)
# note that if you have defined a different ylim, it will also apply to the observations
plot_prediction(output=df_day,
ylim=NULL,ylab="Zooplankton community \ntransparency",
ycolors=c("#2D2B63","#FB3C44","#F7FF84"),
correct.lightness=TRUE,legend_position="right",
# exclude one very low observation
obs=TRUE,obs_data=filter(df_obs,measurementvalue > 120),obs_size=2,
latitude_profile=FALSE)
# with a latitudinal profile
plot_prediction(output=df_day,
ylim=ylim,ylab="Zooplankton community \ntransparency",
ycolors=c("#2D2B63","#FB3C44","#F7FF84"),
correct.lightness=TRUE,legend_position="bottom",
latitude_profile=TRUE,latitude_position="left")
# regular map of uncertainty
plot_prediction(output=df_day, plot_value="predictedsd",
ylim=NULL,ylab="Zooplankton \ncommunity \ntransparency \nstd",
ycolors=c("lightblue","red"),
title="Day - Zoo community transparency standard deviation",
land=df_land,landfill="beige")
plot_prediction(output=df_day, plot_value="predictedsd",
ylim=NULL,ylab="Zooplankton \ncommunity \ntransparency \nstd",
ycolors=c("#440154", "#21908D","#FDE725"),
title="Day - Zoo community transparency standard deviation")
# add contour lines
<- round(quantile(df_day$predictedsd,c(0.025,0.975),na.rm=TRUE))
xlim plot_prediction(output=df_day, plot_value="predictedsd",
ylim=xlim,ylab="Zooplankton \ncommunity \ntransparency \nstd",
ycolors=c("#440154", "#21908D","#FDE725"),
contour=TRUE,contour_levels=5, contour_color="black",
title="Day - Zoo community transparency standard deviation")
Bivarmap
Stippling
# threshold: mean of standard deviations + standard deviation of standard deviations
=mean(df_day$predictedsd) + 1*sd(df_day$predictedsd)
threshold<- length(which(df_day$predictedsd < threshold))*100/nrow(df_day) # percentage of values below threshold
perc_below_threshold %>%
df_day ggplot(aes(x=predictedsd)) +
geom_histogram(color="white",fill="black") +
geom_vline(aes(xintercept=threshold),color="darkred") +
theme_minimal()
# here xlab is used as the threshold label
plot_prediction(output=df_day,
plot_uncertainty="stippling",uncertainty="predictedsd",
ylim=ylim,
xlab=paste0("sd > ", round(threshold,2), " (~ ", 100-round(perc_below_threshold,1), "%)"),
ylab="Zooplankton community \ntransparency",
threshold=threshold,
ycolors=c("#2D2B63","#FB3C44","#F7FF84"),
stippling_bin=2.5,stippling_size=0.5,stippling_color="black",
correct.lightness=TRUE,
latitude_profile=FALSE,legend_position="bottom")
Anomaly
The last possibility is to calculate an anomaly between two model results (two models, two months or two varfactors for example). The first dataset (i.e. output) is the reference model, and the anomaly_data is the dataset for which we want to compute the anomaly.
Example: output = df_day, anomaly_data = df_night will compte the night anomaly as Night - Day
# threshold: mean of standard deviations + standard deviation of standard deviations
=mean(sqrt(df_night$predictedsd^2+df_day$predictedsd^2)) + 1*sd(sqrt(df_night$predictedsd^2+df_day$predictedsd^2))
threshold<- length(which(sqrt(df_night$predictedsd^2+df_day$predictedsd^2) < threshold))*100/nrow(df_day) # percentage of values below
perc_below_threshold
plot_prediction(output=df_day,
plot_uncertainty="stippling",uncertainty="predictedsd",
ylim=NULL,title="Night anomaly",
xlab=paste0("sd > ", round(threshold,2), " (~ ", 100-round(perc_below_threshold,1), "%)"),
ylab="Zooplankton night \ncommunity transparency \nanomaly",
threshold=threshold,
ycolors=c("red","white","blue"),
stippling_bin=2.5,stippling_size=0.5,stippling_color="black",
anomaly=TRUE,anomaly_data=df_night,
latitude_profile=FALSE,legend_position="bottom")