Back to Article
presentation
Download Source

Master Thesis

P-release kinetic as a predictor for P-availability in the STYCS Trials

Author

Marc Jerónimo Pérez y Ropero

In [1]:
#| include: false
#| message: false
#| warning: false

suppressPackageStartupMessages({
  library(multcomp)
  library(car)
  library(tidyr)
  library(lme4)
  library(ggplot2)
  library(ggtext)
  library(ggpmisc)
  library(nlme)
  library(latex2exp)
  library(kableExtra)
  library(broom)
  library(dplyr)
  library(MuMIn)
  library(kableExtra)
  library(dplyr)
  library(lmerTest)
  library(webshot2)

})

options(warn = -1)
load("~/Documents/Master Thesis/Master-Thesis-P-kinetics/data/results_coefficient_analysis")
RES <- readRDS("~/Documents/Master Thesis/Master-Thesis-P-kinetics/data/RES.rds")
d <- d[d$Site!="Cadenazzo",]

D <- RES$D2

# 'd' is your starting dataframe with all 2017-2022 data

# This single command creates your final, ready-to-use dataframe.
D <- D %>%
  # First, convert all categorical columns to factors.
  mutate(across(c(site, Site, Treatment, Rep,rep, block, year, uid, ui, location), as.factor)) %>%
  
  # SECOND, on that same dataframe, scale all the columns that are still numeric.
  mutate(across(where(is.numeric), scale))

# That's it! 'd_prepared_for_modeling' is the only dataframe you need.
# It contains both your factor columns and your scaled numeric columns.
# You can now use it directly in your lmer() models.

fit.soil.PS  <- lmer(PS     ~ soil_0_20_clay+ soil_0_20_pH_H2O + soil_0_20_Corg + soil_0_20_silt + Feox + Alox  + (1|year) + (1|Site)  + (1|Site:block) + (1|Site:Treatment), D)
boundary (singular) fit: see help('isSingular')
#| include: false
#| message: false
#| warning: false

# fit.soil.PS2 <- lmer(log(PS)     ~ soil_0_20_clay+ soil_0_20_pH_H2O + soil_0_20_Corg + soil_0_20_silt  + (1|year) + (1|Site)  + (1|Site:block) + (1|Site:Treatment), D)
fit.soil.k   <- lmer(k           ~ soil_0_20_clay+ soil_0_20_pH_H2O + soil_0_20_Corg + soil_0_20_silt + Feox + Alox + (1|year) + (1|Site)  + (1|Site:block) + (1|Site:Treatment), D)
boundary (singular) fit: see help('isSingular')
#| include: false
#| message: false
#| warning: false

fit.soil.kPS <- lmer(I(k*PS)~ soil_0_20_clay+ soil_0_20_pH_H2O + soil_0_20_Corg + soil_0_20_silt + Feox + Alox + (1|year) + (1|Site)  + (1|Site:block) + (1|Site:Treatment), D)
boundary (singular) fit: see help('isSingular')
#| include: false
#| message: false
#| warning: false

fit.soil.kPS2<- lmer(I(k*log(PS))~ soil_0_20_clay+ soil_0_20_pH_H2O + soil_0_20_Corg + soil_0_20_silt + Feox + Alox + (1|year) + (1|Site)  + (1|Site:block) + (1|Site:Treatment), D)
fixed-effect model matrix is rank deficient so dropping 4 columns / coefficients
#| include: false
#| message: false
#| warning: false

fit.soil.CO2 <- lmer(soil_0_20_P_CO2~ soil_0_20_clay+ soil_0_20_pH_H2O + soil_0_20_Corg + soil_0_20_silt + Feox + Alox + (1|year) + (1|Site)  + (1|Site:block) + (1|Site:Treatment), D)
boundary (singular) fit: see help('isSingular')
#| include: false
#| message: false
#| warning: false

fit.soil.AAE10<-lmer(soil_0_20_P_AAE10~ soil_0_20_clay+ soil_0_20_pH_H2O + soil_0_20_Corg + soil_0_20_silt + Feox + Alox + (1|year) + (1|Site)  + (1|Site:block) + (1|Site:Treatment), D)
boundary (singular) fit: see help('isSingular')
#| include: false
#| message: false
#| warning: false

fit.kin.Yrel     <- lmer(Ymain_rel         ~ k * log(PS)  + (1|year) + (1|Site)  + (1|Site:block), D)
boundary (singular) fit: see help('isSingular')
#| include: false
#| message: false
#| warning: false

fit.grud.CO2.Ynorm     <- lmer(Ymain_norm    ~ log(soil_0_20_P_CO2) + (1|year) + (1|Site)  + (1|Site:block), D, subset = Treatment != "P166")
boundary (singular) fit: see help('isSingular')
#| include: false
#| message: false
#| warning: false

r.squaredGLMM(fit.grud.CO2.Ynorm)
            R2m        R2c
[1,] 0.01170003 0.08311711
#| include: false
#| message: false
#| warning: false

fit.grud.AAE10.Ynorm   <- lmer(Ymain_norm    ~ log(soil_0_20_P_AAE10) + (1|year) + (1|Site)  + (1|Site:block), D, subset = Treatment != "P166")
boundary (singular) fit: see help('isSingular')
#| include: false
#| message: false
#| warning: false

r.squaredGLMM(fit.grud.AAE10.Ynorm)
            R2m       R2c
[1,] 0.08383486 0.3608054
#| include: false
#| message: false
#| warning: false

fit.grud.CO2.AAE10.Ynorm <- lmer(Ymain_norm  ~ log(soil_0_20_P_CO2) * log(soil_0_20_P_AAE10) + (1|year) + (1|Site)  + (1|Site:block), D, subset = Treatment != "P166")
r.squaredGLMM(fit.grud.CO2.AAE10.Ynorm)
           R2m       R2c
[1,] 0.2914964 0.4358532
#| include: false
#| message: false
#| warning: false

# compare with k*log(PS)
fit.kin.Ynorm   <- lmer(Ymain_norm       ~ k*log(PS) + (1|year) + (1|Site)  + (1|Site:block), D, subset = Treatment != "P166")
boundary (singular) fit: see help('isSingular')
#| include: false
#| message: false
#| warning: false

r.squaredGLMM(fit.kin.Ynorm)
            R2m        R2c
[1,] 0.01864865 0.04530061
#| include: false
#| message: false
#| warning: false

fit.kin.Ynorm    <- lmer(Ymain_norm       ~ k * log(PS) + (1|year) + (1|Site)  + (1|Site:block), D, subset = Treatment != "P166")
boundary (singular) fit: see help('isSingular')
#| include: false
#| message: false
#| warning: false

fit.kin.Pexport  <- lmer(annual_P_uptake  ~ k * log(PS) + (1|year) + (1|Site)  + (1|Site:block), D)
boundary (singular) fit: see help('isSingular')
#| include: false
#| message: false
#| warning: false

fit.kin.Pbalance <- lmer(annual_P_balance ~ k * log(PS) + (1|year) + (1|Site)  + (1|Site:block), D)

fit.grud.PS       <- lm(log(PS)         ~ log(soil_0_20_P_CO2) + log(soil_0_20_P_AAE10), D)
fit.grud.k        <- lm(k               ~ log(soil_0_20_P_CO2) + log(soil_0_20_P_AAE10), D)
fit.grud.kPS      <- lm(I(log(k*PS))    ~ log(soil_0_20_P_CO2) + log(soil_0_20_P_AAE10), D)

fit.grud.CO2.Yrel     <- lmer(Ymain_rel       ~ log(soil_0_20_P_CO2) + (1|year) + (1|Site)  + (1|Site:block) + (1|Site:Treatment), D)
boundary (singular) fit: see help('isSingular')
#| include: false
#| message: false
#| warning: false

fit.grud.AAE10.Yrel     <- lmer(Ymain_rel       ~ log(soil_0_20_P_AAE10) + (1|year) + (1|Site)  + (1|Site:block) + (1|Site:Treatment), D)
boundary (singular) fit: see help('isSingular')
#| include: false
#| message: false
#| warning: false

fit.grud.Yrel     <- lmer(Ymain_rel       ~ log(soil_0_20_P_CO2) * log(soil_0_20_P_AAE10) + (1|year) + (1|Site)  + (1|Site:block) + (1|Site:Treatment), D)
boundary (singular) fit: see help('isSingular')
#| include: false
#| message: false
#| warning: false

# this is hopeless, since cannot log becaus of 0's
fit.CO2.Pexport     <- lmer(annual_P_uptake     ~ log(soil_0_20_P_CO2) + (1|year) + (1|Site)  + (1|Site:block) + (1|Site:Treatment), D)
boundary (singular) fit: see help('isSingular')
#| include: false
#| message: false
#| warning: false

fit.AAE10.Pexport     <- lmer(annual_P_uptake ~ log(soil_0_20_P_AAE10) + (1|year) + (1|Site)  + (1|Site:block) + (1|Site:Treatment), D)
boundary (singular) fit: see help('isSingular')
#| include: false
#| message: false
#| warning: false

fit.grud.Pexport     <- lmer(annual_P_uptake     ~ log(soil_0_20_P_CO2) * log(soil_0_20_P_AAE10) + (1|year) + (1|Site)  + (1|Site:block) + (1|Site:Treatment), D)
boundary (singular) fit: see help('isSingular')
#| include: false
#| message: false
#| warning: false

fit.CO2.Pbalance     <- lmer(annual_P_balance      ~ log(soil_0_20_P_CO2)  + (1|year) + (1|Site)  + (1|Site:block) + (1|Site:Treatment), D)
boundary (singular) fit: see help('isSingular')
#| include: false
#| message: false
#| warning: false

fit.AAE10.Pbalance     <- lmer(annual_P_balance      ~ log(soil_0_20_P_AAE10)  + (1|year) + (1|Site)  + (1|Site:block) + (1|Site:Treatment), D)
boundary (singular) fit: see help('isSingular')
#| include: false
#| message: false
#| warning: false

fit.grud.Pbalance     <- lmer(annual_P_balance      ~ log(soil_0_20_P_CO2) * log(soil_0_20_P_AAE10)  + (1|year) + (1|Site)  + (1|Site:block) + (1|Site:Treatment), D)
boundary (singular) fit: see help('isSingular')
#| include: false
#| message: false
#| warning: false

lmer_models_soil <- list(
  PS = fit.soil.PS,
  k = fit.soil.k,
  'log(k*PS)' = fit.soil.kPS,
  CO2 = fit.soil.CO2,
  AAE10 = fit.soil.AAE10
)



lmer_models_yield_norm <- list(
  "Yn-STP-CO2" = fit.grud.CO2.Ynorm,
  "Yn-STP-AAE10" = fit.grud.AAE10.Ynorm,
  "Yn-STP-GRUD" = fit.grud.CO2.AAE10.Ynorm,
  "Yn-Kinetic" = fit.kin.Ynorm
)
  

lmer_models_balance <- list(
  CO2_Pbalance = fit.CO2.Pbalance,
  AAE10_Pbalance = fit.AAE10.Pbalance,
  Grud_Pbalance = fit.grud.Pbalance,
  Kin_Pbalance = fit.kin.Pbalance
)

lmer_models_export <- list(
  CO2_Pexport = fit.CO2.Pexport,
  AAE10_Pexport = fit.AAE10.Pexport,
  Grud_Pexport = fit.grud.Pexport,
  Kin_Pexport = fit.kin.Pexport
)
In [2]:
#| echo: false
create_coef_table <- function(lmer_models, 
                              covariate_order = NULL, 
                              covariate_labels = NULL, # NEU: Benannter Vektor für Zeilennamen
                              model_labels = NULL,
                              descriptor = "Model") {   # NEU: Benannter Vektor für Spaltennamen

  # Extract coefficients and p-values (Ihre Originalfunktion, keine Änderung hier)
  extract_coef_info <- function(model) {
    # ... (keine Änderung, Ihr Code bleibt hier)
    coef_matrix <- summary(model)|> coef()
    estimates <- coef_matrix[, 1]
    p_values <- coef_matrix[, ncol(coef_matrix)]
    formatted_coef <- sapply(seq_along(estimates), function(i) {
      est_str <- sprintf("%.3f", estimates[i])
      stars <- if (p_values[i] < 0.001) "***" else
               if (p_values[i] < 0.01) "** " else
               if (p_values[i] < 0.05) "* " else  ""
      paste0(est_str,stars)
    })
    names(formatted_coef) <- rownames(coef_matrix)
    return(formatted_coef)
  }

  # Extract R-squared values (Ihre Originalfunktion, keine Änderung hier)
  extract_r_squared <- function(model) {
    # ... (keine Änderung, Ihr Code bleibt hier)
    r2_values <- MuMIn::r.squaredGLMM(model) # MuMIn:: hinzugefügt für Klarheit
    return(c(
      R2m = sprintf("%.3f", r2_values[1, "R2m"]),
      R2c = sprintf("%.3f", r2_values[1, "R2c"])
    ))
  }

  # Daten extrahieren (Ihr Originalcode)
  all_coefs <- lapply(lmer_models, extract_coef_info)
  all_r_squared <- lapply(lmer_models, extract_r_squared)
  all_covariate_names <- unique(unlist(lapply(all_coefs, names)))

  if (is.null(covariate_order)) {
    covariate_order <- c("(Intercept)", sort(all_covariate_names[all_covariate_names != "(Intercept)"]))
  }
  covariate_order <- covariate_order[covariate_order %in% all_covariate_names]
  final_order <- c(covariate_order, "R2m", "R2c")

  # Matrix erstellen (Ihr Originalcode)
  results_matrix <- matrix("",
                           nrow = length(final_order),
                           ncol = length(lmer_models),
                           dimnames = list(final_order, names(lmer_models)))

  # Matrix füllen (Ihr Originalcode)
  for (model_name in names(lmer_models)) {
    model_coefs <- all_coefs[[model_name]]
    for (covar in names(model_coefs)) {
      if (covar %in% covariate_order) {
        results_matrix[covar, model_name] <- model_coefs[covar]
      }
    }
    r2_values <- all_r_squared[[model_name]]
    results_matrix["R2m", model_name] <- r2_values["R2m"]
    results_matrix["R2c", model_name] <- r2_values["R2c"]
  }

  # --- NEU: Zeilen- und Spaltennamen ersetzen ---
  
  # Ersetze die Zeilennamen (Kovariaten), falls covariate_labels übergeben wurde
  if (!is.null(covariate_labels)) {
    # Finde die Übereinstimmungen in den aktuellen Zeilennamen
    row_matches <- match(rownames(results_matrix), names(covariate_labels))
    # Ersetze nur die, die gefunden wurden
    new_rownames <- rownames(results_matrix)
    new_rownames[!is.na(row_matches)] <- covariate_labels[row_matches[!is.na(row_matches)]]
    rownames(results_matrix) <- new_rownames
  }
  
  # Ersetze die Spaltennamen (Modelle), falls model_labels übergeben wurde
  if (!is.null(model_labels)) {
    col_matches <- match(colnames(results_matrix), names(model_labels))
    new_colnames <- colnames(results_matrix)
    new_colnames[!is.na(col_matches)] <- model_labels[col_matches[!is.na(col_matches)]]
    colnames(results_matrix) <- new_colnames
  }
  
  # --- Ende der neuen Sektion ---

  # Convert to data frame for kable
  results_df <- data.frame("Model" = rownames(results_matrix),
                           results_matrix,
                           check.names = FALSE, # Verhindert, dass R Spaltennamen ändert
                           stringsAsFactors = FALSE)
  
  results_df
}

Introduction

  • In my Internship I studied the current GRUD, particularly Mg, P and K

  • Fertilizer requirement models imply \(Y\sim STP + Clay\) & \(P-\text{Export}\sim STP + Clay\)

  • Currently only stationary measurement of STP are considered

  • Could a kinetic desorption-model better explain the soil status and yield data?

The STYCS Experiment

  • LTE STYCS, all treatment conditions equal except P-fertilization, which is in 6 Levels, 3 were considered(\(P0\),\(P100\),\(P166\))
  • 4 Sites regarded; Ellighausen, Rümlang-Altwi, Oensingen, Zürich-Reckenholz
  • 5 Sites, 4 blocks per site, 6 Treatment-Levels, 4 Repetitions
  • Years 2017-2022 were modelled, kinetic data was collected only for year 2022

A kinetic Approach to P

The net-desorption was modeled using a first-order kinetic equation:

1. The Rate of Release: The change in P over time is proportional to the remaining desorbable P. \[\frac{dP}{dt}=k \times (P^S-P)\]

2. The Solution: When solved, this gives us the equation for the curve: \[P(t)=P^S \times (1-e^{-kt})\]

  • \(P^S\) (PS): The maximum desorbable P pool.
  • \(k\): The first-order rate constant.
In [3]:
#| echo: false
#| message: false
#| warning: false
#| fig-align: "center"
#| fig-height: 7

library(ggplot2)

# Parameters for our ideal curve
PS <- 0.8 # The plateau
k <- 0.1   # The rate constant

# Create plot data
t <- seq(0, 120, length.out = 100)
P <- PS * (1 - exp(-k * t))
df_conceptual <- data.frame(t, P)

# Calculate half-life and tangent parameters
tau <- log(2) / k
P_tau <- PS / 2
slope_tau <- k * PS / 2
intercept_tau <- (PS / 2) * (1 - log(2))

# --- NEW DARKER COLORS ---
dark_purple <- "#9A51E0"
dark_orange <- "#D97F22"
dark_cyan <- "#48B6D5"
dark_green <- "#2ABD4A"

# Create a separate data frame for annotations
annotation_data <- data.frame(
  x = c(50, tau, 25),
  y = c(PS + 0.05, -0.05, 0.5),
  label = c(
    "'PS (The desorbable P pool)'",
    "paste(tau, ' = ', frac(ln(2), k), ' (Half-life)')",
    "paste('Rate at half-life = ', frac(k %*% PS, 2))"
  ),
  color = c(dark_orange, dark_cyan, dark_green), # Using new darker colors
  angle = c(0, 0, 18)
)

# Create the plot
ggplot(df_conceptual, aes(x = t, y = P)) +
  geom_line(color = dark_purple, size = 1.5) + # Using new darker purple
  geom_hline(yintercept = PS, linetype = "dashed", color = dark_orange, size = 1) + # Using new darker orange
  geom_segment(aes(x = tau, y = 0, xend = tau, yend = P_tau), linetype = "dotted", color = dark_cyan) + # Using new darker cyan
  geom_segment(aes(x = 0, y = P_tau, xend = tau, yend = P_tau), linetype = "dotted", color = dark_cyan) + # Using new darker cyan
  geom_abline(intercept = intercept_tau, slope = slope_tau, linetype = "dashed", color = dark_green, size = 1) + # Using new darker green
  geom_text(
    data = annotation_data,
    aes(x = x, y = y, label = label, color = I(color), angle = angle),
    size = 5,
    parse = TRUE
  ) +
  labs(
    title = "Simulated Kinetic Model",
    x = "Time (t)",
    y = "P desorbed (mg/L)"
  ) +
  theme_minimal(base_size = 16) +
  theme(
    plot.title = element_text(hjust = 0.5)
  ) +
  coord_cartesian(ylim = c(0, 1), clip = "off")

Adapted Kinetic-Experiment Setup

In [4]:
#| label: fig-replicability-setup
#| echo: false
#| message: false
#| warning: false
#| include: false 
# This chunk does all calculations for the next 3 slides and is hidden

library(dplyr); library(ggplot2); library(nlme); library(latex2exp)

Res <- nlsList(Pv.mg.L. ~ PS * (1 - exp(-k * (t.dt))) | uid, d[, c("Pv.mg.L.", "ui", "t.dt","uid")],  start=list(PS=0.1,k=0.2))
# summary(Res)
# d$nls_pred <- predict(Res)

# Extract coefficients from the nlsList results
nls_coefs <- coef(Res)
nls_coefs$uid <- rownames(nls_coefs)


d_plot <- merge(d, nls_coefs, by = "uid")

# Most straightforward approach - create curves manually
time_seq <- seq(min(d$t.dt, na.rm = TRUE), max(d$t.dt, na.rm = TRUE), length.out = 100)

# Create prediction data
pred_data <- d_plot %>%
  select(uid, Site, Treatment, Repetition, PS, k,ui,Pv_Olsen.mg.L.,Pv_labile.mg.L.) %>%
  distinct() %>%
  crossing(t.dt = time_seq) %>%
  mutate(pred_Pv = PS * (1 - exp(-k * (t.dt))))

hline_data <- d_plot %>%
  select(Site, Treatment, Repetition, Pv_Olsen.mg.L., Pv_labile.mg.L.) %>%
  distinct()

# Step 1: Prepare data and fit the flawed linearized model
d_linear <- d_plot %>%
  mutate(
    flawed_PS = Pv_Olsen.mg.L.,
    Y1 = log(1 - (Pv.mg.L. / flawed_PS))
  ) %>%
  filter(is.finite(Y1))

res_linear <- lmList(Y1 ~ t.dt | uid, data = d_linear, na.action = na.pass)
coefs_linear <- coef(res_linear)
coefs_linear$k_flawed <- -coefs_linear$t.dt
coefs_linear$uid <- rownames(coefs_linear)

# Step 2: Generate predictions from the flawed model
pred_data_flawed <- d_plot %>%
  select(uid, Site, Treatment, Repetition, Pv_Olsen.mg.L.) %>%
  distinct() %>%
  left_join(select(coefs_linear, uid, k_flawed), by = "uid") %>%
  crossing(t.dt = seq(0, 60, length.out = 100)) %>%
  mutate(pred_Pv_flawed = Pv_Olsen.mg.L. * (1 - exp(-k_flawed * t.dt)))

# Step 3: Define the base plot object
p_base <- ggplot() +
  geom_point(data = d_plot, aes(y = Pv.mg.L., x = t.dt, col = Repetition), alpha = 0.7) +
  facet_grid(Treatment ~ Site, scales = "free_y") +
  labs(x = TeX("$Time (min)$"), y = TeX("$P_{V}(\\frac{mg}{L})$")) +
  theme_bw(base_size = 14)

Method Validation

In [5]:
#| echo: false
#| message: false
#| warning: false
#| fig-height: 9
#| fig-width: 16
p_base + labs(title = "Step 1: Raw Experimental Data")

Method Validation

In [6]:
#| echo: false
#| message: false
#| warning: false
#| fig-height: 9
#| fig-width: 16
p_base +
  geom_line(data = pred_data_flawed, aes(x = t.dt, y = pred_Pv_flawed, color = Repetition), linetype = "dashed", size = 1) +
  labs(title = "Step 2: The Flawed Linearized Model Fit")

Method Validation

In [7]:
#| echo: false
#| message: false
#| warning: false
#| fig-height: 9
#| fig-width: 16
p_base +
  geom_line(data = pred_data, aes(x = t.dt, y = pred_Pv, color = Repetition), size = 1) +
  labs(title = "Step 3: The Successful Non-Linear Model Fit")

Method Validation

In [8]:
#| echo: false
#| message: false
#| warning: false
#| fig-height: 9
#| fig-width: 16

# Create the data for the hline
hline_data_comparison <- d_plot %>%
  select(Site, Treatment, Repetition, Pv_Olsen.mg.L., Pv_labile.mg.L.) %>%
  mutate(flawed_estimate = Pv_Olsen.mg.L. - Pv_labile.mg.L.) %>%
  distinct()

# Create the final comparison plot
p_base +
  # Plot 1: Our successful non-linear model fit
  geom_line(data = pred_data, aes(x = t.dt, y = pred_Pv, col = Repetition), size = 1) +
  
  # Plot 2: The flawed P-Olsen - P-labile estimate
  geom_hline(
    data = hline_data_comparison,
    aes(yintercept = flawed_estimate, color = Repetition),
    linetype = "dashed", size = 1.2
  ) +
  
  labs(
    title = "Successful Fit vs. Flawed Assumption",
    subtitle = "Solid lines = Our successful model | Dashed lines = Flawed P-Olsen estimate"
  )

The Competing Predictors

We compared two approaches to predict agronomic outcomes:

The Standard Method

  • \(P_{CO2}\)
  • \(P_{AAE10}\)

These are static “snapshots” of the soil’s P capacity.

The Kinetic Method

  • PS (\(P^S\)): The size of the available P pool.
  • k: The rate of P release.
  • \(J_0=k*P^S\) The maximal desorption speed/initial Flow

This approach measures P as a dynamic process.

Measuring Success: The Key Outcomes

We tested the models against three agronomic metrics:

1. Normalized Yield (\(Y_{norm}\)) - How well did the crop perform relative to its maximum potential at that specific site?

2. P-Export (\(P_{up}\)) - How much phosphorus did the crop remove from the field?

3. P-Balance (\(P_{bal}\)) - What is the long-term surplus or deficit of P in the soil? This is a key indicator of sustainability.

How We Compared the Models

To ensure a fair and robust comparison, we used a consistent statistical approach:

1. Linear Mixed-Effects Models (lmer) - We built a separate model for each agronomic outcome (Yield, P-Export, P-Balance). - This approach accounts for the nested structure of the STYCS experiment (sites, years, blocks).

2. Standardized Coefficients (β) - All numeric variables were scaled and centered (mean=0, sd=1). - This allows us to directly compare the effect size of each predictor. A larger coefficient means a stronger effect.

3. The Comparison - In the following tables, each column represents a separate model where we test a different set of predictors.

What Do the P Metrics Actually Measure?

A robust P metric should reflect both the soil’s inherent properties (like texture and pH) and the impact of management (fertilization). We modeled each metric to see what drives it.

In [9]:
#| echo: false
#| message: false
#| warning: false

highlight_significant_cells <- function(df) {
  df_highlighted <- df

  for (col in names(df)[-1]) {
    df_highlighted[[col]] <- cell_spec(df[[col]],
      format = "html",
      color = case_when(
        grepl("\\*\\*\\*", df[[col]]) ~ "red",
        grepl("\\*\\*", df[[col]])    ~ "orange",
        grepl("\\*", df[[col]])       ~ "yellow",
        TRUE                          ~ "white"
      ),
      bold = grepl("\\*", df[[col]])
    )
  }

  return(df_highlighted)
}

# Define the new, clean names for the table
lmer_models_soil <- list(
  "m.PS" = fit.soil.PS,
  "m.k" = fit.soil.k,
  "m.log(k*PS)" = fit.soil.kPS,
  "m.PCO2" = fit.soil.CO2,
  "m.PAAE10" = fit.soil.AAE10
)
covariate_labels_soil_props <- c(
  "(Intercept)" = "Intercept",
  "soil_0_20_Corg" = "C-org",
  "soil_0_20_clay" = "Clay",
  "soil_0_20_silt" = "Silt",
  "soil_0_20_pH_H2O" = "pH",
  "Fed" = "Fed",
  "Ald" = "Ald",
  "R2m" = "marginal R²",
  "R2c" = "conditional R²"
)
model_labels_soil <- c(
  "m.PCO2" = "P-CO2",
  "m.PAAE10" = "P-AAE10",
  "m.PS" = "PS",
  "m.k" = "k",
  "m.log(k*PS)" = "J0"
)
results_table_soil <- create_coef_table(
  lmer_models_soil,
  covariate_labels = covariate_labels_soil_props,
  model_labels = model_labels_soil
)


results_table_soil <- results_table_soil[-1, ]

# Apply formatting and display
coef_table_soil_highlighted <- highlight_significant_cells(results_table_soil)
kable(coef_table_soil_highlighted,format.args = list(math_method = "mathjax"),
      escape = FALSE, format = "html", row.names = FALSE) %>%
  kable_styling(full_width = FALSE, position = "left")
Model PS k J0 P-CO2 P-AAE10
Alox 0.136 -0.660 -1.204 -0.034 -0.319
Feox -0.098 0.020 -0.571 -0.164 -0.138
Clay -0.062 -1.733** 0.611 -0.007 -0.121
C-org 0.351* 1.044** -0.412 0.166 0.232
pH -0.058 -0.280 0.094 0.075 0.057
Silt -0.046 0.252 0.113 -0.084 0.012
marginal R² 0.175 0.204 0.224 0.125 0.280
conditional R² 0.894 0.963 0.976 0.724 0.832

Predicting Yield: The Standard Test Excels

First, we tested which P metric was best at predicting site-normalized yield (\(Y_{norm}\)).

In [10]:
#| echo: false
#| message: false
#| warning: false

# Define the models for this comparison

lmer_models_yield <- list(
  "P_CO2" = fit.grud.CO2.Ynorm,
  "P_AAE10" = fit.grud.AAE10.Ynorm,
  "GRUD" = fit.grud.CO2.AAE10.Ynorm,
  "Kinetic" = fit.kin.Ynorm
)



# Define the clean labels for the table
covariate_labels <- c(
  "(Intercept)" = "Intercept",
  "log(soil_0_20_P_CO2)" = "P-CO2",
  "log(soil_0_20_P_AAE10)" = "P-AAE10",
  "k" = "k",
  "log(PS)" = "PS",
  "k:log(PS)" = "J0",
  "log(soil_0_20_P_CO2):log(soil_0_20_P_AAE10)" = "P-CO2 * P-AAE10",
  "R2m" = "marginal R²",
  "R2c" = "conditional R²")

model_labels_yield <- c(
  "GRUD" = "GRUD Model",
  "Kinetic" = "Kinetic Model",
  "P_CO2" = "P-CO2 only",
  "P_AAE10" = "P-AAE10 only"
)

# Create and filter the table
results_table_yield <- create_coef_table(
  lmer_models_yield,
  covariate_labels = covariate_labels,
  model_labels = model_labels_yield
)
results_table_yield <- results_table_yield[-1, ] # Remove intercept

# Display the table
coef_table_yield_highlighted <- highlight_significant_cells(results_table_yield)
kable(coef_table_yield_highlighted,
     escape = FALSE, format = "html", row.names = FALSE,
    format.args = list(math_method = "mathjax")) %>%
  kable_styling(full_width = FALSE, position = "left")
Model P-CO2 only P-AAE10 only GRUD Model Kinetic Model
k 0.166
J0 -0.012
PS 0.066
P-AAE10 0.067* 0.432**
P-CO2 0.027 -0.128
P-CO2 * P-AAE10 0.149*
marginal R² 0.012 0.084 0.291 0.019
conditional R² 0.083 0.361 0.436 0.045

Predicting P-Export: A mixed Picture

In [11]:
#| label: create-p-export-table-image
#| echo: false
#| message: false
#| warning: false

# Define models and labels
# ... (use the same covariate_labels and model_labels as before) ...
lmer_models_export <- list(
  "P_CO2" = fit.CO2.Pexport,
  "P_AAE10" = fit.AAE10.Pexport,
  "GRUD" = fit.grud.Pexport,
  "Kinetic" = fit.kin.Pexport
)




# Create, filter, and highlight the table
results_table_export <- create_coef_table(lmer_models_export, covariate_labels = covariate_labels, model_labels = model_labels_yield)
results_table_export <- results_table_export[-1, ]
coef_table_export_highlighted <- highlight_significant_cells(results_table_export)

# Build the kable object
kable(coef_table_export_highlighted,
      escape = FALSE, format = "html",
      row.names = FALSE,
      format.args = list(math_method = "mathjax")) %>%
  kable_styling(full_width = FALSE, font_size = 18)
Model P-CO2 only P-AAE10 only GRUD Model Kinetic Model
k -0.014
J0 0.080
PS -0.018
P-AAE10 0.025 -0.015
P-CO2 0.087 0.131
P-CO2 * P-AAE10 0.011
marginal R² 0.012 0.001 0.016 0.004
conditional R² 0.654 0.685 0.796 0.789
#| label: create-p-export-table-image
#| echo: false
#| message: false
#| warning: false

# Save the kable object as a PNG image

Observation: Here, the results are less clear. P-Export is a more complex variable to predict, and no single model or predictor stands out as being consistently powerful. ## P-balance model summary:

Predicting P-Balance: The Kinetic Model is Superior

In [12]:
lmer_models_balance <- list(
  "P_CO2" = fit.CO2.Pbalance,
  "P_AAE10" = fit.AAE10.Pbalance,
  "GRUD" = fit.grud.Pbalance,
  "Kinetic" = fit.kin.Pbalance
)





coef_table_balance <- create_coef_table(lmer_models_balance, covariate_labels = covariate_labels, model_labels = model_labels_yield)
coef_table_balance <- coef_table_balance[-1,]
# Apply formatting
coef_table_balance_highlighted <- highlight_significant_cells(coef_table_balance)

# Display with kable
kable(coef_table_balance_highlighted,
      escape = FALSE,
      format = "html",
      row.names = FALSE,
      ) |>
  kable_styling(full_width = FALSE, position = "left")
Model P-CO2 only P-AAE10 only GRUD Model Kinetic Model
k 0.155
J0 -0.151
PS 0.341***
P-AAE10 0.009 0.009
P-CO2 -0.023 -0.029
P-CO2 * P-AAE10 0.030
marginal R² 0.001 0.000 0.006 0.122
conditional R² 0.590 0.762 0.596 0.699

Key Findings

  1. A Robust Kinetic Model was Developed
    • The original linearized method was shown to be invalid for these soils.
    • Our non-linear approach provided a superior and statistically robust fit.
  2. Predictive Power is Purpose-Dependent
    • For predicting yield, the standard soil test (\(P_{AAE10}\)) was the most effective predictor.
  3. Kinetics Excel for Long-Term Assessment
    • For predicting the P-Balance, the kinetic parameter PS (the P pool) was vastly superior, while standard tests showed no predictive power.

The Right Tool for the Right Job

My research concludes that the ideal soil P test depends on the question being asked:

For Routine Fertilization

To answer a farmer’s question: “Is P limiting my yield this year?”

The Standard Soil Tests are effective and well-suited.

For Long-Term Sustainability

To answer a policymaker’s question: “Is this soil’s P status sustainable?”

A Kinetic Approach is the superior and necessary tool.