Log-Likelihood Visualization for Archimedean Copulas

Marius Hofert and Martin Mächler

2019-04-21

require(copula)
doExtras <- FALSE

Intro

This vignette visualizes (log) likelihood functions of Archimedean copulas, some of which are numerically challenging to compute. Because of this computational challenge, we also check for equivalence of some of the several computational methods, testing for numerical near-equality using all.equal(L1, L2).

Auxiliary functions

We start by defining the following auxiliary functions.

##' @title [m]inus Log-Likelihood for Archimedean Copulas ("fast version")
##' @param theta parameter (length 1 for our current families)
##' @param acop Archimedean copula (of class "acopula")
##' @param u data matrix n x d
##' @param n.MC if > 0 MC is applied with sample size equal to n.MC; otherwise,
##'        the exact formula is used
##' @param ... potential further arguments, passed to <acop> @dacopula()
##' @return negative log-likelihood
##' @author Martin Maechler (Marius originally)
mLogL <- function(theta, acop, u, n.MC=0, ...) { # -(log-likelihood)
    -sum(acop@dacopula(u, theta, n.MC=n.MC, log=TRUE, ...))
}
##' @title Plotting the Negative Log-Likelihood for Archimedean Copulas
##' @param cop an outer_nacopula (currently with no children)
##' @param u n x d  data matrix
##' @param xlim x-range for curve() plotting
##' @param main title for curve()
##' @param XtrArgs a list of further arguments for mLogL()
##' @param ... further arguments for curve()
##' @return invisible()
##' @author Martin Maechler
curveLogL <- function(cop, u, xlim, main, XtrArgs=list(), ...) {
    unam <- deparse(substitute(u))
    stopifnot(is(cop, "outer_nacopula"), is.list(XtrArgs),
              (d <- ncol(u)) >= 2, d == dim(cop),
              length(cop@childCops) == 0# not yet *nested* A.copulas
              )
    acop <- cop@copula
    th. <- acop@theta # the true theta
    acop <- setTheta(acop, NA) # so it's clear, the true theta is not used below
    if(missing(main)) {
        tau. <- cop@copula@tau(th.)
        main <- substitute("Neg. Log Lik."~ -italic(l)(theta, UU) ~ TXT ~~
               FUN(theta['*'] == Th) %=>% tau['*'] == Tau,
               list(UU = unam,
                TXT= sprintf("; n=%d, d=%d;  A.cop",
                         nrow(u), d),
                FUN = acop@name,
                Th = format(th.,digits=3),
                Tau = format(tau., digits=3)))
    }
    r <- curve(do.call(Vectorize(mLogL, "theta"), c(list(x, acop, u), XtrArgs)),
               xlim=xlim, main=main,
               xlab = expression(theta),
               ylab = substitute(- log(L(theta, u, ~~ COP)), list(COP=acop@name)),
               ...)
    if(is.finite(th.))
        axis(1, at = th., labels=expression(theta["*"]),
             lwd=2, col="dark gray", tck = -1/30)
    else warning("non-finite cop@copula@theta = ", th.)
    axis(1, at = initOpt(acop@name),
         labels = FALSE, lwd = 2, col = 2, tck = 1/20)
    invisible(r)
}

Ensure that we are told about it, if the numerical algorithms choose methods using Rmpfr (R package interfacing to multi precision arithmetic MPFR):

op <- options("copula:verboseUsingRmpfr"=TRUE) # see when "Rmpfr" methods are chosen automatically

Joe’s family

Easy case (\(\tau=0.2\))

## [1] 1.443824

Here, the three different methods work “the same”:

## [1] 1.432898
##    user  system elapsed 
##   1.150   0.003   1.160

##    user  system elapsed 
##   2.213   0.008   2.236
##       Min.    1st Qu.     Median       Mean    3rd Qu.       Max. 
##  0.000e+00  4.900e+01  6.430e+02 2.777e+175  1.932e+04 5.553e+177
## [1] 1.449569
##    user  system elapsed 
##   1.081   0.010   1.098

## [1] 1.451916
##       Min.    1st Qu.     Median       Mean    3rd Qu.       Max. 
##  0.000e+00  7.500e+01  9.080e+02 1.981e+259  1.434e+04 3.961e+261
## [1] -1789.59
## [1] -3882.819
## [1] -3517.366

Harder case (\(d=150\), \(\tau=0.3\))

## [1] 1.772108
## [1] 1.78459

##    user  system elapsed 
##   1.438   0.013   1.459

Even harder case (\(d=180\), \(\tau=0.4\))

## [1] 2.219066
## [1] 2.217593

Gumbel’s family

Easy case (\(\tau=0.2\))

## [1] 1.25
## [1] 1.227659

##    user  system elapsed 
##   1.640   0.030   1.681

Harder case (\(d=150\), \(\tau=0.6\))

## [1] 2.5
##    user  system elapsed 
##   0.308   0.016   0.326
##    user  system elapsed 
##   0.318   0.015   0.336
##    user  system elapsed 
##   0.317   0.016   0.335
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   41.59   53.30   81.09  116.91  137.54  707.13

Frank’s family (an already hard case)

n <- 64
d <- 5
tau <- 0.8
(theta <- copFrank@iTau(tau))
## [1] 18.19154
cop <- onacopulaL("Frank", list(theta,1:d))
set.seed(11) # these seeds give no problems: 101, 41, 21
U. <- rnacopula(n,cop)
cop@copula <- setTheta(cop@copula, NA) # forget the true theta
system.time(f.ML <- emle(U., cop)); f.ML # --> fine: theta = 18.033, Log-lik = 314.01
##    user  system elapsed 
##   0.048   0.000   0.049
## 
## Call:
## bbmle::mle2(minuslogl = nLL, start = start, optimizer = "optimize", 
##     lower = interval[1], upper = interval[2])
## 
## Coefficients:
##   theta 
## 18.0333 
## 
## Log-likelihood: 314.01
if(doExtras)
    system.time(f.mlMC <- emle(U., cop, n.MC = 1e4)) # with MC
stopifnot(all.equal(unname(coef(f.ML)), 18.03331, tolerance= 1e-6),
      all.equal(f.ML@min, -314.0143, tolerance=1e-6),
      !doExtras || ## Simulate MLE (= SMLE) is "extra" random,  hmm...
      all.equal(unname(coef(f.mlMC)), 17.8, tolerance= 0.01)
      ##           64-bit ubuntu: 17.817523
      ##         ? 64-bit Mac:    17.741
     )

cop@copula <- setTheta(cop@copula, theta)
r. <- curveLogL(cop, U., c(1, 200)) # => now looks fine

tail(as.data.frame(r.), 15)
##          x        y
## 87  172.14 2105.690
## 88  174.13 2143.642
## 89  176.12 2181.637
## 90  178.11 2219.675
## 91  180.10 2257.754
## 92  182.09 2295.874
## 93  184.08 2334.034
## 94  186.07 2372.232
## 95  188.06 2410.468
## 96  190.05 2448.742
## 97  192.04 2487.051
## 98  194.03 2525.396
## 99  196.02 2563.776
## 100 198.01 2602.189
## 101 200.00 2640.636
stopifnot( is.finite( r.$y ),
      ## and is convex (everywhere):
      diff(r.$y, d=2) > 0)
options(op) # revert to previous state

Session information

## R version 3.6.0 RC (2019-04-19 r76407)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Fedora 28 (Workstation Edition)
## 
## Matrix products: default
## BLAS:   /data/gannet/ripley/R/R-patched/lib/libRblas.so
## LAPACK: /data/gannet/ripley/R/R-patched/lib/libRlapack.so
## 
## attached base packages:
##  [1] parallel  grid      stats4    tools     stats     graphics  grDevices
##  [8] utils     datasets  methods   base     
## 
## other attached packages:
## [1] rugarch_1.4-1     gsl_2.1-6         mev_1.11          lattice_0.20-38  
## [5] bbmle_1.0.20      copula_0.999-19.1
## 
## loaded via a namespace (and not attached):
##  [1] splines_3.6.0               assertthat_0.2.1           
##  [3] expm_0.999-4                yaml_2.2.0                 
##  [5] GeneralizedHyperbolic_0.8-4 numDeriv_2016.8-1          
##  [7] pillar_1.3.1                glue_1.3.1                 
##  [9] DistributionUtils_0.6-0     digest_0.6.18              
## [11] colorspace_1.4-1            sandwich_2.5-1             
## [13] htmltools_0.3.6             Matrix_1.2-17              
## [15] plyr_1.8.4                  pcaPP_1.9-73               
## [17] pkgconfig_2.0.2             ismev_1.42                 
## [19] purrr_0.3.2                 mvtnorm_1.0-10             
## [21] scales_1.0.0                rootSolve_1.7              
## [23] gmm_1.6-2                   tibble_2.1.1               
## [25] nleqslv_3.3.2               ADGofTest_0.3              
## [27] mgcv_1.8-28                 gmp_0.5-13.5               
## [29] bayesplot_1.6.0             ggplot2_3.1.1              
## [31] lazyeval_0.2.2              magrittr_1.5               
## [33] crayon_1.3.4                mclust_5.4.3               
## [35] evaluate_0.13               ks_1.11.4                  
## [37] nlme_3.1-139                MASS_7.3-51.4              
## [39] xts_0.11-2                  truncnorm_1.0-8            
## [41] pspline_1.0-18              stringr_1.4.0              
## [43] revdbayes_1.3.3             munsell_0.5.0              
## [45] stabledist_0.7-1            compiler_3.6.0             
## [47] SkewHyperbolic_0.4-0        evd_2.3-3                  
## [49] rlang_0.3.4                 nloptr_1.2.1               
## [51] ggridges_0.5.1              Rsolnp_1.16                
## [53] Runuran_0.27                spd_2.0-1                  
## [55] partitions_1.9-19           rmarkdown_1.12             
## [57] boot_1.3-22                 gtable_0.3.0               
## [59] polynom_1.4-0               R6_2.4.0                   
## [61] zoo_1.8-5                   knitr_1.22                 
## [63] dplyr_0.8.0.1               KernSmooth_2.23-15         
## [65] stringi_1.4.3               Rcpp_1.0.1                 
## [67] tidyselect_0.2.5            xfun_0.6