Pearce et al. 2017 Figures in Partition Coefficient Evaluation

Robert Pearce

2018-01-23

This vignette generates the data and figures from the partition coefficient evaluation paper.

library(httk)
library(gdata)
library(ggplot2)
library(viridis)
library(CensRegMod)
library(gmodels)
library(gplots)
library(scales)
library(colorspace)

We first filter the measured rat Kp data, pc.data. Then the old and new Kp predictions are made, along with error and improvement measures, and these are all consolidated into a table for analysis and plotting. Note that the final table contains log10-transformed values and error and improvements derived from subtracting these values. Only relevant rat values are used. Compounds with Funbound.plasma and partition coefficients of zero are removed as well as compounds with approximated Funbound.plasma values.

pc.table <- NULL
pc.data <- subset(pc.data,fu != 0 & Exp_PC != 0 & Tissue %in% c("Adipose","Bone","Brain","Gut",
    "Heart","Kidney","Liver","Lung","Muscle","Skin","Spleen","Blood Cells") & 
    tolower(Species) == 'rat' & !CAS %in% c('10457-90-6','5786-21-0','17617-23-1','69-23-8','2898-12-6',
    '57562-99-9','59-99-4','2955-38-6','155-97-5','41903-57-5','58-55-9','77-32-7','59-05-2','60-54-8'))
cas.list <- get_cheminfo(model='schmitt',species='rat')
cas.list <-  cas.list[cas.list %in% pc.data[,'CAS']]
ma.data.list <- subset(chem.physical_and_invitro.data,!is.na(logMA))[,'CAS']
for(this.cas in cas.list){
  parameters <- parameterize_schmitt(chem.cas=this.cas,species='rat')
  init.parameters <- parameters
  charge <- calc_ionization(chem.cas=this.cas,pH=7.4)$fraction_charged
  if(!this.cas %in% ma.data.list){
    init.parameters$MA <- 10^(0.999831 - 0.016578*38.7 + 0.881721 * log10(parameters$Pow))
  }
  pcs <- predict_partitioning_schmitt(parameters=parameters,species='rat',regression=F)
  init.pcs <- predict_partitioning_schmitt(parameters=init.parameters,species='rat',regression=F)
  for(this.tissue in subset(pc.data,CAS==this.cas)[,'Tissue']){
    if(this.tissue == 'Blood Cells') this.pc <- 'rbc'
    else this.pc <- this.tissue
    pc.table <- rbind(pc.table,cbind(as.data.frame(this.cas),as.data.frame(this.tissue),
    as.data.frame(log10(init.pcs[[which(substr(names(init.pcs),2,nchar(names(init.pcs))-3)
                                  == tolower(this.pc))]] * init.parameters$Funbound.plasma)),
    as.data.frame(log10(pcs[[which(substr(names(pcs),2,nchar(names(pcs))-3)
                             == tolower(this.pc))]] * parameters$unadjusted.Funbound.plasma)),
    as.data.frame(log10(init.pcs[[which(substr(names(init.pcs),2,nchar(names(init.pcs))-3)
                                  == tolower(this.pc))]] * init.parameters$unadjusted.Funbound.plasma)),
    as.data.frame(log10(pcs[[which(substr(names(pcs),2,nchar(names(pcs))-3)
                             == tolower(this.pc))]] * parameters$Funbound.plasma)),
    as.data.frame(log10(subset(pc.data,CAS==this.cas & Tissue==this.tissue)[,'Exp_PC'])),
    as.data.frame(subset(pc.data,CAS==this.cas & Tissue==this.tissue)[,'LogP']),as.data.frame(charge),
    as.data.frame(as.character(subset(pc.data,CAS == this.cas)[1,'A.B.N'])),
    as.data.frame(subset(pc.data,CAS == this.cas)[1,'fu'])))
  }
}
colnames(pc.table) <- c('CAS','Tissue','fup.adjustment','ma.adjustment','init.Predicted',
                        'Predicted','Experimental','logP','charge','type','fup')
init.error <- pc.table[,'Experimental'] - pc.table[,'init.Predicted']
fup.error <- pc.table[,'Experimental'] - pc.table[,'fup.adjustment']
ma.error <- pc.table[,'Experimental'] - pc.table[,'ma.adjustment']
final.error <- pc.table[,'Experimental'] - pc.table[,'Predicted']
fup.improvement <- abs(init.error) - abs(fup.error)
ma.improvement <- abs(init.error) - abs(ma.error)
final.improvement <- abs(init.error) - abs(final.error)
pc.table <- cbind(pc.table,fup.improvement,ma.improvement, final.improvement,
                  final.error,init.error,ma.error,fup.error)

scientific_10 <- function(x) {                                  
  out <- gsub("1e", "10^", scientific_format()(x))              
  out <- gsub("\\+","",out)                                     
  out <- gsub("10\\^01","10",out)                               
  out <- parse(text=gsub("10\\^00","1",out))                    
}  


init.plot <- ggplot() + geom_point(data=pc.table,aes(10^(init.Predicted),10^(Experimental))) +
    geom_abline() + labs(y=expression(paste("Measured ",K[p])), x=expression(paste("Predicted ",K[p]))) +
    theme(axis.text=element_text(size=16),axis.title=element_text(size=16),
    plot.title=element_text(size=18,hjust = 0.5)) + scale_x_log10(label=scientific_10,limits=c(0.01,10^4.5)) +
    scale_y_log10(label=scientific_10,limits=c(0.01,10^4.5)) + ggtitle('(A)')

final.plot <- ggplot() + geom_point(data=pc.table,aes(10^(Predicted),10^(Experimental))) +
    geom_abline() + labs(y=expression(paste("Measured ",K[p])),x=expression(paste("Predicted ",K[p]))) +
    theme(axis.text=element_text(size=16),axis.title=element_text(size=16),
    plot.title=element_text(size=18,hjust=0.5)) + scale_x_log10(label=scientific_10,limits=c(0.01,10^4.5)) +
    scale_y_log10(label=scientific_10,limits=c(0.01,10^4.5)) + ggtitle('(B)')

fup.change.plot <-  ggplot() +
    geom_point(data=pc.table[order(pc.table[,'fup.improvement'],decreasing=F),],
    aes(10^(fup.adjustment),10^(Experimental),color=fup.improvement)) + geom_abline() +
    labs(y=expression(paste("Measured ",K[p])),x=expression(paste("Predicted ",K[p])),color='Improvement') +
    theme(axis.text=element_text(size=16),axis.title=element_text(size=16)) +
    scale_x_log10(label=scientific_10,limits=c(0.01,10^4.5)) + 
    scale_y_log10(label=scientific_10,limits=c(0.01,10^4.5)) +
    scale_color_viridis(direction=-1,option='inferno')
ma.subset <- subset(pc.table,!CAS %in% ma.data.list)
ma.change.plot <- ggplot() +
    geom_point(data=ma.subset[order(ma.subset[,'ma.improvement'],decreasing=F),]
    ,aes(10^(ma.adjustment),10^(Experimental),color=ma.improvement)) + geom_abline() +
    labs(y=expression(paste("Measured ",K[p])),x=expression(paste("Predicted ",K[p])),color='Improvement') +
    theme(axis.text=element_text(size=16),axis.title=element_text(size=16)) +
    scale_x_log10(label=scientific_10,limits=c(0.01,10^4.5)) + 
    scale_y_log10(label=scientific_10,limits=c(0.01,10^4.5)) +
    scale_color_viridis(direction=-1,option='inferno')

Now we calculate and plot the regressions for all tissues, together with their 95% confidence intervals.

regressions <- NULL

for(tissue in as.character(unique(pc.table[,'Tissue']))){
  fit <- lm(Experimental ~ Predicted ,data=subset(pc.table,Tissue==tissue))
  smry <- summary(fit)
  est <- estimable(fit, cm=diag(2), beta0=c(0,1), joint.test=TRUE)
  regressions <- rbind(regressions,cbind(tissue,as.data.frame(fit$coefficients[['(Intercept)']]),
      as.data.frame(fit$coefficients[['Predicted']]),
      as.data.frame(smry$coefficients[['Predicted','Pr(>|t|)']]),
      as.data.frame(smry$sigma),as.data.frame(smry$r.squared),
      as.data.frame(smry[[11]][1,1]),as.data.frame(smry[[11]][2,2]),
      as.data.frame(smry[[11]][1,2]),as.data.frame(smry$df[2]),as.data.frame(est[[3]])))
}
colnames(regressions) <- c('Tissue','Intercept','Slope','P-value','SE','R-squared',
                           'Int Var','Slp Var','Cov','df','estimable')
x.cf <- seq(-2,3.5,.01)
for(tissue in  as.character(unique(pc.table[,'Tissue']))){
  conf <- qt(0.975,df=subset(regressions,Tissue==tissue)[['df']]+1) *
      subset(regressions,Tissue==tissue)[['SE']] *
      sqrt(subset(regressions,Tissue==tissue)[['Int Var']] +
      x.cf^2 * subset(regressions,Tissue==tissue)[['Slp Var']] +
      2 * x.cf * subset(regressions,Tissue==tissue)[['Cov']] + 1)
  line <- subset(regressions,Tissue==tissue)[['Intercept']] +
      x.cf * subset(regressions,Tissue==tissue)[['Slope']]
  y.cf <- line + conf
  y.ncf <- line - conf

  cf <- cbind(as.data.frame(x.cf),as.data.frame(y.cf),as.data.frame(y.ncf))
  if(tissue == 'Blood Cells'){
    eval(parse(text= paste('Blood <- ggplot() + labs(y=expression(paste("Inferred ",K[p])),
                               x=expression(paste("Predicted ",K[p]))) + geom_abline(linetype = "dashed") +
                               geom_point(data=subset(pc.table,Tissue == \'',tissue,'\'),aes(10^(Predicted),
                               10^(Experimental)))  +  theme(axis.text=element_text(size=14),
                               axis.title=element_text(size=14),plot.title=element_text(size=14,hjust=0.5)) +
                               scale_x_log10(label=scientific_10,limits=c(0.01,1000)) + 
                               scale_y_log10(label=scientific_10,limits=c(0.01,1000)) +
                               geom_line(data=cf,aes(10^(x.cf),10^(y.cf))) +
                               geom_line(data=cf,aes(10^(x.cf),10^(y.ncf))) +
                               geom_abline(intercept=subset(regressions,Tissue==tissue)[[\'Intercept\']],
                               slope=subset(regressions,Tissue==tissue)[[\'Slope\']]) +
                               ggtitle(\'Red Blood Cells\')',sep='')))
  }else{
    eval(parse(text= paste(tissue,' <- ggplot() + labs(y=expression(paste("Measured ",K[p]))
                               ,x=expression(paste("Predicted ",K[p]))) + geom_abline(linetype = "dashed") +
                               geom_point(data=subset(pc.table,Tissue == \'',tissue,'\'),
                               aes(10^(Predicted),10^(Experimental))) + theme(axis.text=element_text(size=14),
                               axis.title=element_text(size=14),plot.title=element_text(size=14,hjust=0.5)) +
                               scale_x_log10(label=scientific_10,limits=c(0.01,1000)) + 
                               scale_y_log10(label=scientific_10,limits=c(0.01,1000)) +
                               geom_line(data=cf,aes(10^(x.cf),10^(y.cf))) +
                               geom_line(data=cf,aes(10^(x.cf),10^(y.ncf))) +
                               geom_abline(intercept=subset(regressions,Tissue==tissue)[[\'Intercept\']],
                               slope=subset(regressions,Tissue==tissue)[[\'Slope\']]) +
                               ggtitle(\'',tissue,'\')',sep='')))
  }
}

In vivo volume of distribution data are compared with the predictions, and errors are calculated. Regressions and improvements are calculated and plotted.

obach <- subset(Obach2008,CAS %in% get_cheminfo(model='schmitt'))
vd.table <- NULL

for(this.cas in obach[,'CAS']){
  parameters <- parameterize_schmitt(chem.cas=this.cas)
  init.parameters <- parameters
  if(!this.cas %in% ma.data.list){
    init.parameters$MA <- 10^(0.999831 - 0.016578*37 + 0.881721 * log10(parameters$Pow))
  }
  pcs <- predict_partitioning_schmitt(parameters=parameters,regression=F)
  init.pcs <- predict_partitioning_schmitt(parameters=init.parameters,regression=F)
  reg.pcs <- predict_partitioning_schmitt(parameters=parameters,regression=T)
  vdist <- calc_vdist(parameters=c(pcs,Funbound.plasma=parameters$Funbound.plasma))
  init.vdist <- calc_vdist(parameters=c(init.pcs,Funbound.plasma=parameters$unadjusted.Funbound.plasma))
  reg.vdist <- calc_vdist(parameters=c(reg.pcs,Funbound.plasma=parameters$Funbound.plasma))
  vd.table <- rbind(vd.table,cbind(as.data.frame(this.cas),as.data.frame(log10(init.vdist)),
                    as.data.frame(log10(vdist)),as.data.frame(log10(reg.vdist)),
                    as.data.frame(log10(subset(obach,CAS==this.cas)[['VDss (L/kg)']]))))
}
colnames(vd.table) <- c('CAS','init.vdist','adjusted.vdist','calibrated.vdist','Experimental')
init.error <- vd.table[,'Experimental'] - vd.table[,'init.vdist']
adjustment.error <- vd.table[,'Experimental'] - vd.table[,'adjusted.vdist']
calibration.error <- vd.table[,'Experimental'] - vd.table[,'calibrated.vdist']
adjustment.improvement <- abs(init.error) - abs(adjustment.error)
calibration.improvement <- abs(adjustment.error) - abs(calibration.error)
vd.table <- cbind(vd.table,adjustment.improvement,calibration.improvement,
                  init.error,adjustment.error,calibration.error)
fit <- lm(Experimental ~ calibrated.vdist,data=vd.table)
smry <- summary(fit)
calibrated.reg <- cbind(as.data.frame(fit$coefficients['(Intercept)']),
                        as.data.frame(fit$coefficients['calibrated.vdist']),
                        as.data.frame(smry$coefficients['calibrated.vdist','Pr(>|t|)']),
                        as.data.frame(smry$sigma),as.data.frame(smry$r.squared))
fit <- lm(Experimental ~ init.vdist,data=vd.table)
smry <- summary(fit)
init.reg <- cbind(as.data.frame(fit$coefficients['(Intercept)']),
                  as.data.frame(fit$coefficients['init.vdist']),
                  as.data.frame(smry$coefficients['init.vdist','Pr(>|t|)']),
                  as.data.frame(smry$sigma),as.data.frame(smry$r.squared))
fit <- lm(Experimental ~ adjusted.vdist,data=vd.table)
smry <- summary(fit)
adjustment.reg <- cbind(as.data.frame(fit$coefficients['(Intercept)']),
                       as.data.frame(fit$coefficients['adjusted.vdist']),
                       as.data.frame(smry$coefficients['adjusted.vdist','Pr(>|t|)']),
                       as.data.frame(smry$sigma),as.data.frame(smry$r.squared))
colnames(init.reg) <- colnames(adjustment.reg) <-
colnames(calibrated.reg) <- c('Intercept','Slope','P-value','Std Err','R-squared')

init.vd.plot <- ggplot(vd.table,aes(10^(init.vdist),10^(Experimental))) + geom_point() +
    geom_abline(intercept = init.reg[['Intercept']], slope = init.reg[["Slope"]]) +
    geom_abline(linetype = "dashed") + xlab("Predicted Volume of Distribution") +
    ylab("Measured Volume of Distribution") + theme(axis.text=element_text(size=16),
    axis.title=element_text(size=16),plot.title=element_text(size=18,hjust = 0.5)) +
    scale_x_log10(label=scientific_10,limits=c(10^(-1.5),10^(8.5))) + 
    scale_y_log10(label=scientific_10,limits=c(10^(-1.5),10^(8.5))) +
    ggtitle('(A)')

adjustment.plot <- ggplot() +
    geom_point(data=vd.table[order(vd.table[,'adjustment.improvement'],decreasing=F),],
    aes(10^(adjusted.vdist),10^(Experimental),color=adjustment.improvement)) +
    geom_abline(intercept = adjustment.reg[['Intercept']],slope = adjustment.reg[["Slope"]]) +
    geom_abline(linetype = "dashed") + xlab("Predicted Volume of Distribution") +
    ylab("Measured Volume of Distribution") + theme(legend.position = c(.95, .95),
    legend.justification = c("right", "top"),legend.box.just = "right",
    legend.margin = margin(6, 6, 6, 6),axis.text=element_text(size=16),
    axis.title=element_text(size=16),plot.title=element_text(size=18,hjust = 0.5)) +
    scale_x_log10(limits=c(10^(-1.5),10^(3))) + scale_y_log10(limits=c(10^(-1.5),10^(3))) +
    ggtitle('(B)') + scale_color_viridis(direction=-1,option='inferno')

calibration.plot <- ggplot() +
    geom_point(data=vd.table[order(vd.table[,'calibration.improvement'],decreasing=F),],
    aes(10^(calibrated.vdist),10^(Experimental),color=calibration.improvement)) +
    geom_abline(intercept = calibrated.reg[['Intercept']],slope = calibrated.reg[["Slope"]]) +
    geom_abline(linetype = "dashed") + xlab("Predicted Volume of Distribution") +
    ylab("Measured Volume of Distribution") + theme(legend.position = c(.95, .95),
    legend.justification = c("right", "top"),legend.box.just = "right",
    legend.margin = margin(6, 6, 6, 6),axis.text=element_text(size=16),
    axis.title=element_text(size=16),plot.title=element_text(size=18,hjust = 0.5)) +
    scale_x_log10(limits=c(10^(-1.5),10^(3))) + scale_y_log10(limits=c(10^(-1.5),10^(3))) +
    ggtitle('(C)') + scale_color_viridis(direction=-1,option='inferno')

Now we pull in the in vivo blood to plasma ratios, use these to calculate the inferred red blood cell to plasma ratios, and then make predictions for these values. A censored regressions is performed, and predictions are plotted against errors.

rb2p.data <- subset(chem.physical_and_invitro.data,!is.na(Human.Rblood2plasma))
measured.rb2p <- NULL
measured.krbc <- NULL
predicted.rb2p <- NULL
predicted.krbc <- NULL
cas <- NULL
charge <- NULL
fup <- NULL
logP <- NULL
pka_donor <- NULL
pka_accept <- NULL
for(this.cas in rb2p.data[rb2p.data[,'CAS'] %in% get_cheminfo(model='schmitt'),'CAS']){
  rb2p <- get_rblood2plasma(chem.cas=this.cas)
  krbc <- (rb2p + .44 - 1) / 0.44
  measured.rb2p <- c(measured.rb2p,rb2p)
  measured.krbc <- c(measured.krbc,krbc)
  parameters <- parameterize_schmitt(chem.cas=this.cas)
  pcs <- predict_partitioning_schmitt(parameters=parameters)
  predicted.krbc <- c(predicted.krbc,pcs[['Krbc2pu']] * parameters$Funbound.plasma)
  cas <- c(cas,this.cas)
  charge <- c(charge,calc_ionization(chem.cas=this.cas,pH=7.4)$fraction_charged)
  fup <- c(fup,parameters$unadjusted.Funbound.plasma)
  logP <-  c(logP,log10(parameters$Pow))
  pka_donor <- c(pka_donor,paste(parameters$pKa_Donor,collapse=','))
  pka_accept <- c(pka_accept,paste(parameters$pKa_Accept,collapse=','))
}
predicted.rb2p <-  1 - 0.44 + 0.44 * predicted.krbc
rb2p.table <- cbind(as.data.frame(cas),as.data.frame(predicted.rb2p),as.data.frame(measured.rb2p))
colnames(rb2p.table) <- c('cas','predicted.rb2p','measured.rb2p')
error <- log10(rb2p.table[,'measured.rb2p']) - log10(rb2p.table[,'predicted.rb2p'])
rb2p.table <- cbind(rb2p.table,error,charge,fup,logP)

error <- log10(measured.krbc) -  log10(predicted.krbc)
krbc.table <- cbind(as.data.frame(cas),as.data.frame(predicted.krbc),as.data.frame(measured.krbc),
                    as.data.frame(error),charge,fup,logP,pka_donor,pka_accept)

pdta <- data.frame(x = predicted.krbc,
                   y = measured.krbc)
pdta$y[pdta$y <= 0.1] <- 0.1
pdta$Censoring <- factor(c("Not Censored","Censored")[as.numeric(pdta$y <= 0.1) + 1])
y <- measured.krbc
x <- cbind(rep(1, length(y)),-1 * log10(predicted.krbc))
colnames(x) <- c("Intercept","Predicted")
cc <- as.numeric(y <= 0.1)
y[y < 0.1] <- 0.1
y <- -log10(y)

out <- em.cens(cc, x, y, dist="Normal")

censored.regression <- ggplot() +
    geom_point(data=pdta,aes(x=x,y=y, color=Censoring)) +
    scale_x_log10(limits=c(.0009,40)) + scale_y_log10(limits=c(.1,4),breaks=c(.1,.5,2.5)) +
    labs(y=expression(paste("Inferred ",K[p])),x=expression(paste("Predicted ",K[p]))) +
    geom_abline(intercept=0, slope=1, linetype='dashed') +
    theme(axis.text=element_text(size=16),axis.title=element_text(size=16),
    plot.title=element_text(size=18,hjust=0.5),legend.position = c(0.11, .8)) +
    geom_abline(slope=out$betas[2],intercept=-out$betas[1]) + ggtitle('(B)')

rb2p.plot <- ggplot(rb2p.table,aes(predicted.rb2p,measured.rb2p)) +
    geom_point()  + scale_x_log10(lim=c(.52,18)) + 
    scale_y_log10(lim=c(.52,2.5),breaks=c(0.5,1,2)) + geom_abline(linetype='dashed') +
    labs(y="Measured B:P Ratio ",
    x="Predicted B:P Ratio") +
    theme(axis.text=element_text(size=16),axis.title=element_text(size=16),
    plot.title=element_text(size=18,hjust=0.5)) + ggtitle('(A)')

Lastly, we make the heatmap.

heatmap.table <- NULL
for(this.cas in get_cheminfo(model='schmitt')){
    parms <- parameterize_schmitt(chem.cas=this.cas)
    pcs <- predict_partitioning_schmitt(parameters=parms)
    heatmap.table <- cbind(heatmap.table,log10(unlist(pcs)[1:11]*parms$Funbound.plasma))
}
rownames(heatmap.table) <-  c('Adipose','Bone','Brain','Gut','Heart',
                              'Kidney','Liver','Lung','Muscle','Skin','Spleen')
colnames(heatmap.table) <- rep("",dim(heatmap.table)[2])

pal <- function (n, h = c(260, -328), c = 80, l = c(30, 100), power = 1.5,
    fixup = TRUE, gamma = NULL, alpha = 1, ...)
{
    if (!is.null(gamma))
        warning("'gamma' is deprecated and has no effect")
    if (n < 1L)
        return(character(0L))
   h <- rep(h, length.out = 2L)
    c <- c[1L]
    l <- rep(l, length.out = 2L)
    power <- rep(power, length.out = 2L)
    rval <- seq(1, -1, length = n)
    rval <- hex(polarLUV(L = l[2L] - diff(l) * abs(rval)^power[2L],
        C = c * abs(rval)^power[1L], H = ifelse(rval > 0, h[1L],
            h[2L])), fixup = fixup, ...)
    if (!missing(alpha)) {
        alpha <- pmax(pmin(alpha, 1), 0)
        alpha <- format(as.hexmode(round(alpha * 255 + 1e-04)),
            width = 2L, upper.case = TRUE)
        rval <- paste(rval, alpha, sep = "")
    }
    return(rval)
}

hclust.ave <- function(x) hclust(x, method="ward.D2")
par(cex.main=1.5,cex.lab=2)
lhei <- c(1,4)
lwid <- c(1.25,2)
lmat <- rbind(c(2,3),c(4,1))
heatmap.2(heatmap.table,dendrogram='column',col=pal,trace="none",
          hclustfun=hclust.ave,key.xlab=expression(paste(log[10],K[p]," Value")),
          key.ylab=expression(paste("Number of ",K[p])),
          key.title="Partition Coefficients",xlab="Chemicals",cex.lab=2,
          lmat=lmat,margins=c(2,5),lwid=lwid,lhei=lhei)