How would you plot a regression in 3D that has an interaction and its CIs?
首先这是我的数据:
# Example data: a 0/1 response (`fpergandei`) and two continuous
# explanatory variables (`mintemp2`, `moist2`), 32 observations in total.
df <- data.frame(
  fpergandei = rep(c(0, 1, 0, 1), times = c(8, 4, 4, 16)),
  mintemp2 = c(
    1.29569224, 1.40509105, 0.74869815, 1.51448987, 2.49907923, -1.00168292,
    -0.01709357, -0.34529002, -0.56408765, -0.01709357, 0.20170407, 0.42050170,
    -1.00168292, -1.98627227, 0.09230525, -0.23589120, -0.78288528,
    -0.89228410, -0.01709357, 1.95208514, 0.31110289, 0.31110289, 0.09230525,
    0.96749579, -0.23589120, -0.01709357, -1.22048055, -0.23589120,
    -0.45468883, -0.67348647, -1.65807582, -0.45468883
  ),
  moist2 = c(
    0.24947803, -1.17491998, 0.05498936, -1.58242006, -1.46943140, 2.16472842,
    1.64979649, 1.65535331, -1.00821540, 0.40506897, -0.22840843, -0.28027207,
    0.92370544, 1.04039865, 1.36639871, 1.32564870, -0.18210160, -0.25248798,
    -0.81557900, -0.72481762, -1.75282919, -0.58219259, -0.27286298,
    -0.07281749, 0.07536436, -0.04688566, -0.41363574, -0.54699940,
    -0.19691978, 0.89036452, 0.84035315, -1.03785177
  )
)

# Print the data frame; the expected console output is kept below.
df
#    fpergandei    mintemp2      moist2
# 1           0  1.29569224  0.24947803
# 2           0  1.40509105 -1.17491998
# 3           0  0.74869815  0.05498936
# 4           0  1.51448987 -1.58242006
# 5           0  2.49907923 -1.46943140
# 6           0 -1.00168292  2.16472842
# 7           0 -0.01709357  1.64979649
# 8           0 -0.34529002  1.65535331
# 9           1 -0.56408765 -1.00821540
# 10          1 -0.01709357  0.40506897
# 11          1  0.20170407 -0.22840843
# 12          1  0.42050170 -0.28027207
# 13          0 -1.00168292  0.92370544
# 14          0 -1.98627227  1.04039865
# 15          0  0.09230525  1.36639871
# 16          0 -0.23589120  1.32564870
# 17          1 -0.78288528 -0.18210160
# 18          1 -0.89228410 -0.25248798
# 19          1 -0.01709357 -0.81557900
# 20          1  1.95208514 -0.72481762
# 21          1  0.31110289 -1.75282919
# 22          1  0.31110289 -0.58219259
# 23          1  0.09230525 -0.27286298
# 24          1  0.96749579 -0.07281749
# 25          1 -0.23589120  0.07536436
# 26          1 -0.01709357 -0.04688566
# 27          1 -1.22048055 -0.41363574
# 28          1 -0.23589120 -0.54699940
# 29          1 -0.45468883 -0.19691978
# 30          1 -0.67348647  0.89036452
# 31          1 -1.65807582  0.84035315
# 32          1 -0.45468883 -1.03785177
请注意,我已将"fpergandei"转换为具有两个水平(1和0)的因子。
因此,我最近拟合了一个带有两个连续解释变量的二项式GLM。结果显示这两个解释变量之间存在显著的交互作用。最后我使用persp()函数绘制了该模型
# Binomial GLM with an interaction between the two continuous predictors,
# fitted to the data frame `df`.
mylogit <- glm(fpergandei ~ mintemp2 * moist2, data = df, family = "binomial")

# Regular grid over both predictors on which the fitted surface is evaluated.
press_grid <- seq(-2.2, 2.2, by = 0.1) # mintemp2 values
v_grid <- seq(-2.2, 2.2, by = 0.1) # moist2 values
newdat <- data.frame(
  mintemp2 = rep(press_grid, times = length(v_grid)),
  moist2 = rep(v_grid, each = length(press_grid))
)

# Predicted probabilities, reshaped so that z[i, j] belongs to press_grid[i]
# and v_grid[j] (mintemp2 varies fastest in `newdat`).
pred <- predict(mylogit, newdata = newdat, type = "response")
z <- matrix(pred, nrow = length(press_grid))

# Surface colour: keep a user-supplied `mycol`, otherwise use a default so the
# snippet runs on its own.
if (!exists("mycol")) {
  mycol <- "lightblue"
}

res <- persp(
  press_grid, v_grid, z,
  xlab = "Min. Temperature",
  ylab = "Moisture",
  zlab = "Predicted Probability",
  main = "Plot Name",
  theta = 60,
  phi = 27,
  col = mycol
)
我还弄清楚了如何使用persp函数绘制置信区间。
# Predict on the link (logit) scale together with standard errors. The
# interval fit +/- 1.96 * se is formed on that scale and then back-transformed,
# which keeps both limits inside [0, 1].
pred2 <- predict(mylogit, newdata = newdat, type = "link", se.fit = TRUE)
pred2$se.fit # standard errors (link scale) for all predicted values

# Inverse logit of the link-scale limits gives limits on the probability scale.
CIlow <- plogis(pred2$fit - 1.96 * pred2$se.fit) # lower confidence limit
CIup <- plogis(pred2$fit + 1.96 * pred2$se.fit) # upper confidence limit

# The same limits as matrices (rows follow press_grid), the form persp() needs.
ci.low <- matrix(CIlow, nrow = length(press_grid))
ci.up <- matrix(CIup, nrow = length(press_grid))
我的问题是,当我绘制置信区间上限和下限的曲面时,它们会与预测曲面互相叠压在一起,乱成一团:
# Draw the lower limit, the fitted surface and the upper limit on top of each
# other in the same plotting region (par(new = TRUE) keeps the previous plot).
# `mycol` is the colour of the fitted surface and must already be defined.
# NOTE: no `zlim` is passed, so each persp() call scales its z axis from the
# matrix it is given; the three layers are therefore not on a common scale.

# Surface with the lower confidence limit.
res <- persp(
  press_grid, v_grid, ci.low,
  xlab = "Min. Temperature",
  ylab = "Moisture",
  zlab = "Predicted Probability",
  main = "Lower Confidence Interval",
  theta = 60,
  phi = 27,
  col = "grey"
)

par(new = TRUE)

# Surface with the predicted probabilities.
res <- persp(
  press_grid, v_grid, z,
  xlab = "Min. Temperature",
  ylab = "Moisture",
  zlab = "Predicted Probability",
  main = "Logistic Curve",
  theta = 60,
  phi = 27,
  col = mycol
)

par(new = TRUE)

# Surface with the upper confidence limit.
res <- persp(
  press_grid, v_grid, ci.up,
  xlab = "Min. Temperature",
  ylab = "Moisture",
  zlab = "Predicted Probability",
  main = "Upper Confidence Interval",
  theta = 60,
  phi = 27,
  col = "grey"
)
有什么办法可以让这些曲面图不至于彼此叠压、乱成一团?
这里是使用 plotly 的一种尝试。这种方法的好处是,您可以旋转视图,直到对观察角度满意为止
library(plotly)

# Logistic regression of fpergandei on both predictors and their interaction.
mylogit <- glm(
  fpergandei ~ mintemp2 * moist2,
  family = "binomial",
  data = df
)
以下两个向量本可以命名为"mintemp2"和"moist2"。为了与原帖(OP)保持一致,我保留了下面这些名称
press_grid <- seq(-2.2, 2.2, by = 0.1) # mintemp2 values
v_grid <- seq(-2.2, 2.2, by = 0.1) # moist2 values

# expand.grid() varies its first argument fastest, so this grid holds the same
# values in the same order as the `newdat` built in the question.
newdat <- expand.grid(mintemp2 = press_grid, moist2 = v_grid)

# Fitted values and standard errors on the link scale.
pred <- predict(mylogit, newdata = newdat, type = "link", se.fit = TRUE)

# Build the interval on the link scale, then map everything to probabilities
# with the model's inverse link.
inv_link <- family(mylogit)$linkinv
ymin <- inv_link(pred$fit - 1.96 * pred$se.fit)
ymax <- inv_link(pred$fit + 1.96 * pred$se.fit)
fit <- inv_link(pred$fit)

# Matrices with rows following press_grid (mintemp2) and columns following
# v_grid (moist2).
z <- matrix(fit, nrow = length(press_grid))
ci.low <- matrix(ymin, nrow = length(press_grid))
ci.up <- matrix(ymax, nrow = length(press_grid))

grey_scale <- list(c(0, 1), c("grey", "grey"))

# plotly surface traces read z[i, j] as the height at y[i], x[j], i.e. rows
# follow y. The matrices above have rows following x, so they are transposed
# here; otherwise the two predictors would be swapped in the plot.
plot_ly(x = press_grid, y = v_grid) %>%
  add_surface(
    z = t(z),
    colorscale = list(c(0, 1), c("red", "blue"))
  ) %>%
  add_surface(
    z = t(ci.low),
    opacity = 0.5,
    showscale = FALSE,
    colorscale = grey_scale
  ) %>%
  add_surface(
    z = t(ci.up),
    opacity = 0.5,
    showscale = FALSE,
    colorscale = grey_scale
  ) %>%
  layout(
    scene = list(
      xaxis = list(title = "mintemp2"),
      yaxis = list(title = "moist2"),
      zaxis = list(title = "Predicted probability")
    )
  )
下面是另一种思路:将其绘制成二维图,我认为这样表达更有效
library(tidyverse)

# Labels of the four moist2 bands, in panel order.
facet_levels <- c("-2.2 - -1.4", "-1 - -0.2", "0.2 - 1", "1.4 - 2.2")

data.frame(pred = fit, low = ymin, high = ymax, newdat) %>%
  # Keep every 4th moist2 grid value so each panel shows only a few curves.
  filter(moist2 %in% unique(.$moist2)[c(TRUE, rep(FALSE, 3))]) %>%
  mutate(
    # Rows are ordered by moist2 (mintemp2 varies fastest in `newdat`), so
    # cutting the rows into four equal runs assigns each moist2 value to a band.
    facet = factor(
      rep(facet_levels, each = length(.$moist2) / 4),
      levels = facet_levels
    ),
    moist2 = as.factor(moist2)
  ) %>%
  ggplot() +
  geom_line(aes(x = mintemp2, y = pred, color = moist2)) +
  geom_ribbon(
    aes(x = mintemp2, ymin = low, ymax = high, fill = moist2),
    alpha = 0.1
  ) +
  facet_wrap(~facet, ncol = 2) +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )
总体思路是减少其中一个变量的取值个数,并将其取值范围拆分到多个分面(facet)中。乍一看就能发现,在moist2为0.2到1的范围内,概率函数的方向发生了改变,我认为这很有趣,也突出了这两个变量之间的交互作用。这是我在3D图中没能看出来的。