ggplot: how to plot heatmap regardless of the number of variables
使用
下的
数据
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 | df <- read.table(text = c(" NA NA NA NA NA NA NA NA NA NA NA NA 0.4748 NA NA NA NA NA NA NA NA NA NA NA 0.905 0.5362 NA NA NA NA NA NA NA NA NA NA 0.0754 0.0118 0.0614 NA NA NA NA NA NA NA NA NA 0.8768 0.3958 0.7952 0.1034 NA NA NA NA NA NA NA NA 0.5708 0.2056 0.4984 0.2356 0.6736 NA NA NA NA NA NA NA 0.2248 0.6204 0.268 0.0014 0.183 0.0768 NA NA NA NA NA NA 0.483 0.9824 0.5314 0.0114 0.3906 0.1968 0.6308 NA NA NA NA NA 0.697 0.732 0.7604 0.0264 0.594 0.3334 0.416 0.7388 NA NA NA NA 0.2918 0.7286 0.3382 0.003 0.2386 0.1122 0.8712 0.7266 0.509 NA NA NA 0.5904 0.8352 0.6704 0.0188 0.4966 0.273 0.5192 0.8328 0.8736 0.5914 NA NA 0.3838 0.8768 0.4476 0.0042 0.3148 0.1498 0.7288 0.873 0.6178 0.8276 0.7432 NA "), header = F) colnames(df) <- c("TK1", "TK2", "TK3", "TK4" ,"TK5", "TK6", "TK7", "TK8", "TK9", "TK10","TK11","TK12") rownames(df) <- c("TK1", "TK2", "TK3", "TK4" ,"TK5", "TK6", "TK7", "TK8", "TK9", "TK10","TK11","TK12") df # TK1 TK2 TK3 TK4 TK5 TK6 TK7 TK8 TK9 TK10 TK11 TK12 #TK1 NA NA NA NA NA NA NA NA NA NA NA NA #TK2 0.4748 NA NA NA NA NA NA NA NA NA NA NA #TK3 0.9050 0.5362 NA NA NA NA NA NA NA NA NA NA #TK4 0.0754 0.0118 0.0614 NA NA NA NA NA NA NA NA NA #TK5 0.8768 0.3958 0.7952 0.1034 NA NA NA NA NA NA NA NA #TK6 0.5708 0.2056 0.4984 0.2356 0.6736 NA NA NA NA NA NA NA #TK7 0.2248 0.6204 0.2680 0.0014 0.1830 0.0768 NA NA NA NA NA NA #TK8 0.4830 0.9824 0.5314 0.0114 0.3906 0.1968 0.6308 NA NA NA NA NA #TK9 0.6970 0.7320 0.7604 0.0264 0.5940 0.3334 0.4160 0.7388 NA NA NA NA #TK10 0.2918 0.7286 0.3382 0.0030 0.2386 0.1122 0.8712 0.7266 0.5090 NA NA NA #TK11 0.5904 0.8352 0.6704 0.0188 0.4966 0.2730 0.5192 0.8328 0.8736 0.5914 NA NA #TK12 0.3838 0.8768 0.4476 0.0042 0.3148 0.1498 0.7288 0.8730 0.6178 0.8276 0.7432 NA |
我无法更改输入数据。每次根据用户,我都会使用不同的变量以这种格式继续获取它。
我使用下面的代码创建了一个新变量
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 | trial <- df trial$Relationship <- rownames(df) trial1 <- subset(trial, select = c(13, 1, 2, 3,4,5,6,7,8,9,10,11,12)) df2 <- gather(trial1,"Relation1","Strength", 2:13) df2 <- df2 %>% dplyr::mutate(Strength1 = round(Strength, digits = 2))%>% dplyr::select(Relationship,Relation1, Strength1 ) df3 <- df2 %>% extract(Relationship, into = c("Relationship1","Relationship2"),"(\\\\D+)(\\\\d+)", remove = FALSE, convert=TRUE) %>% mutate(Relationship = factor(Relationship, levels = paste0(Relationship1[1], min(Relationship2):max(Relationship2)))) %>% select(-Relationship1, -Relationship2) %>% extract(Relation1, into = c("Relation11","Relation12"),"(\\\\D+)(\\\\d+)", remove = FALSE, convert=TRUE) %>% mutate(Relation1 = factor(Relation1, levels = paste0(Relation11[1], min(Relation12):max(Relation12)))) %>% select(-Relation11, -Relation12) df3$Relation1 = with(df3, factor(Relation1, levels = rev(levels(Relation1)))) ggheatmap <- ggplot(df3, aes(Relationship, Relation1, fill = Strength1))+ geom_tile(color ="white")+ scale_fill_gradient2(low ="red", high ="green", mid ="lightgreen", midpoint = 0.5, limit = c(0,1), space ="Lab", name="Correlation") + theme_minimal() ggheatmap + geom_text(aes(Relationship, Relation1, label = Strength1), color ="black", size = 4) + labs(x = expression(""), y=expression("")) |
结果
问题
我想使热图的绘制动态化。因此,无论变量数量和观察结果如何,都可以绘制热图,而无需为不同数量的变量更改代码?
反正有这样做吗?
在这种情况下,我觉得您的方法circuit回(我用值(ggplot2)表示热图)。这段代码只需要colnames(df)和rownames(df)。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 | library(reshape2); library(ggplot2) df2 <- melt(as.matrix(df), id.var = names(df)[1]) # as.matrix() fixes colnames of long df. df2$Var2 <- with(df2, factor(Var2, levels=rev(levels(Var2)))) ggheatmap <- ggplot(df2, aes(Var1, Var2, fill=value)) + geom_tile(color ="white")+ scale_fill_gradient2(low ="red", high ="green", mid ="lightgreen", midpoint = 0.5, limit = c(0,1), space ="Lab", name="Correlation") + theme_minimal() ggheatmap + geom_text(aes(label = round(value, 2)), color ="black", size = 4) + labs(x = expression(""), y=expression("")) |
1 2 3 | library(ggplot2) library(tidyr) library(dplyr) |
此代码块不管列和行的数量如何都起作用
1 2 3 4 5 6 | df <- df %>% mutate(Relationship = rownames(.)) %>% #Replaces trial$Relationship <- rownames(df) select(Relationship, everything()) %>% #Replaces trial1 <- subset(trial, select = c(13, 1, 2, 3,4,5,6,7,8,9,10,11,12)) gather('Relation1', 'Strength', -1) %>% #Replaces df2 <- gather(trial1,"Relation1","Strength", 2:13) mutate(Strength = round(Strength, digits = 2)) |
下面的代码块是获取列的因子级别的更简洁的方法
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 | # Order Relatinoship variables by numeric suffix # Since its a square matrix you only have to do it once for both columns factorLevels <- df %>% select(Relationship) %>% distinct() %>% extract(Relationship, into = c("TK","num"),"(\\\\D+)(\\\\d+)", remove = FALSE, convert=TRUE) %>% arrange(num) %>% select(Relationship) df <- df %>% mutate(Relationship = factor(Relationship, levels = factorLevels$Relationship), Relation1 = factor(Relation1, levels = rev(factorLevels$Relationship))) |
修改后的绘图代码
1 2 3 4 5 6 7 8 9 10 | ggheatmap <- ggplot(df, aes(Relationship, Relation1, fill = Strength))+ geom_tile(color ="white")+ scale_fill_gradient2(low ="red", high ="green", mid ="lightgreen", midpoint = 0.5, limit = c(0,1), space ="Lab", name="Correlation") + theme_minimal() ggheatmap + geom_text(aes(Relationship, Relation1, label = Strength), color ="black", size = 4) + labs(x = expression(""), y=expression("")) |