如何在ggplot2绘制的CIF图下方添加手动生成的风险表
问题
我正在R环境中绘制死亡与癫痫事件的累积发病率函数(CIF),目前已完成以下操作:
- 使用
cmprsk包构建cuminc模型 - 通过
survminer包的ggcompetingrisks函数绘制CIF图并完成样式调整 - 因数据集规模过大,无法使用
survival等原生支持风险表生成的工具,已手动生成包含风险人数、死亡及癫痫累积事件数的长格式数据risk_tbl_long
现在需要将手动生成的风险表添加到已绘制好的CIF图(cif_plot)下方,实现类似参考图的效果,现有代码如下:
library(survminer) library(cmprsk) library(ggplot2) library(purrr) set.seed(123) # Sample size n <- 1000 # Create test dataset bd_cs <- data.frame( id = 1:n, exposure = factor( sample(c("unexposed", "ZIKV_EXP", "CZS"), n, replace = TRUE), levels = c("unexposed", "ZIKV_EXP", "CZS") ), futime_month = round(runif(n, 0, 48), 2), # follow-up times between 0 and 48 months outcome = factor( sample(c(0, 1, 2), n, replace = TRUE), levels = c(0, 1, 2) ) ) # Check it head(test_data) cif_model <- cmprsk::cuminc(ftime = bd_cs$futime_month, fstatus = bd_cs$outcome, group = bd_cs$exposure, cencode = "0") cif_plot <- survminer::ggcompetingrisks( fit = cif_model, multiple_panels = FALSE, xlab = "\n Age (months)", ylab = "Cumulative incidence of event \n", title = "" ) cif_plot$mapping <- aes(x = time, y = est, color = group, linetype = event) cif_plot <- cif_plot + labs(linetype = "Outcome", color = "Exposure") + geom_line(linewidth = 1) + scale_color_manual( labels = c("CZS", "Unexposed", "ZIKV exposed"), values = c("orange", "magenta", "blue") ) + scale_linetype_manual( values = c("solid", "dotted"), labels = c("Epilepsy", "Death") ) + scale_y_continuous( limits = c(0, 0.6), breaks = seq(0, 0.6,0.05) ) + scale_x_continuous( limits = c(0, 48), breaks = seq(0, 48, 6) ) + theme_bw() #Creating table risk manually time_point <- seq(0, 48, by = 6) risk_tbl <- bd_cs %>% group_split(exposure) %>% map_dfr(function(group_df) { tibble( exposure = unique(group_df$exposure), !!!set_names( map(time_point, function(tp) sum(group_df$futime_month >= tp)), paste0("number.at.risk_", time_point) ), !!!set_names( map(time_point, function(tp) sum(group_df$outcome == "2" & group_df$futime_month <= tp)), paste0("death_", time_point) ), !!!set_names( map(time_point, function(tp) sum(group_df$outcome == "1" & group_df$futime_month <= tp)), paste0("epilepsy_", time_point) ) ) }) risk_tbl_long <- risk_tbl %>% pivot_longer( cols = c(-exposure), names_to = c("data", "time"), names_sep = "_", values_to = "values" )
解决方案
可以使用patchwork包将CIF图和手动生成的风险表垂直拼接,同时保证x轴对齐,具体步骤如下:
1. 安装并加载依赖包
需要额外加载dplyr、tidyr和patchwork包:
library(dplyr) library(tidyr) library(patchwork)
2. 预处理风险表数据
将risk_tbl_long的类型转换、标签调整,确保和CIF图的分组、刻度一致:
# 将time转为数值型,对齐x轴刻度 risk_tbl_long$time <- as.numeric(risk_tbl_long$time) # 替换分组和数据类型标签,匹配CIF图的图例 risk_tbl_long <- risk_tbl_long %>% mutate( exposure = factor(exposure, levels = c("unexposed", "ZIKV_EXP", "CZS"), labels = c("Unexposed", "ZIKV exposed", "CZS")), data = factor(data, levels = c("number.at.risk", "death", "epilepsy"), labels = c("Number at risk", "Cumulative Death", "Cumulative Epilepsy")) )
3. 绘制风险表
用ggplot2将风险表绘制成文本型图表,按数据类型分面:
risk_plot <- ggplot(risk_tbl_long, aes(x = time, y = fct_rev(exposure), label = values)) + # 每个时间点对应位置添加数值文本 geom_text(size = 3.5, color = "black") + # 按数据类型分面,形成多行表格 facet_grid(data ~ ., scales = "free_y", space = "free_y") + # 对齐CIF图的x轴范围和刻度 scale_x_continuous(limits = c(0, 48), breaks = seq(0, 48, 6)) + # 调整主题,移除冗余元素 theme_bw() + theme( axis.text.y = element_text(size = 10), axis.title.y = element_blank(), axis.title.x = element_text(size = 10, margin = margin(t = 10)), axis.text.x = element_blank(), axis.ticks.x = element_blank(), strip.text.y = element_text(angle = 0, hjust = 0, size = 10), panel.grid = element_blank(), panel.border = element_blank(), strip.background = element_rect(fill = "lightgray") ) + xlab("Age (months)")
4. 拼接CIF图和风险表
调整CIF图的x轴标签避免重复,再用patchwork垂直拼接两个图形:
# 隐藏CIF图的x轴标签,避免和风险表重复 cif_plot <- cif_plot + theme(axis.title.x = element_blank()) # 拼接图形,设置CIF图与风险表的高度比例为3:1 combined_plot <- cif_plot + risk_plot + plot_layout(ncol = 1, heights = c(3, 1)) # 查看最终结果 print(combined_plot)
完整整合代码
将所有步骤整合后的完整代码如下:
library(survminer) library(cmprsk) library(ggplot2) library(purrr) library(dplyr) library(tidyr) library(patchwork) set.seed(123) # Sample size n <- 1000 # Create test dataset bd_cs <- data.frame( id = 1:n, exposure = factor( sample(c("unexposed", "ZIKV_EXP", "CZS"), n, replace = TRUE), levels = c("unexposed", "ZIKV_EXP", "CZS") ), futime_month = round(runif(n, 0, 48), 2), # follow-up times between 0 and 48 months outcome = factor( sample(c(0, 1, 2), n, replace = TRUE), levels = c(0, 1, 2) ) ) # 修正原代码的错误:test_data不存在,改为bd_cs head(bd_cs) cif_model <- cmprsk::cuminc(ftime = bd_cs$futime_month, fstatus = bd_cs$outcome, group = bd_cs$exposure, cencode = "0") cif_plot <- survminer::ggcompetingrisks( fit = cif_model, multiple_panels = FALSE, xlab = "\n Age (months)", ylab = "Cumulative incidence of event \n", title = "" ) cif_plot$mapping <- aes(x = time, y = est, color = group, linetype = event) cif_plot <- cif_plot + labs(linetype = "Outcome", color = "Exposure") + geom_line(linewidth = 1) + scale_color_manual( labels = c("CZS", "Unexposed", "ZIKV exposed"), values = c("orange", "magenta", "blue") ) + scale_linetype_manual( values = c("solid", "dotted"), labels = c("Epilepsy", "Death") ) + scale_y_continuous( limits = c(0, 0.6), breaks = seq(0, 0.6,0.05) ) + scale_x_continuous( limits = c(0, 48), breaks = seq(0, 48, 6) ) + theme_bw() #Creating table risk manually time_point <- seq(0, 48, by = 6) risk_tbl <- bd_cs %>% group_split(exposure) %>% map_dfr(function(group_df) { tibble( exposure = unique(group_df$exposure), !!!set_names( map(time_point, function(tp) sum(group_df$futime_month >= tp)), paste0("number.at.risk_", time_point) ), !!!set_names( map(time_point, function(tp) sum(group_df$outcome == "2" & group_df$futime_month <= tp)), paste0("death_", time_point) ), !!!set_names( map(time_point, function(tp) sum(group_df$outcome == "1" & group_df$futime_month <= tp)), paste0("epilepsy_", time_point) ) ) }) risk_tbl_long <- risk_tbl %>% pivot_longer( cols = c(-exposure), names_to = c("data", "time"), names_sep = "_", values_to = "values" ) # 预处理风险表数据 risk_tbl_long$time <- as.numeric(risk_tbl_long$time) risk_tbl_long <- risk_tbl_long %>% mutate( exposure = factor(exposure, levels = c("unexposed", "ZIKV_EXP", "CZS"), labels = c("Unexposed", "ZIKV exposed", "CZS")), data = factor(data, levels = c("number.at.risk", "death", "epilepsy"), labels = c("Number at risk", "Cumulative Death", "Cumulative Epilepsy")) ) # 绘制风险表 risk_plot <- ggplot(risk_tbl_long, aes(x = time, y = fct_rev(exposure), label = values)) + geom_text(size = 3.5, color = "black") + facet_grid(data ~ ., scales = "free_y", space = "free_y") + scale_x_continuous(limits = c(0, 48), breaks = seq(0, 48, 6)) + theme_bw() + theme( axis.text.y = element_text(size = 10), axis.title.y = element_blank(), axis.title.x = element_text(size = 10, margin = margin(t = 10)), axis.text.x = element_blank(), axis.ticks.x = element_blank(), strip.text.y = element_text(angle = 0, hjust = 0, size = 10), panel.grid = element_blank(), panel.border = element_blank(), strip.background = element_rect(fill = "lightgray") ) + xlab("Age (months)") # 调整CIF图并拼接 cif_plot <- cif_plot + theme(axis.title.x = element_blank()) combined_plot <- cif_plot + risk_plot + plot_layout(ncol = 1, heights = c(3, 1)) print(combined_plot)
关键说明
patchwork包确保两个图形的x轴完全对齐,无需手动调整坐标- 风险表通过
facet_grid实现按数据类型分栏,模拟传统生存分析图的风险表样式 - 调整主题元素移除冗余网格线、边框,让风险表更简洁清晰
内容的提问来源于stack exchange,提问作者ted




