如何在Python中为PCoA图绘制置信椭圆?
Python实现PCoA散点图添加分类置信椭圆
我有分为1、2、3三类的生物样本,已基于样本中部分基因相对丰度的Bray-Curtis距离完成PCoA分析并绘制散点图。
原使用脚本
import numpy as np import pandas as pd from scipy.spatial import distance from skbio.stats.ordination import pcoa, pca import matplotlib.pyplot as plt import seaborn as sns df = pd.read_excel("relative_abundances.xlsx") dfnumpy = df.to_numpy() dfdistances=distance.pdist(dfnumpy, 'braycurtis') #bray-curtis距离矩阵 pcoa_results = pcoa(dfdistances, dimensions=2) #PCoA分析 coordinates = pcoa_results.samples #PCoA坐标 df_pcoa = coordinates[["PC1", "PC2"]] indexdf = pd.read_excel("index.xlsx") #为每个样本分配类型 df_merged = pd.concat([df_pcoa.reset_index(drop=True), indexdf.reset_index(drop=True)], axis=1) #合并坐标与类型数据 sns.scatterplot(data=df_merged,x="PC1",y="PC2",hue="Type") #绘制PCoA散点图 plt.title("Figure 1: Scatter Plot", fontsize=16) plt.xlabel('PcoA 1', fontsize=16) plt.ylabel('PcoA 2', fontsize=16) plt.show(block=True)
需求说明
我使用relative_abundances.csv和index.csv数据生成了基础散点图,现在希望给每类样本添加一个置信椭圆(类似带分类置信椭圆的PCoA示例图)。我找到了matplotlib的置信椭圆文档,但无法和现有散点图整合,有人说Python无法实现,推荐用R的ggplot2,现寻求Python中的可行方案。
解决方案
方法1:整合matplotlib官方置信椭圆函数
直接复用matplotlib官方的置信椭圆计算逻辑,在散点图基础上遍历每个类别绘制椭圆:
import numpy as np import pandas as pd from scipy.spatial import distance from skbio.stats.ordination import pcoa import matplotlib.pyplot as plt import seaborn as sns from matplotlib.patches import Ellipse import matplotlib.transforms as transforms # 定义置信椭圆生成函数 def confidence_ellipse(x, y, ax, n_std=1.96, facecolor='none', **kwargs): if x.size != y.size: raise ValueError("x和y维度必须一致") # 计算协方差矩阵与相关系数 cov = np.cov(x, y) pearson = cov[0, 1]/np.sqrt(cov[0, 0] * cov[1, 1]) # 椭圆参数计算 ell_radius_x = np.sqrt(1 + pearson) ell_radius_y = np.sqrt(1 - pearson) ellipse = Ellipse((0, 0), width=ell_radius_x * 2, height=ell_radius_y * 2, facecolor=facecolor, **kwargs) # 坐标系转换(匹配样本均值与标准差) scale_x = np.sqrt(cov[0, 0]) * n_std mean_x = np.mean(x) scale_y = np.sqrt(cov[1, 1]) * n_std mean_y = np.mean(y) transf = transforms.Affine2D() \ .rotate_deg(45) \ .scale(scale_x, scale_y) \ .translate(mean_x, mean_y) ellipse.set_transform(transf + ax.transData) return ax.add_patch(ellipse) # 原PCoA分析流程不变 df = pd.read_excel("relative_abundances.xlsx") dfnumpy = df.to_numpy() dfdistances=distance.pdist(dfnumpy, 'braycurtis') pcoa_results = pcoa(dfdistances, dimensions=2) coordinates = pcoa_results.samples df_pcoa = coordinates[["PC1", "PC2"]] indexdf = pd.read_excel("index.xlsx") df_merged = pd.concat([df_pcoa.reset_index(drop=True), indexdf.reset_index(drop=True)], axis=1) # 绘制散点图 fig, ax = plt.subplots(figsize=(8, 6)) sns.scatterplot(data=df_merged, x="PC1", y="PC2", hue="Type", s=60, ax=ax) # 遍历每个类别绘制95%置信椭圆 unique_types = df_merged["Type"].unique() colors = sns.color_palette(n_colors=len(unique_types)) for type_label, color in zip(unique_types, colors): subset = df_merged[df_merged["Type"] == type_label] confidence_ellipse(subset["PC1"], subset["PC2"], ax, n_std=1.96, edgecolor=color, linewidth=2) # 设置图属性 plt.title("PCoA图(带95%置信椭圆)", fontsize=16) plt.xlabel('PCoA 1', fontsize=16) plt.ylabel('PCoA 2', fontsize=16) plt.legend(title="样本类型") plt.show(block=True)
方法2:使用Seaborn简化实现
利用Seaborn的lmplot结合回归拟合功能,快速生成带置信椭圆的图:
import numpy as np import pandas as pd from scipy.spatial import distance from skbio.stats.ordination import pcoa import matplotlib.pyplot as plt import seaborn as sns # 原PCoA分析流程不变 df = pd.read_excel("relative_abundances.xlsx") dfnumpy = df.to_numpy() dfdistances=distance.pdist(dfnumpy, 'braycurtis') pcoa_results = pcoa(dfdistances, dimensions=2) coordinates = pcoa_results.samples df_pcoa = coordinates[["PC1", "PC2"]] indexdf = pd.read_excel("index.xlsx") df_merged = pd.concat([df_pcoa.reset_index(drop=True), indexdf.reset_index(drop=True)], axis=1) # 绘制带置信椭圆的散点图 g = sns.lmplot(data=df_merged, x="PC1", y="PC2", hue="Type", fit_reg=False, scatter_kws={"s": 60}) # 为每个类别添加置信椭圆 for label, color in zip(df_merged["Type"].unique(), sns.color_palette()): subset = df_merged[df_merged["Type"] == label] sns.regplot(data=subset, x="PC1", y="PC2", scatter=False, ci=95, color=color, line_kws={"linestyle": ""}) # 设置图属性 plt.title("PCoA图(带95%置信椭圆)", fontsize=16) plt.xlabel('PCoA 1', fontsize=16) plt.ylabel('PCoA 2', fontsize=16) plt.show(block=True)
说明
- 两种方法默认使用95%置信水平(
n_std=1.96),若需调整为99%,可将参数改为n_std=2.58; - 方法1更灵活,支持自定义椭圆填充色、线条样式等;方法2代码更简洁,适合快速实现。
内容的提问来源于stack exchange,提问作者Lina




