You need to enable JavaScript to run this app.
最新活动
大模型
产品
解决方案
定价
生态与合作
支持与服务
开发者
了解我们

如何在Python中为PCoA图绘制置信椭圆?

Python实现PCoA散点图添加分类置信椭圆

我有分为1、2、3三类的生物样本,已基于样本中部分基因相对丰度的Bray-Curtis距离完成PCoA分析并绘制散点图。

原使用脚本

import numpy as np
import pandas as pd
from scipy.spatial import distance
from skbio.stats.ordination import pcoa, pca
import matplotlib.pyplot as plt
import seaborn as sns
df = pd.read_excel("relative_abundances.xlsx")
dfnumpy = df.to_numpy()
dfdistances=distance.pdist(dfnumpy, 'braycurtis') #bray-curtis距离矩阵
pcoa_results = pcoa(dfdistances, dimensions=2) #PCoA分析
coordinates = pcoa_results.samples #PCoA坐标
df_pcoa = coordinates[["PC1", "PC2"]]

indexdf = pd.read_excel("index.xlsx") #为每个样本分配类型
df_merged = pd.concat([df_pcoa.reset_index(drop=True), indexdf.reset_index(drop=True)], axis=1) #合并坐标与类型数据

sns.scatterplot(data=df_merged,x="PC1",y="PC2",hue="Type") #绘制PCoA散点图
plt.title("Figure 1: Scatter Plot", 
          fontsize=16)
plt.xlabel('PcoA 1', 
           fontsize=16)
plt.ylabel('PcoA 2', 
           fontsize=16)
plt.show(block=True)

需求说明

我使用relative_abundances.csvindex.csv数据生成了基础散点图,现在希望给每类样本添加一个置信椭圆(类似带分类置信椭圆的PCoA示例图)。我找到了matplotlib的置信椭圆文档,但无法和现有散点图整合,有人说Python无法实现,推荐用R的ggplot2,现寻求Python中的可行方案。


解决方案

方法1:整合matplotlib官方置信椭圆函数

直接复用matplotlib官方的置信椭圆计算逻辑,在散点图基础上遍历每个类别绘制椭圆:

import numpy as np
import pandas as pd
from scipy.spatial import distance
from skbio.stats.ordination import pcoa
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.patches import Ellipse
import matplotlib.transforms as transforms

# 定义置信椭圆生成函数
def confidence_ellipse(x, y, ax, n_std=1.96, facecolor='none', **kwargs):
    if x.size != y.size:
        raise ValueError("x和y维度必须一致")
    # 计算协方差矩阵与相关系数
    cov = np.cov(x, y)
    pearson = cov[0, 1]/np.sqrt(cov[0, 0] * cov[1, 1])
    # 椭圆参数计算
    ell_radius_x = np.sqrt(1 + pearson)
    ell_radius_y = np.sqrt(1 - pearson)
    ellipse = Ellipse((0, 0), width=ell_radius_x * 2, height=ell_radius_y * 2,
                      facecolor=facecolor, **kwargs)
    # 坐标系转换(匹配样本均值与标准差)
    scale_x = np.sqrt(cov[0, 0]) * n_std
    mean_x = np.mean(x)
    scale_y = np.sqrt(cov[1, 1]) * n_std
    mean_y = np.mean(y)
    transf = transforms.Affine2D() \
        .rotate_deg(45) \
        .scale(scale_x, scale_y) \
        .translate(mean_x, mean_y)
    ellipse.set_transform(transf + ax.transData)
    return ax.add_patch(ellipse)

# 原PCoA分析流程不变
df = pd.read_excel("relative_abundances.xlsx")
dfnumpy = df.to_numpy()
dfdistances=distance.pdist(dfnumpy, 'braycurtis')
pcoa_results = pcoa(dfdistances, dimensions=2)
coordinates = pcoa_results.samples
df_pcoa = coordinates[["PC1", "PC2"]]

indexdf = pd.read_excel("index.xlsx")
df_merged = pd.concat([df_pcoa.reset_index(drop=True), indexdf.reset_index(drop=True)], axis=1)

# 绘制散点图
fig, ax = plt.subplots(figsize=(8, 6))
sns.scatterplot(data=df_merged, x="PC1", y="PC2", hue="Type", s=60, ax=ax)

# 遍历每个类别绘制95%置信椭圆
unique_types = df_merged["Type"].unique()
colors = sns.color_palette(n_colors=len(unique_types))
for type_label, color in zip(unique_types, colors):
    subset = df_merged[df_merged["Type"] == type_label]
    confidence_ellipse(subset["PC1"], subset["PC2"], ax, 
                       n_std=1.96, edgecolor=color, linewidth=2)

# 设置图属性
plt.title("PCoA图(带95%置信椭圆)", fontsize=16)
plt.xlabel('PCoA 1', fontsize=16)
plt.ylabel('PCoA 2', fontsize=16)
plt.legend(title="样本类型")
plt.show(block=True)

方法2:使用Seaborn简化实现

利用Seaborn的lmplot结合回归拟合功能,快速生成带置信椭圆的图:

import numpy as np
import pandas as pd
from scipy.spatial import distance
from skbio.stats.ordination import pcoa
import matplotlib.pyplot as plt
import seaborn as sns

# 原PCoA分析流程不变
df = pd.read_excel("relative_abundances.xlsx")
dfnumpy = df.to_numpy()
dfdistances=distance.pdist(dfnumpy, 'braycurtis')
pcoa_results = pcoa(dfdistances, dimensions=2)
coordinates = pcoa_results.samples
df_pcoa = coordinates[["PC1", "PC2"]]

indexdf = pd.read_excel("index.xlsx")
df_merged = pd.concat([df_pcoa.reset_index(drop=True), indexdf.reset_index(drop=True)], axis=1)

# 绘制带置信椭圆的散点图
g = sns.lmplot(data=df_merged, x="PC1", y="PC2", hue="Type", 
               fit_reg=False, scatter_kws={"s": 60})

# 为每个类别添加置信椭圆
for label, color in zip(df_merged["Type"].unique(), sns.color_palette()):
    subset = df_merged[df_merged["Type"] == label]
    sns.regplot(data=subset, x="PC1", y="PC2", 
                scatter=False, ci=95, color=color,
                line_kws={"linestyle": ""})

# 设置图属性
plt.title("PCoA图(带95%置信椭圆)", fontsize=16)
plt.xlabel('PCoA 1', fontsize=16)
plt.ylabel('PCoA 2', fontsize=16)
plt.show(block=True)

说明

  • 两种方法默认使用95%置信水平(n_std=1.96),若需调整为99%,可将参数改为n_std=2.58
  • 方法1更灵活,支持自定义椭圆填充色、线条样式等;方法2代码更简洁,适合快速实现。

内容的提问来源于stack exchange,提问作者Lina

火山引擎 最新活动