Analysis of spatial factors corresponding to myoepithelial subpopulations

[1]:
import pandas as pd
import numpy as np
import scanpy as sc
import anndata as ad
import matplotlib.pyplot as plt
import matplotlib as mpl
mpl.rcParams['pdf.fonttype'] = 42
mpl.rcParams['ps.fonttype'] = 42
import os
import sys

import warnings
warnings.filterwarnings("ignore")

Load results

[2]:
res_path = "/gpfs/gibbs/pi/zhao/jz874/project/jiazhao/inspire_revision/tutorials/new_examples/human_breast_cancer_xenium"
adata = sc.read_h5ad(res_path + "/adata_inspire.h5ad")
basis_df = pd.read_csv(res_path + "/basis_df_inspire.csv", index_col=0)
adata.obsm["factors"] = np.array(adata.obs[["Proportion of spatial factor "+str(j+1) for j in range(50)]].values)

Visualization of gene signatures

[3]:
topic_id = 30

topic_profile = np.array(basis_df.iloc[topic_id,:].values)
order = np.argsort(-topic_profile)
print(basis_df.columns[order])

potential_marker = list(basis_df.columns[order[:50]])
marker_ours = []
for i in range(len(potential_marker)):
    id_1 = np.argsort(-basis_df[potential_marker[i]])[0]
    id_2 = np.argsort(-basis_df[potential_marker[i]])[1]
    val_1 = basis_df[potential_marker[i]][id_1]
    val_2 = basis_df[potential_marker[i]][id_2]
    if (id_1 == topic_id) & (val_1 >= (val_2*1.5)):
        marker_ours.append(potential_marker[i])
print(marker_ours)
Index(['KRT15', 'SFRP1', 'KRT23', 'PTN', 'TACSTD2', 'KRT7', 'PIGR', 'ALDH1A3',
       'OPRPN', 'KIT',
       ...
       'CD93', 'SFRP4', 'PTPRC', 'ADH1B', 'LYZ', 'IL7R', 'VWF', 'CXCL12',
       'PECAM1', 'POSTN'],
      dtype='object', length=313)
['KRT15', 'SFRP1', 'KRT23', 'PIGR', 'OPRPN', 'ELF5', 'S100A8']
[4]:
topic_id = 34

topic_profile = np.array(basis_df.iloc[topic_id,:].values)
order = np.argsort(-topic_profile)
print(basis_df.columns[order])

potential_marker = list(basis_df.columns[order[:50]])
marker_ours = []
for i in range(len(potential_marker)):
    id_1 = np.argsort(-basis_df[potential_marker[i]])[0]
    id_2 = np.argsort(-basis_df[potential_marker[i]])[1]
    val_1 = basis_df[potential_marker[i]][id_1]
    val_2 = basis_df[potential_marker[i]][id_2]
    if (id_1 == topic_id) & (val_1 >= (val_2*1.5)):
        marker_ours.append(potential_marker[i])
print(marker_ours)
Index(['KRT6B', 'KRT5', 'KRT16', 'KRT14', 'DSP', 'SERPINA3', 'C5orf46', 'DSC2',
       'MYLK', 'EGFR',
       ...
       'CD8A', 'CD93', 'PTPRC', 'SFRP4', 'LYZ', 'CXCL12', 'ADH1B', 'VWF',
       'IL7R', 'PECAM1'],
      dtype='object', length=313)
['KRT6B', 'KRT16', 'C5orf46', 'DSC2', 'FOXC2', 'CLCA2']
[5]:
topic_id = 37

topic_profile = np.array(basis_df.iloc[topic_id,:].values)
order = np.argsort(-topic_profile)
print(basis_df.columns[order])

potential_marker = list(basis_df.columns[order[:50]])
marker_ours = []
for i in range(len(potential_marker)):
    id_1 = np.argsort(-basis_df[potential_marker[i]])[0]
    id_2 = np.argsort(-basis_df[potential_marker[i]])[1]
    val_1 = basis_df[potential_marker[i]][id_1]
    val_2 = basis_df[potential_marker[i]][id_2]
    if (id_1 == topic_id) & (val_1 >= (val_2*1.5)):
        marker_ours.append(potential_marker[i])
print(marker_ours)
Index(['MYLK', 'KRT14', 'MYH11', 'KRT5', 'OXTR', 'DST', 'ACTA2', 'ACTG2',
       'SFRP1', 'PTN',
       ...
       'CD3E', 'CXCR4', 'PTPRC', 'CXCL12', 'VWF', 'SFRP4', 'ADH1B', 'LYZ',
       'IL7R', 'PECAM1'],
      dtype='object', length=313)
['KRT14', 'OXTR', 'DST', 'ACTG2', 'PGR']
[6]:
gene_list = ['KRT15', 'KRT23', 'SFRP1', 'ALDH1A3'] + ['KRT16', 'KRT6B', 'DSC2', 'C5orf46', 'FOXC2'] + ['ACTA2', 'OXTR','ACTG2', 'PGR']


n_factors = 3
gene_set = gene_list
profile = basis_df.iloc[[30,34,37], :]
profile = profile[gene_set]
factor_names = ["Factor: Myoepi 1", "Factor: Myoepi 2", "Factor: Myoepi 3"]

f = plt.figure(figsize=(4.5,1.1))
ax = f.add_subplot(111)
# ax.set_ylabel('Factor', fontsize=14)
im = ax.imshow(profile, cmap='viridis', interpolation='nearest', aspect='auto')
plt.yticks(np.arange(n_factors), factor_names, rotation=0, fontsize=14)
plt.xticks(np.arange(len(gene_set)), gene_set, rotation=90, fontsize=14, style="italic")
# plt.title("Gene signature of factors", fontsize=15)
plt.vlines(x=np.arange(len(gene_set))-0.5, ymin=-0.5, ymax=n_factors-0.5, color="gray", linewidth=1.5, alpha=0.2)
plt.hlines(y=np.arange(n_factors)-0.5, xmin=-0.5, xmax=len(gene_set)-0.5, color="gray", linewidth=1.5, alpha=0.2)
plt.vlines(x=-0.5, ymin=-0.5, ymax=n_factors-0.5, color="k", linewidth=2, alpha=1)
plt.vlines(x=len(gene_set)-0.5, ymin=-0.5, ymax=n_factors-0.5, color="k", linewidth=2, alpha=1)
plt.hlines(y=-0.5, xmin=-0.5, xmax=len(gene_set)-0.5, color="k", linewidth=2, alpha=1)
plt.hlines(y=n_factors-0.5, xmin=-0.5, xmax=len(gene_set)-0.5, color="k", linewidth=2, alpha=1)

plt.show()
../../_images/tutorials_human_breast_cancer_xenium_human_breast_cancer_myo_factors_8_0.png

Correlation analysis of spatial factors and their associated gene expression profiles

[9]:
factor_myoepi = np.array(adata.obs[["Proportion of spatial factor 31", "Proportion of spatial factor 35", "Proportion of spatial factor 38"]].values)
corr_factor = np.corrcoef(factor_myoepi.T)

import seaborn as sns

plt.figure(figsize = (2*0.9,1.6*0.9))
sns.heatmap(corr_factor, vmin=0., vmax=1.)
plt.yticks(ticks=[0.5,1.5,2.5], labels=["Factor: Myoepi 1", "Factor: Myoepi 2", "Factor: Myoepi 3"], rotation=0, fontsize=12)
plt.xticks(ticks=[0.5,1.5,2.5], labels=["", "", ""])

plt.show()
../../_images/tutorials_human_breast_cancer_xenium_human_breast_cancer_myo_factors_13_0.png
[10]:
import seaborn as sns
corr_loading = basis_df.iloc[[30,34,37], :].T.corr()

import seaborn as sns

plt.figure(figsize = (2*0.9,1.6*0.9))
sns.heatmap(corr_loading, vmin=0., vmax=1., cmap="viridis")
plt.yticks(ticks=[0.5,1.5,2.5], labels=["Loadng for myoepi 1", "Loadng for myoepi 2", "Loadng for myoepi 3"], rotation=0, fontsize=12)
plt.xticks(ticks=[0.5,1.5,2.5], labels=["", "", ""])

plt.show()
../../_images/tutorials_human_breast_cancer_xenium_human_breast_cancer_myo_factors_14_0.png