Impute gene expressions for seqFISH data from Stereo-seq data

[1]:
import pandas as pd
import numpy as np
import scanpy as sc
import anndata as ad
import os

from sklearn.neighbors import NearestNeighbors

Load results

[2]:
# Folder the integration result file is read from.
res_path = "Results/INSPIRE_diff_tech_embryo"

# Load the saved AnnData object; later cells read its `slice` column in .obs
# and the "latent" entry in .obsm.
inspire_result_file = res_path + "/adata_inspire.h5ad"
adata_full = sc.read_h5ad(inspire_result_file)

Gene imputation

[3]:
# Split the joint object by its `slice` label (compared as strings).
slice_labels = adata_full.obs["slice"].values.astype(str)
is_seqfish = slice_labels == "0"
is_stereoseq = slice_labels == "1"

ad_0 = adata_full[is_seqfish, :]  # seqfish
ad_1 = adata_full[is_stereoseq, :]  # stereo-seq

# Latent coordinates of each slice.
z_0 = ad_0.obsm["latent"]
z_1 = ad_1.obsm["latent"]

# For every seqFISH row, look up its single closest Stereo-seq row in latent
# space. The returned values are row positions into z_1 (i.e. into ad_1).
neigh = NearestNeighbors(n_neighbors=1).fit(z_1)
nn_idx = neigh.kneighbors(z_0, n_neighbors=1, return_distance=False).ravel()
[4]:
# ---- Stereo-seq ------------------------------------------------------------
print("Load Stereo-seq data...")
data_dir = "data/Stereoseq_mouse_embryo"
stereoseq_file = os.path.join(data_dir, "E9.5_E1S1.MOSTA.h5ad")
adata_stereoseq = sc.read_h5ad(stereoseq_file)
# Use the 'count' layer as the main expression matrix.
adata_stereoseq.X = adata_stereoseq.layers['count']
adata_stereoseq.var_names_make_unique()

# Append "-1" to the spot names so they can be looked up by ad_1's names, then
# reorder the rows to ad_1's order (nn_idx holds row positions in that order).
adata_1 = adata_stereoseq.copy()
adata_1.obs.index = adata_1.obs.index + "-1"
adata_1 = adata_1[ad_1.obs.index, :]

# ---- seqFISH ---------------------------------------------------------------
print("Load seqFISH data...")

data_dir = "data/seqFISH_mouse_embryo"
counts = pd.read_csv(data_dir+"/counts.csv", index_col=0)
# Keep the metadata rows in the same order as the count matrix rows.
metadata = pd.read_csv(data_dir+"/metadata.csv", index_col=0).loc[counts.index, :]

adata_seqfish = ad.AnnData(np.array(counts.values))
adata_seqfish.var.index = counts.columns
adata_seqfish.obs = metadata

# Keep only embryo2 cells that are not flagged "Low quality".
from_embryo2 = adata_seqfish.obs["embryo"] == "embryo2"
not_low_quality = adata_seqfish.obs["celltype_mapped_refined"] != "Low quality"
adata_seqfish = adata_seqfish[from_embryo2 & not_low_quality, ]

# Store the global x/y coordinates as the spatial embedding.
xy_global = adata_seqfish.obs[["x_global", "y_global"]]
adata_seqfish.obsm["spatial"] = np.array(xy_global)
adata_seqfish.var_names_make_unique()

# Append "-0" to the cell names so they can be looked up by ad_0's names, then
# reorder the rows to ad_0's order.
adata_0 = adata_seqfish.copy()
adata_0.obs.index = adata_0.obs.index + "-0"
adata_0 = adata_0[ad_0.obs.index, :]
Load Stereo-seq data...
Load seqFISH data...
[5]:
# Candidate genes for imputation: present in the Stereo-seq data but absent
# from the seqFISH gene list.
absent_from_seqfish = ~adata_1.var.index.isin(adata_0.var.index)
adata_1_unique = adata_1[:, absent_from_seqfish].copy()
adata_1_unique.var_names_make_unique()

# Number of highly variable genes requested from scanpy.
hvg_num = 2000
sc.pp.highly_variable_genes(adata_1_unique, flavor='seurat_v3', n_top_genes=hvg_num)

# Collect the flagged genes (ordered by rank), then store them as an
# alphabetically sorted list -- the final order of `hvg` is alphabetical.
flagged = adata_1_unique.var.loc[adata_1_unique.var["highly_variable"] == True]
hvg = flagged.sort_values(by="highly_variable_rank").index
hvg = sorted(hvg)
[6]:
# Impute every selected Stereo-seq gene onto the seqFISH cells by copying, for
# each seqFISH cell, the value of its nearest Stereo-seq neighbour (nn_idx holds
# the neighbour's row position in adata_1).
#
# The gene subset is taken once and the neighbour rows are gathered in a single
# indexing step, instead of building one AnnData view per gene inside a loop.
# The result has exactly len(hvg) columns, so it always matches the gene list
# even if fewer than `hvg_num` genes were selected.
expr_nn = adata_1[:, hvg].X[nn_idx]

# X may be a scipy sparse matrix or already a dense array; densify only if needed.
if hasattr(expr_nn, "toarray"):
    expr_nn = expr_nn.toarray()

# float64 matches the dtype of the np.zeros buffer this cell used previously.
gene_impu = np.asarray(expr_nn, dtype=np.float64)

# One row per seqFISH cell, one column per imputed gene.
assert gene_impu.shape == (adata_0.shape[0], len(hvg)), (
    "imputed matrix shape does not match (n seqFISH cells, n imputed genes)"
)
0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
1700
1800
1900
[7]:
# Wrap the imputed matrix in an AnnData object: rows take the cell names of
# ad_0 and columns take the names of the imputed genes.
adata_seqfish_imputed = ad.AnnData(gene_impu)
adata_seqfish_imputed.obs.index = ad_0.obs.index
adata_seqfish_imputed.var.index = hvg
[8]:
# Save the imputed seqFISH object into the results folder.
res_path = "Results/INSPIRE_diff_tech_embryo"
imputed_file = res_path + "/adata_seqfish_imputed.h5ad"
adata_seqfish_imputed.write(imputed_file)
[ ]: