{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "c4f99678-6c21-4d96-bad9-3e40230b21af",
   "metadata": {},
   "source": [
    "## Impute gene expressions for seqFISH data from Stereo-seq data\n",
    "\n",
    "For every seqFISH cell (slice `0`), find its nearest Stereo-seq spot (slice `1`) in the latent space stored in `adata_inspire.h5ad`, then copy that spot's counts (the `count` layer) for the top highly variable Stereo-seq genes that are absent from the seqFISH gene panel. The result is written to `adata_seqfish_imputed.h5ad`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "894a8ce5-ee58-4de7-9038-57c63fc1117d",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import scanpy as sc\n",
    "import anndata as ad\n",
    "import os\n",
    "\n",
    "from sklearn.neighbors import NearestNeighbors"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "fe933b37-9367-44c6-95ef-d99eba2a764e",
   "metadata": {},
   "source": [
    "### Load results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "4d8c0357-c407-49bd-b206-70354bf51ece",
   "metadata": {},
   "outputs": [],
   "source": [
    "# res_path is reused by the final cell to write the imputed object next to the input.\n",
    "res_path = \"Results/INSPIRE_diff_tech_embryo\"\n",
    "adata_full = sc.read_h5ad(res_path + \"/adata_inspire.h5ad\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a42b2f0f-1043-40ae-a7ab-73b3c67278af",
   "metadata": {},
   "source": [
    "### Gene imputation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "248995e0-36b0-4d7c-bb27-0bcb4f767b6a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Split the integrated object by slice; labels are compared as strings.\n",
    "slice_labels = adata_full.obs[\"slice\"].values.astype(str)\n",
    "ad_0 = adata_full[slice_labels == \"0\", :] # seqfish\n",
    "z_0 = ad_0.obsm[\"latent\"]\n",
    "ad_1 = adata_full[slice_labels == \"1\", :] # stereo-seq\n",
    "z_1 = ad_1.obsm[\"latent\"]\n",
    "\n",
    "# For each row of z_0, nn_idx holds the row index (in ad_1 order) of its\n",
    "# single nearest neighbour among the rows of z_1.\n",
    "neigh = NearestNeighbors(n_neighbors=1)\n",
    "neigh.fit(z_1)\n",
    "nn_idx = neigh.kneighbors(z_0, 1, return_distance=False).reshape(-1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "2f41dcb7-9fd6-4d9f-84f2-f683088c5f64",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Load Stereo-seq data...\n",
      "Load seqFISH data...\n"
     ]
    }
   ],
   "source": [
    "print(\"Load Stereo-seq data...\")\n",
    "data_dir = \"data/Stereoseq_mouse_embryo\"\n",
    "adata_stereoseq = sc.read_h5ad(os.path.join(data_dir, \"E9.5_E1S1.MOSTA.h5ad\"))\n",
    "# Use the 'count' layer as X so downstream steps operate on counts.\n",
    "adata_stereoseq.X = adata_stereoseq.layers['count']\n",
    "adata_stereoseq.var_names_make_unique()\n",
    "\n",
    "# Append the slice suffix to the obs names, then select and reorder rows to\n",
    "# follow ad_1 so that positions in nn_idx address rows of adata_1.\n",
    "adata_1 = adata_stereoseq.copy()\n",
    "adata_1.obs.index = adata_1.obs.index + \"-1\"\n",
    "adata_1 = adata_1[ad_1.obs.index, :]\n",
    "\n",
    "print(\"Load seqFISH data...\")\n",
    "\n",
    "data_dir = \"data/seqFISH_mouse_embryo\"\n",
    "counts = pd.read_csv(data_dir+\"/counts.csv\", index_col=0)\n",
    "metadata = pd.read_csv(data_dir+\"/metadata.csv\", index_col=0)\n",
    "# Align metadata rows with the rows of the count table.\n",
    "metadata = metadata.loc[counts.index, :]\n",
    "adata_seqfish = ad.AnnData(np.array(counts.values))\n",
    "adata_seqfish.var.index = counts.columns\n",
    "adata_seqfish.obs = metadata\n",
    "# Keep embryo2 only and drop cells annotated as low quality.\n",
    "adata_seqfish = adata_seqfish[adata_seqfish.obs[\"embryo\"] == \"embryo2\", ]\n",
    "adata_seqfish = adata_seqfish[adata_seqfish.obs[\"celltype_mapped_refined\"] != \"Low quality\", ]\n",
    "adata_seqfish.obsm[\"spatial\"] = np.array(adata_seqfish.obs[[\"x_global\", \"y_global\"]])\n",
    "adata_seqfish.var_names_make_unique()\n",
    "\n",
    "# Same suffix-and-reorder step as above, this time against ad_0.\n",
    "adata_0 = adata_seqfish.copy()\n",
    "adata_0.obs.index = adata_0.obs.index + \"-0\"\n",
    "adata_0 = adata_0[ad_0.obs.index, :]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "e7b6d9dc-d13d-4f19-9066-4c3adccc1e52",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Candidate genes: present in the Stereo-seq data but not in the seqFISH gene list.\n",
    "adata_1_unique = adata_1[:, ~adata_1.var.index.isin(adata_0.var.index)].copy()\n",
    "adata_1_unique.var_names_make_unique()\n",
    "hvg_num = 2000\n",
    "sc.pp.highly_variable_genes(adata_1_unique, flavor='seurat_v3', n_top_genes=hvg_num)\n",
    "# Selected gene names in alphabetical order; this list fixes the column order\n",
    "# of the imputed matrix built below.\n",
    "is_hvg = adata_1_unique.var[\"highly_variable\"].to_numpy(dtype=bool)\n",
    "hvg = sorted(adata_1_unique.var.index[is_hvg])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "aab8ef42-7960-4570-a50e-8cd58bcdbc56",
   "metadata": {},
   "outputs": [],
   "source": [
    "# nn_idx addresses rows of adata_1 and has one entry per row of adata_0.\n",
    "assert adata_1.n_obs == z_1.shape[0]\n",
    "assert adata_0.n_obs == nn_idx.shape[0]\n",
    "\n",
    "# Gather every selected gene in one indexing step instead of slicing the\n",
    "# AnnData once per gene. The width follows len(hvg) rather than hvg_num, so the\n",
    "# matrix stays consistent with the gene labels even if fewer than hvg_num\n",
    "# genes were selected.\n",
    "expr_1_hvg = adata_1[:, hvg].X[nn_idx]\n",
    "if hasattr(expr_1_hvg, \"toarray\"):  # sparse count matrix -> dense\n",
    "    expr_1_hvg = expr_1_hvg.toarray()\n",
    "gene_impu = np.asarray(expr_1_hvg, dtype=np.float64)\n",
    "assert gene_impu.shape == (adata_0.shape[0], len(hvg))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "23940cc7-0cd5-4de5-95ac-d509a3825f38",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Rows follow ad_0 (seqFISH cells); columns follow hvg.\n",
    "adata_seqfish_imputed = ad.AnnData(gene_impu)\n",
    "adata_seqfish_imputed.var.index = hvg\n",
    "adata_seqfish_imputed.obs.index = ad_0.obs.index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "be3b8ba1-aa2d-4140-84bb-353538cc7937",
   "metadata": {},
   "outputs": [],
   "source": [
    "adata_seqfish_imputed.write(res_path + \"/adata_seqfish_imputed.h5ad\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b6da60e5-d729-43ef-9848-f7d1dcf14f6e",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.15"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}