{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "0bdd16ae-cc8a-4432-be05-5c7fd9a47678",
   "metadata": {},
   "source": [
    "## Calculate benchmarking scores\n",
    "\n",
    "Compute per-slice ASW, ARI and NMI, plus factor diversity and factor coherence, for the INSPIRE results stored in `Results/INSPIRE_DLPFC`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "7d76cff5-e6eb-4b71-8bd1-ea2388e37906",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import scanpy as sc\n",
    "import anndata as ad\n",
    "\n",
    "import INSPIRE\n",
    "\n",
    "import warnings\n",
    "warnings.filterwarnings(\"ignore\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "595f6f33-ae19-4854-afe3-8ee242fd5487",
   "metadata": {},
   "source": [
    "### Load results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "f76109bf-4dc3-44aa-a9bc-5d44494f7edf",
   "metadata": {},
   "outputs": [],
   "source": [
    "res_dir = \"Results/INSPIRE_DLPFC\"\n",
    "adata = sc.read_h5ad(res_dir + \"/adata_inspire.h5ad\")\n",
    "basis_df = pd.read_csv(res_dir + \"/basis_df_inspire.csv\", index_col=0)\n",
    "basis = np.array(basis_df.values)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "d9cea66b-df98-4abb-b13a-f871f6282d6c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "AnnData object with n_obs × n_vars = 14243 × 13988\n",
       " obs: 'in_tissue', 'array_row', 'array_col', 'barcode', 'slice_id', 'layer', 'n_genes', 'slice', 'Proportion of spatial factor 1', 'Proportion of spatial factor 2', 'Proportion of spatial factor 3', 'Proportion of spatial factor 4', 'Proportion of spatial factor 5', 'Proportion of spatial factor 6', 'Proportion of spatial factor 7', 'Proportion of spatial factor 8', 'Proportion of spatial factor 9', 'Proportion of spatial factor 10', 'Proportion of spatial factor 11', 'Proportion of spatial factor 12', 'Proportion of spatial factor 13', 'Proportion of spatial factor 14', 'Proportion of spatial factor 15', 'Proportion of spatial factor 16', 'Proportion of spatial factor 17', 'Proportion of spatial factor 18', 'Proportion of spatial factor 19', 'Proportion of spatial factor 20', 'GM'\n",
       " uns: 'layer_colors', 'slice_colors'\n",
       " obsm: 'X_umap', 'latent', 'spatial'"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "adata"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "cd9d645d-0315-4fab-b807-b7a8566a16ab",
   "metadata": {},
   "source": [
    "### Calculate ASW, ARI, and NMI"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "e3ea1b1d-dfd0-4da7-9176-1acb2fb04cfa",
   "metadata": {},
   "outputs": [],
   "source": [
    "n_slices = len(set(adata.obs[\"slice\"].values))\n",
    "\n",
    "# The per-slice loops below address slices as \"0\" ... str(n_slices - 1);\n",
    "# fail loudly here rather than silently scoring an empty subset.\n",
    "expected_labels = {str(i) for i in range(n_slices)}\n",
    "assert set(adata.obs[\"slice\"].astype(str)) == expected_labels, \"slice labels must be 0..n_slices-1\"\n",
    "\n",
    "\n",
    "def get_slice(adata, slice_id):\n",
    "    \"\"\"Return the rows of `adata` whose `obs[\"slice\"]` label, as a string, equals `str(slice_id)`.\"\"\"\n",
    "    return adata[adata.obs[\"slice\"].values.astype(str) == str(slice_id), :]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "56e009c7-3413-4237-9bcb-6e1a57db43da",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "slice 0 ASW = 0.21675144\n",
      "slice 1 ASW = 0.23563725\n",
      "slice 2 ASW = 0.20992284\n",
      "slice 3 ASW = 0.21229379\n"
     ]
    }
   ],
   "source": [
    "# ASW: computed per slice from the `latent` embedding and the `layer` labels\n",
    "for i in range(n_slices):\n",
    "    adata_slice = get_slice(adata, i)\n",
    "    ASW = INSPIRE.utils.calculate_ASW(adata_slice.obsm[\"latent\"], adata_slice.obs[\"layer\"].astype(str))\n",
    "    print(\"slice\", str(i), \"ASW =\", ASW)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "06b33a32-9782-4837-b38f-14bd5fb4a761",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "slice 0 ARI = 0.5760796384927773\n",
      "slice 1 ARI = 0.6430556022130648\n",
      "slice 2 ARI = 0.6137699726059762\n",
      "slice 3 ARI = 0.6139559439969932\n"
     ]
    }
   ],
   "source": [
    "# ARI: computed per slice between the `layer` labels and the `GM` labels\n",
    "for i in range(n_slices):\n",
    "    adata_slice = get_slice(adata, i)\n",
    "    ARI = INSPIRE.utils.calculate_ARI(adata_slice.obs[\"layer\"].astype(str), adata_slice.obs[\"GM\"])\n",
    "    print(\"slice\", str(i), \"ARI =\", ARI)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "fde24d2e-5bee-4a8e-96c7-72f9cff8c97a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "slice 0 NMI = 0.7143137356477687\n",
      "slice 1 NMI = 0.7319254540318172\n",
      "slice 2 NMI = 0.72224978876709\n",
      "slice 3 NMI = 0.712963408054093\n"
     ]
    }
   ],
   "source": [
    "# NMI: computed per slice between the `layer` labels and the `GM` labels\n",
    "for i in range(n_slices):\n",
    "    adata_slice = get_slice(adata, i)\n",
    "    NMI = INSPIRE.utils.calculate_NMI(adata_slice.obs[\"layer\"].astype(str), adata_slice.obs[\"GM\"])\n",
    "    print(\"slice\", str(i), \"NMI =\", NMI)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "68c201cc-3da0-46e2-8c20-2fe737fff363",
   "metadata": {},
   "source": [
    "### Calculate factor diversity and factor coherence"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "0cc03188-d08f-4c52-b194-a7d798a7befb",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "factor_diversity = 0.525\n"
     ]
    }
   ],
   "source": [
    "# factor diversity\n",
    "N_TOP_GENES = 10  # passed as `n_top_genes` here and to the factor coherence call below\n",
    "factor_diversity = INSPIRE.utils.calculate_factor_diversity(basis, n_top_genes=N_TOP_GENES)\n",
    "print(\"factor_diversity =\", factor_diversity)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "5fe04eb3-ef95-4872-bf6b-762d2d336475",
   "metadata": {},
   "outputs": [],
   "source": [
    "# get count matrix\n",
    "data_dir = \"data/DLPFC/spatialLIBD\"\n",
    "slice_ids = [151673, 151674, 151675, 151676]\n",
    "\n",
    "adata_st_list = []\n",
    "for slice_pos, slice_idx in enumerate(slice_ids):\n",
    "    adata_st = sc.read_visium(\n",
    "        path=data_dir + \"/%d\" % slice_idx,\n",
    "        count_file=\"%d_filtered_feature_bc_matrix.h5\" % slice_idx,\n",
    "    )\n",
    "    # Suffix each barcode with the slice position (\"-0\", \"-1\", ...);\n",
    "    # `adata.obs.index` is used below to select rows by these names.\n",
    "    adata_st.obs.index = adata_st.obs.index + \"-%d\" % slice_pos\n",
    "    adata_st.var_names_make_unique()\n",
    "    adata_st_list.append(adata_st)\n",
    "\n",
    "# Restrict the raw counts to the spots in `adata` and the genes in `basis_df`, in that order.\n",
    "adata_raw = ad.concat(adata_st_list)\n",
    "adata_raw = adata_raw[adata.obs.index, :]\n",
    "adata_raw = adata_raw[:, basis_df.columns]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "6b430e5c-4f78-4521-941a-4706bf4067b6",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "factor_coherence = 0.11904762806481868\n"
     ]
    }
   ],
   "source": [
    "# factor coherence\n",
    "# `.toarray()` densifies the sparse count matrix straight to an ndarray (no intermediate np.matrix).\n",
    "factor_coherence = INSPIRE.utils.calculate_factor_coherence(basis, n_top_genes=N_TOP_GENES, gene_counts=adata_raw.X.toarray())\n",
    "print(\"factor_coherence =\", factor_coherence)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.15"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}