{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "e2e5d533",
   "metadata": {},
   "source": [
    "# ComSeg on large scale data with SOPA / SpatialData\n",
    "\n",
    "#### ComSeg is now integrated in SOPA : https://gustaveroussy.github.io/sopa/tutorials/comseg/.  This tutorial is depreciated\n",
    "\n",
    "To ease the application of ComSeg on large datasets, ComSeg can be used with SOPA: https://gustaveroussy.github.io/sopa/  \n",
    "Sopa is build on top of Spatial data\n",
    "\n",
    "The following example is a modified version of the official SOPA tutorial : \n",
    "https://gustaveroussy.github.io/sopa/tutorials/api_usage/  \n",
    "and done with the version 1.0.14 of SOPA"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "4e462b82",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import sopa.segmentation\n",
    "import sopa.io\n",
    "from matplotlib import pyplot as plt\n",
    "from tqdm import tqdm\n",
    "from sopa._sdata import to_intrinsic\n",
    "import sopa\n",
    "from spatialdata import SpatialData, read_zarr\n",
    "from spatialdata.models import PointsModel\n",
    "from pathlib import Path"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8cdcef05",
   "metadata": {},
   "source": [
    "### Load example data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "29e12379",
   "metadata": {},
   "outputs": [],
   "source": [
    "sdata = sopa.io.uniform()\n",
    "image_key = \"image\"\n",
    "points_key = \"transcripts\"\n",
    "gene_column = \"genes\""
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a2170a04",
   "metadata": {},
   "source": [
    "### Segmentation of nuclei with Cellpose \n",
    "The nucleus segmentation will be used as prior by ComSeg"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "0a10bcf7",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "patches = sopa.segmentation.Patches2D(sdata, image_key, patch_width=1200, patch_overlap=50)\n",
    "patches.write()\n",
    "from sopa._sdata import get_spatial_image\n",
    "print(get_spatial_image(sdata, image_key).c.values)\n",
    "channels = [\"DAPI\"]\n",
    "method = sopa.segmentation.methods.cellpose_patch(diameter=35, channels=channels, flow_threshold=2, cellprob_threshold=-6)\n",
    "segmentation = sopa.segmentation.StainingSegmentation(sdata, method, channels, min_area=2500)\n",
    "\n",
    "\n",
    "# The cellpose boundaries will be temporary saved here. You can choose a different path\n",
    "cellpose_temp_dir = \"tuto.zarr/.sopa_cache/cellpose\"\n",
    "segmentation.write_patches_cells(cellpose_temp_dir)\n",
    "\n",
    "cells = sopa.segmentation.StainingSegmentation.read_patches_cells(cellpose_temp_dir)\n",
    "cells = sopa.segmentation.shapes.solve_conflicts(cells)\n",
    "shapes_key = \"cellpose_boundaries\" # name of the key given to the cells in sdata.shapes\n",
    "sopa.segmentation.StainingSegmentation.add_shapes(sdata, cells, image_key, shapes_key)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d9986b01",
   "metadata": {},
   "source": [
    "### Compute the patches for ComSeg"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "1a69ac14",
   "metadata": {},
   "outputs": [],
   "source": [
    "image_key = \"image\"\n",
    "points_key = \"transcripts\" # (ignore this for multiplex imaging)\n",
    "gene_column = \"genes\" # (option\n",
    "config_comseg = {}\n",
    "baysor_temp_dir = \"tuto.zarr/.sopa_cache/comseg\"\n",
    "\n",
    "patches = sopa.segmentation.Patches2D(sdata, points_key, patch_width=200, patch_overlap=50)\n",
    "valid_indices = patches.patchify_transcripts(baysor_temp_dir, config=config_comseg, use_prior=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "362286f5",
   "metadata": {},
   "source": [
    "### Compute centroid for ComSeg"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "52f3d7dc",
   "metadata": {},
   "outputs": [],
   "source": [
    "def add_centroids_to_sdata(sdata,\n",
    "                           points_key=\"transcripts\",\n",
    "                           shapes_key='cellpose_boundaries', z_constant=None):\n",
    "    centroid = sdata[shapes_key].geometry.centroid\n",
    "    x_centroid = list(centroid.geometry.x)\n",
    "    y_centroid = list(centroid.geometry.y)\n",
    "    if z_constant is not None:\n",
    "        z_centroid = [z_constant] * len(y_centroid)\n",
    "        coords = pd.DataFrame({\"x\": x_centroid, \"y\": y_centroid, \"z\": z_centroid})\n",
    "\n",
    "    else:\n",
    "        if \"z\" in sdata[points_key].columns:\n",
    "            z = list(sdata[points_key].z.unique().compute())\n",
    "            assert len(z)==1, \"3D point cloud with 2D segmentation, manually set z_constant\"\n",
    "            z_centroid = [z[0]] * len(y_centroid)\n",
    "            coords = pd.DataFrame({\"x\": x_centroid, \"y\": y_centroid, \"z\": z_centroid})\n",
    "        else:\n",
    "            coords = pd.DataFrame({\"x\": x_centroid, \"y\": y_centroid})\n",
    "    points = PointsModel.parse(coords)\n",
    "    sdata['centroid'] = points\n",
    "    sdata['centroid'] = to_intrinsic(sdata, sdata['centroid'], points_key)\n",
    "    return sdata\n",
    "\n",
    "sdata_centroid = SpatialData()\n",
    "sdata_centroid['cellpose_boundaries'] = sdata['cellpose_boundaries']\n",
    "sdata_centroid['transcripts'] = sdata['transcripts']\n",
    "sdata_centroid = add_centroids_to_sdata(sdata_centroid,\n",
    "                           points_key=\"transcripts\",\n",
    "                           shapes_key='cellpose_boundaries',\n",
    "                               z_constant=1)\n",
    "\n",
    "\n",
    "baysor_temp_dir = \"tuto.zarr/.sopa_cache/comseg_centroid\"\n",
    "config_comseg ={}\n",
    "points_key = \"centroid\"\n",
    "patches = sopa.segmentation.Patches2D(sdata_centroid, points_key, patch_width=200, patch_overlap=50)\n",
    "valid_indices = patches.patchify_transcripts(baysor_temp_dir, config=config_comseg, use_prior=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "359e3b8d",
   "metadata": {},
   "source": [
    "### Run ComSeg on each patch"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "e4c5ea4d",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "import comseg\n",
    "from comseg import dataset as ds\n",
    "from comseg import dictionary\n",
    "from comseg import model\n",
    "import json\n",
    "\n",
    "#### HYPERPARAMETER ####\n",
    "MEAN_CELL_DIAMETER = 15  # in micrometer\n",
    "MAX_CELL_RADIUS = 50  # in micrometer\n",
    "#########################\n",
    "\n",
    "path_transcript = \"tuto.zarr/.sopa_cache/comseg\"\n",
    "path_centroid = \"tuto.zarr/.sopa_cache/comseg_centroid\"\n",
    "\n",
    "for patch_index in tqdm(list(range(len(patches.ilocs)))):\n",
    "    path_dataset_folder = Path(path_transcript) / str(patch_index)\n",
    "    path_dataset_folder_centroid = Path(path_centroid) / str(patch_index)\n",
    "    \n",
    "    dataset = ds.ComSegDataset(\n",
    "        path_dataset_folder=path_dataset_folder,\n",
    "        dict_scale={\"x\": 1, 'y': 1, \"z\": 1},\n",
    "        mean_cell_diameter = MEAN_CELL_DIAMETER,\n",
    "        gene_column = \"genes\",\n",
    "        )\n",
    "\n",
    "    \n",
    "    dico_proba_edge, count_matrix = dataset.compute_edge_weight(  # in micrometer\n",
    "    images_subset=None,\n",
    "    n_neighbors=40,\n",
    "    sampling=True,\n",
    "    sampling_size=10000\n",
    "    )\n",
    "    \n",
    "    Comsegdict = dictionary.ComSegDict(\n",
    "    dataset=dataset,\n",
    "    mean_cell_diameter=MEAN_CELL_DIAMETER,\n",
    "    community_detection=\"with_prior\",\n",
    "    prior_name=\"cell\",\n",
    "    )\n",
    "    Comsegdict.run_all(max_cell_radius = MAX_CELL_RADIUS,\n",
    "                        path_dataset_folder_centroid=path_dataset_folder_centroid,\n",
    "                               file_extension=\".csv\")\n",
    "\n",
    "\n",
    "                       \n",
    "    anndata_comseg, json_dict = Comsegdict.anndata_from_comseg_result(\n",
    "        return_polygon = True,\n",
    "        alpha = 0.5,\n",
    "        min_rna_per_cell = 5)\n",
    "    anndata_comseg.write_loom(path_dataset_folder / 'segmentation_counts.loom')\n",
    "    ## save the json_dict as json\n",
    "    with open(path_dataset_folder / \"segmentation_polygons.json\", 'w') as f:\n",
    "        json.dump(json_dict['transcripts'], f)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "288748e6",
   "metadata": {},
   "source": [
    "### Aggregate results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "4d536ef7",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "\n",
    "from sopa.segmentation.baysor.resolve import resolve\n",
    "resolve(sdata, path_transcript, gene_column, min_area=10)\n",
    "shapes_key = \"baysor_boundaries\"\n",
    "aggregator = sopa.segmentation.Aggregator(sdata, image_key=image_key, shapes_key=shapes_key)\n",
    "aggregator.compute_table(gene_column=gene_column, average_intensities=True)\n",
    "sdata"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d3a6f1fc",
   "metadata": {},
   "source": [
    "### Plot results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "ce084c0b",
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "import spatialdata_plot\n",
    "sdata.pl.render_points(size=0.01, color=\"r\")\\\n",
    "    .pl.render_images()\\\n",
    "    .pl.render_shapes(shapes_key, outline=True, fill_alpha=0, outline_color=\"w\")\\\n",
    "    .pl.show(\"global\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f5e9f7a1",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "sopa",
   "language": "python",
   "name": "sopa"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.14"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}