add mitoEM benchmark (part 2)

linok-bc · linok-bc · commit adcb8cfbad93 · 2024-06-05T19:26:48.000-04:00
diff --git a/notebooks/tutorial_benchmarks/mitoem_benchmark.ipynb b/notebooks/tutorial_benchmarks/mitoem_benchmark.ipynb
@@ -0,0 +1,298 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "private_outputs": true,
+      "provenance": [],
+      "gpuType": "T4"
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "language_info": {
+      "name": "python"
+    },
+    "accelerator": "GPU"
+  },
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "source": [
+        "# **MitoEM Benchmark**\n",
+        "\n",
+        "This notebook aims to produce a reproducable benchmark for the [Connectomics MitoEM tutorial](https://connectomics.readthedocs.io/en/latest/tutorials/mito.html). Both evaluation data and a pre-trained benchmark are provided for the user. In this notebook, due to resource limitations, we perform inference on ten slices of data."
+      ],
+      "metadata": {
+        "id": "cEtYbrL25jrh"
+      }
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "## (1) Install dependencies and fetch prepared data"
+      ],
+      "metadata": {
+        "id": "HXmj6mY8Iv6Y"
+      }
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "-qQ1kkxYSaso"
+      },
+      "outputs": [],
+      "source": [
+        "# install Connectomics and dependencies\n",
+        "%%capture\n",
+        "! pip install torch==1.12.0+cu113 torchvision==0.13.0+cu113 torchaudio==0.12.0 --extra-index-url https://download.pytorch.org/whl/cu113\n",
+        "! git clone https://github.com/zudi-lin/pytorch_connectomics.git\n",
+        "%cd pytorch_connectomics/\n",
+        "! pip install --editable .\n",
+        "! pip install numpy\"<1.24\""
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# fetch training data from Hugging Face along with a small amount of cleanup\n",
+        "! mkdir -p datasets/MitoEM\n",
+        "! wget -q -O datasets/MitoEM/EM30-R-im.zip --show-progress https://huggingface.co/datasets/pytc/EM30/resolve/main/EM30-R-im.zip?download=true\n",
+        "! unzip -q datasets/MitoEM/EM30-R-im.zip -d datasets/MitoEM/EM30-R-im\n",
+        "! rm -r datasets/MitoEM/EM30-R-im/__MACOSX\n",
+        "! rm datasets/MitoEM/EM30-R-im.zip\n",
+        "! wget -q -O datasets/MitoEM/mito_val.zip --show-progress https://huggingface.co/datasets/pytc/MitoEM/resolve/main/EM30-R-mito-train-val-v2.zip?download=true\n",
+        "! unzip -q datasets/MitoEM/mito_val.zip -d datasets/MitoEM/EM30-R-val\n",
+        "! rm datasets/MitoEM/mito_val.zip\n",
+        "\n",
+        "# fetch pre-trained model weights from Hugging Face\n",
+        "! mkdir -p outputs/MitoEM\n",
+        "! wget -q -O outputs/MitoEM/mito_u3d-bc_mitoem_300k.pth.tar --show-progress https://huggingface.co/pytc/mito/resolve/main/mito_u3d-bc_mitoem_300k.pth.tar?download=true"
+      ],
+      "metadata": {
+        "id": "AkoIDRs6gpdf"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "## (2) Specify model inference parameters\n",
+        "The provided model configuration that comes in the [Connectomics Github repo](https://github.com/zudi-lin/pytorch_connectomics.git) must be modified to accomodate the resource limitations of Colab. Important configurations for inference include:\n",
+        "\n",
+        "* SYSTEM.NUM_GPUS: the number of GPUs that are available for inference\n",
+        "* DATASET.INPUT_PATH: the location that images are stored\n",
+        "* INFERENCE.IMAGE_NAME: the images that one is running inference on\n",
+        "* INFERNCE.OUTPUT_PATH: the location that the results will be located in"
+      ],
+      "metadata": {
+        "id": "piJNEA6yJAAK"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "base_yaml = \"\"\"SYSTEM:\n",
+        "  NUM_CPUS: 1\n",
+        "  NUM_GPUS: 1\n",
+        "MODEL:\n",
+        "  ARCHITECTURE: unet_plus_3d\n",
+        "  BLOCK_TYPE: residual_se\n",
+        "  INPUT_SIZE: [17, 225, 225]\n",
+        "  OUTPUT_SIZE: [17, 225, 225]\n",
+        "  IN_PLANES: 1\n",
+        "  NORM_MODE: sync_bn\n",
+        "  FILTERS: [32, 64, 96, 128, 160]\n",
+        "DATASET:\n",
+        "  IMAGE_NAME: [\"im_train.json\"]\n",
+        "  LABEL_NAME: [\"mito_train.json\"]\n",
+        "  INPUT_PATH: datasets/MitoEM/EM30-R-im/im\n",
+        "  OUTPUT_PATH: outputs/MitoEM-R/\n",
+        "  PAD_SIZE: [4, 64, 64]\n",
+        "SOLVER:\n",
+        "  LR_SCHEDULER_NAME: WarmupCosineLR\n",
+        "  BASE_LR: 0.04\n",
+        "  ITERATION_STEP: 1\n",
+        "  ITERATION_SAVE: 5000\n",
+        "  ITERATION_TOTAL: 150000\n",
+        "  SAMPLES_PER_BATCH: 2\n",
+        "INFERENCE:\n",
+        "  INPUT_SIZE: [10, 1024, 1024]\n",
+        "  OUTPUT_SIZE: [10, 1024, 1024]\n",
+        "  IMAGE_NAME: imstack_400_410.tif\n",
+        "  OUTPUT_PATH: outputs/MitoEM/EM30-R-im/results/\n",
+        "  OUTPUT_NAME: result # will automatically save to HDF5\n",
+        "  PAD_SIZE: [0, 0, 0]\n",
+        "  AUG_MODE: mean\n",
+        "  AUG_NUM: None\n",
+        "  STRIDE: [1, 513, 513]\n",
+        "  SAMPLES_PER_BATCH: 1\"\"\"\n",
+        "\n",
+        "bc_yaml = \"\"\"MODEL:\n",
+        "  OUT_PLANES: 2\n",
+        "  TARGET_OPT: [\"0\", \"4-1-1\"]\n",
+        "  LOSS_OPTION:\n",
+        "    - - WeightedBCEWithLogitsLoss\n",
+        "      - DiceLoss\n",
+        "    - - WeightedBCEWithLogitsLoss\n",
+        "      - DiceLoss\n",
+        "  LOSS_WEIGHT: [[1.0, 0.5], [1.0, 0.5]]\n",
+        "  WEIGHT_OPT: [[\"1\", \"0\"], [\"1\", \"0\"]]\n",
+        "  OUTPUT_ACT: [[\"none\", \"sigmoid\"], [\"none\", \"sigmoid\"]]\n",
+        "INFERENCE:\n",
+        "  OUTPUT_ACT: [\"sigmoid\", \"sigmoid\"]\"\"\"\n",
+        "\n",
+        "with open('configs/MitoEM/MitoEM-R-Base.yaml', 'w') as fp:\n",
+        "    fp.write(base_yaml)\n",
+        "\n",
+        "with open('configs/MitoEM/MitoEM-R-BC.yaml', 'w') as fp:\n",
+        "    fp.write(bc_yaml)\n"
+      ],
+      "metadata": {
+        "id": "s4nO2B1etbH7"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# move data around such that the configuration above points to the right data\n",
+        "from PIL import Image\n",
+        "import tifffile\n",
+        "import numpy as np\n",
+        "\n",
+        "imstack = np.zeros([10, 4096, 4096])\n",
+        "for idx in range(400, 410):\n",
+        "    imstack[idx-400] = np.array(Image.open(f'datasets/MitoEM/EM30-R-im/im/im{idx:04}.png'))\n",
+        "tifffile.imwrite('datasets/MitoEM/EM30-R-im/im/imstack_400_410.tif', imstack)"
+      ],
+      "metadata": {
+        "id": "E0Tk6gdP0U78"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "## (3) Model infence"
+      ],
+      "metadata": {
+        "id": "DRzRysVYK1_m"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "! time python -u scripts/main.py --config-base configs/MitoEM/MitoEM-R-Base.yaml --config-file configs/MitoEM/MitoEM-R-BC.yaml --inference --checkpoint outputs/MitoEM/mito_u3d-bc_mitoem_300k.pth.tar"
+      ],
+      "metadata": {
+        "id": "WJTmH2LVdaBs",
+        "collapsed": true
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "## (4) Evaluation\n",
+        "Evaluation is done using code from the [mAP_3Dvolume Github repo master branch](https://github.com/ygCoconut/mAP_3Dvolume/tree/master), which is where the MitoEM Grand Challenge also performs evaluation."
+      ],
+      "metadata": {
+        "id": "kGNKOeO-K_E1"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# import dependencies\n",
+        "import glob\n",
+        "import h5py\n",
+        "import itertools\n",
+        "import numpy as np\n",
+        "from scipy import ndimage\n",
+        "from connectomics.data.utils import readvol, writeh5\n",
+        "from connectomics.utils.process import bc_watershed"
+      ],
+      "metadata": {
+        "id": "5NDORNjo7H6I"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# perform watershed processing on the data (currently represented as semantic/countour segmentations) to retrieve mitochondria instance segmentation\n",
+        "data = np.array(h5py.File(\"outputs/MitoEM/EM30-R-im/results/result.h5\")['vol0'])\n",
+        "connected = bc_watershed(data, thres1=0.85, thres2=0.6, thres3=0.8, thres_small=512)\n",
+        "with h5py.File(\"outputs/MitoEM/EM30-R-im/results/watershed.h5\", \"w\") as fp:\n",
+        "    fp.create_dataset('main', data=connected.astype(np.uint16))"
+      ],
+      "metadata": {
+        "id": "6uSZ0THHI2_z"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# prepare validation data for evaluation\n",
+        "files = [f\"datasets/MitoEM/EM30-R-val/mito-val-v2/seg{idx:04}.tif\" for idx in range(400, 410)]\n",
+        "data = []\n",
+        "for file in files:\n",
+        "    data.append(tifffile.imread(file))\n",
+        "data = np.array(data)\n",
+        "writeh5(\"outputs/MitoEM/EM30-R-im/results/validation_gt.h5\", data)"
+      ],
+      "metadata": {
+        "id": "FfMDYhcpJCJo"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# fetch and execute validation scripts\n",
+        "! wget -q --show-progress https://raw.githubusercontent.com/ygCoconut/mAP_3Dvolume/master/demo.py\n",
+        "! wget -q --show-progress https://raw.githubusercontent.com/ygCoconut/mAP_3Dvolume/master/vol3d_eval.py\n",
+        "! wget -q --show-progress https://raw.githubusercontent.com/ygCoconut/mAP_3Dvolume/master/vol3d_util.py\n",
+        "! python demo.py -gt outputs/MitoEM/EM30-R-im/results/validation_gt.h5 -p outputs/MitoEM/EM30-R-im/results/watershed.h5"
+      ],
+      "metadata": {
+        "id": "ePJhmUZI3xee"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# show performance stats\n",
+        "# more data is available at pytorch_connectomics/map_output_match_fn.txt and pytorch_connectomics/map_output_match_p.txt\n",
+        "! cat map_output_map.txt"
+      ],
+      "metadata": {
+        "id": "ZbkzUEhoSjYs"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [],
+      "metadata": {
+        "id": "wycBG-owqTmD"
+      },
+      "execution_count": null,
+      "outputs": []
+    }
+  ]
+}