Skip to content

Commit de1b4fc

Browse files
authored
Restructure codebase (#12)
* Restructure codebase * Add .python-version file * Fix formatting * Remove pip-audit dependency and related GitHub Action from workflows
1 parent 05c14a7 commit de1b4fc

File tree

93 files changed

+6471
-890
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

93 files changed

+6471
-890
lines changed

.github/workflows/code_checks.yaml

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
name: code checks
2+
permissions:
3+
contents: read
4+
pull-requests: write
5+
6+
on:
7+
push:
8+
branches:
9+
- main
10+
paths:
11+
- .pre-commit-config.yaml
12+
- .github/workflows/code_checks.yml
13+
- '**.py'
14+
- uv.lock
15+
- pyproject.toml
16+
- '**.ipynb'
17+
pull_request:
18+
branches:
19+
- main
20+
paths:
21+
- .pre-commit-config.yaml
22+
- .github/workflows/code_checks.yml
23+
- '**.py'
24+
- uv.lock
25+
- pyproject.toml
26+
- '**.ipynb'
27+
28+
jobs:
29+
run-code-check:
30+
runs-on: ubuntu-latest
31+
steps:
32+
- uses: actions/checkout@v4
33+
34+
- name: Install uv
35+
uses: astral-sh/setup-uv@v5
36+
with:
37+
version: "latest"
38+
enable-cache: true
39+
40+
- name: "Set up Python"
41+
uses: actions/setup-python@v5
42+
with:
43+
python-version-file: ".python-version"
44+
45+
- name: Install the project
46+
run: uv sync --all-extras --dev
47+
48+
- name: Install dependencies and check code
49+
run: |
50+
source .venv/bin/activate
51+
pre-commit run --all-files

.github/workflows/static_code_checks.yaml

Lines changed: 0 additions & 26 deletions
This file was deleted.

.gitignore

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -81,9 +81,6 @@ target/
8181
profile_default/
8282
ipython_config.py
8383

84-
# pyenv
85-
.python-version
86-
8784
# pipenv
8885
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
8986
# However, in case of collaboration, if having platform-specific dependencies or dependencies

.pre-commit-config.yaml

Lines changed: 12 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
repos:
22
- repo: https://github.com/pre-commit/pre-commit-hooks
3-
rev: v4.4.0 # Use the ref you want to point at
3+
rev: v5.0.0 # Use the ref you want to point at
44
hooks:
55
- id: trailing-whitespace
66
- id: check-ast
@@ -11,29 +11,19 @@ repos:
1111
- id: end-of-file-fixer
1212
- id: mixed-line-ending
1313
args: [--fix=lf]
14-
- id: requirements-txt-fixer
15-
- id: trailing-whitespace
14+
- id: detect-private-key
15+
- id: check-byte-order-marker
16+
- id: check-merge-conflict
17+
- id: check-symlinks
1618
- id: check-yaml
19+
args: [--unsafe]
20+
- id: check-toml
1721

1822
- repo: https://github.com/astral-sh/ruff-pre-commit
19-
rev: v0.0.280
23+
rev: v0.11.4
2024
hooks:
2125
- id: ruff
22-
23-
- repo: https://github.com/psf/black
24-
rev: 23.7.0
25-
hooks:
26-
- id: black
27-
28-
- repo: https://github.com/pre-commit/mirrors-mypy
29-
rev: v1.4.1
30-
hooks:
31-
- id: mypy
32-
33-
- repo: https://github.com/nbQA-dev/nbQA
34-
rev: 1.7.0
35-
hooks:
36-
- id: nbqa-black
37-
- id: nbqa-ruff
38-
- id: nbqa-check-ast
39-
- id: nbqa-mypy
26+
args: [--fix, --exit-non-zero-on-fix]
27+
types_or: [ python, pyi, jupyter ]
28+
- id: ruff-format
29+
types_or: [ python, pyi, jupyter ]

.python-version

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
3.9

README.md

Lines changed: 43 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2,27 +2,56 @@
22
This repository contains reference implementations of three self-supervised learning
33
techniques explored during the Vector Institute's Self-Supervised Learning (SSL) Bootcamp.
44

5-
# Installing dependencies
6-
```
7-
python3 -m venv /path/to/new/virtual/environment/ssl_env
8-
source /path/to/new/virtual/environment/ssl_env/bin/activate
9-
pip install --upgrade pip
10-
pip install -r requirements.txt
11-
```
5+
# Summary of Reference Implementations
126

13-
If you are on the Vector Institute's Vaughan cluster, the environment is already set up and can be activated with
7+
| Name | Description | Reference Implementation |
8+
|------|-------------|-------|
9+
| Internal Contrastive Learning (ICL) + Latent Outlier Exposure (LOE) | ICL learns to maximize the mutual information between two complementary subsets based on the assumption that the relation between a subset of features and the rest of the features is class-dependent. LOE extends ICL to work with contaminated datasets. | [Anomaly Detection in Tabular Data with ICL](src/contrastive_learning/ICL/ICL.ipynb), [Latent Outlier Exposure for Anomaly Detection with Contaminated Data](src/contrastive_learning/LatentOE/LatentOE_Notebook.ipynb) |
10+
| SimMTM | Reconstructs a time series signal from multiple randomly masked versions. Uses series-wise representation similarity to do a weighted aggregation of point-wise representations before reconstruction. | [Beijing PM2.5 Air Quality Forecasting](src/masked_modelling/simmtm/simmtm-BeijingPM25Quality-forecasting.ipynb) |
11+
| TabRet | TabRet is a pre-trainable Transformer-based model for tabular data and designed to work on a downstream task that contains columns not seen in pre-training. Unlike other methods, TabRet has an extra learning step before fine-tuning called retokenizing, which calibrates feature embeddings based on the masked autoencoding loss. | [Stroke Prediction with the BRFSS dataset](src/masked_modelling/tabret/TabRet.ipynb) |
12+
| Data2Vec | Combines masked prediction with self-distillation to predict contextualized latent representations (produced by the teacher network) based on a partial/masked view of the input (given to the student network). | [Image Classification with STL-10 dataset](src/self_distillation/data2vec_vision.ipynb) |
1413

14+
15+
# Setting up the environment
16+
Prior to installing the dependencies for this project, it is recommended to install
17+
[uv](https://github.com/astral-sh/uv?tab=readme-ov-file#installation) and create
18+
a virtual environment. You may use whatever virtual environment management tool
19+
you like, including uv, conda, and virtualenv.
20+
21+
With uv, you can create a virtual environment with the following command:
22+
23+
```bash
24+
uv venv -n --seed --python 3.9 /path/to/new/virtual/environment/ssl_env
1525
```
16-
source /ssd003/projects/aieng/public/ssl_bootcamp_resources/venv/bin/activate
17-
```
26+
This will create a new virtual environment in the specified path.
27+
28+
**Note**: If you are using the Vector Institute's Vaughan cluster, a virtual
29+
environment has already been created for you at `/ssd003/projects/aieng/public/ssl_bootcamp_resources/venv`.
30+
31+
Once you have created a virtual environment, you can activate it with the command:
1832

19-
# Using pre-commit hooks
20-
To check your code at commit time
2133
```
22-
pre-commit install
34+
source /path/to/new/virtual/environment/ssl_env/bin/activate
2335
```
2436

25-
You can also get pre-commit to fix your code
37+
Then, you can install the dependencies for this project with the following commands:
38+
39+
```bash
40+
git clone https://github.com/VectorInstitute/SSL-Bootcamp.git
41+
cd SSL-Bootcamp
42+
uv sync --no-cache --active --dev
43+
```
44+
**Note**: The `--active` flag in the above command assumes that you have already
45+
activated your virtual environment. If you prefer not to create a new virtual
46+
environment yourself, you can omit the `--active` flag and uv will create a new virtual environment
47+
for you in the `.venv` directory inside the project root.
48+
49+
## Using pre-commit hooks
50+
To ensure that your code adheres to the project's style and formatting guidelines,
51+
you can use pre-commit hooks to check for common issues, such as code formatting,
52+
linting, and security vulnerabilities. Run the following command before pushing
53+
your code to the repository:
54+
2655
```
2756
pre-commit run --all-files
2857
```

pyproject.toml

Lines changed: 58 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,66 @@
1-
[build-system]
2-
requires = ["setuptools", "wheel"]
3-
build-backend = "setuptools.build_meta"
4-
5-
[tool.black]
6-
line-length = 88
7-
8-
[tool.mypy]
9-
ignore_missing_imports = true
10-
install_types = true
11-
pretty = true
12-
non_interactive = true
13-
disallow_untyped_defs = true
14-
no_implicit_optional = true
15-
check_untyped_defs = true
1+
[project]
2+
name = "ssl-bootcamp"
3+
version = "0.1.0"
4+
description = "Reference implementations for the Vector Institute's self-supervised learning (SSL) bootcamp (2023)"
5+
readme = "README.md"
6+
authors = [{name = "Vector AI Engineering", email = "ai_engineering@vectorinstitute.ai"}]
7+
license = "MIT"
8+
repository = "https://github.com/VectorInstitute/SSL-Bootcamp"
9+
requires-python = ">=3.9"
10+
dependencies = [
11+
"copulas>=0.12.0",
12+
"deepod>=0.4.1",
13+
"ipykernel>=6.29.5",
14+
"lightning==2.0.6",
15+
"lightning-bolts>=0.7.0",
16+
"matplotlib>=3.7.5",
17+
"notebook>=7.3.3",
18+
"numpy>=1.24.4",
19+
"optuna>=4.2.1",
20+
"pandas>=2.0.3",
21+
"pyod>=2.0.4",
22+
"pytorch-tabular==1.0.2",
23+
"rtdl==0.0.13",
24+
"scikit-learn>=1.3.2",
25+
"scipy>=1.10.1",
26+
"timm==0.9.2",
27+
"toml>=0.10.2",
28+
"torch==1.13.0",
29+
"torchvision==0.14.0",
30+
"transformers==4.31.0",
31+
"transtab>=0.0.5",
32+
"wandb>=0.19.9",
33+
"xgboost>=2.1.4",
34+
]
35+
36+
[dependency-groups]
37+
dev = [
38+
"pre-commit>=3.5.0",
39+
"ruff>=0.11.4",
40+
]
1641

1742
[tool.ruff]
18-
select = ["B", "C", "D", "E", "F", "I", "W"]
19-
line-length = 88
43+
include = ["*.py", "pyproject.toml", "*.ipynb"]
44+
line-length = 119
45+
46+
[tool.ruff.format]
47+
quote-style = "double"
48+
indent-style = "space"
49+
docstring-code-format = true
2050

21-
[tool.ruff.isort]
51+
[tool.ruff.lint]
52+
select = ["A", "B", "C", "E", "F", "I", "W"]
53+
fixable = ["A", "B", "COM", "C", "C4", "RET", "SIM", "ICN", "Q", "RSE", "E", "F", "I", "W", "PL"]
54+
ignore = ["E501", "C901"]
55+
56+
[tool.ruff.lint.isort]
2257
lines-after-imports = 2
2358

24-
[tool.nbqa.addopts]
25-
ruff = [
26-
"--ignore=D100,D203,D211,D212,D213,D401",
27-
"--fix",
28-
"--line-length=119",
29-
]
30-
black = ["--line-length=119"]
59+
[tool.ruff.lint.pycodestyle]
60+
max-doc-length = 119
61+
62+
[tool.ruff.lint.pydocstyle]
63+
convention = "numpy"
3164

3265
[tool.nbqa.md]
3366
blacken-docs = true

requirements.txt

Lines changed: 0 additions & 32 deletions
This file was deleted.
File renamed without changes.

0 commit comments

Comments
 (0)