
Commit 9dfec99

lint: simplify used tools (#431)
* lint: simplify used tools

* [pre-commit.ci] auto fixes from pre-commit.com hooks

  for more information, see https://pre-commit.ci

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent 3f0a15c commit 9dfec99

Note: large commits have some content hidden by default, so only a subset of the changed files is shown below.

41 files changed: +161 −57 lines

.pre-commit-config.yaml

Lines changed: 5 additions & 18 deletions

@@ -26,24 +26,12 @@ repos:
       - id: check-docstring-first
       - id: detect-private-key

-  - repo: https://github.com/asottile/pyupgrade
-    rev: v3.15.1
-    hooks:
-      - id: pyupgrade
-        args: ["--py38-plus"]
-        name: Upgrade code
-
   - repo: https://github.com/PyCQA/docformatter
     rev: v1.7.5
     hooks:
       - id: docformatter
-        args: [--in-place, --wrap-summaries=120, --wrap-descriptions=120]
-
-  - repo: https://github.com/psf/black
-    rev: 24.2.0
-    hooks:
-      - id: black
-        name: Black code
+        additional_dependencies: [tomli]
+        args: ["--in-place"]

   - repo: https://github.com/executablebooks/mdformat
     rev: 0.7.17
@@ -58,20 +46,19 @@ repos:
           docs/|
           README.md
         )
-  - repo: https://github.com/asottile/yesqa
-    rev: v1.5.0
-    hooks:
-      - id: yesqa

   - repo: https://github.com/astral-sh/ruff-pre-commit
     rev: v0.2.2
     hooks:
       - id: ruff
         args: ["--fix"]
+      - id: ruff-format
+      - id: ruff

   - repo: https://github.com/pre-commit/mirrors-prettier
     rev: v4.0.0-alpha.8
     hooks:
       - id: prettier
+        files: \.(json|yml|yaml|toml)
         # https://prettier.io/docs/en/options.html#print-width
         args: ["--print-width=120"]

.prettierignore

Lines changed: 0 additions & 2 deletions
This file was deleted.

examples/covertype_classification.py

Lines changed: 3 additions & 1 deletion

@@ -92,7 +92,9 @@
     normalize_continuous_features=True,
 )
 head_config = LinearHeadConfig(
-    layers="", dropout=0.1, initialization="kaiming"  # No additional layer in head, just a mapping layer to output_dim
+    layers="",
+    dropout=0.1,
+    initialization="kaiming",  # No additional layer in head, just a mapping layer to output_dim
 ).__dict__  # Convert to dict to pass to the model config (OmegaConf doesn't accept objects)
 model_config = CategoryEmbeddingModelConfig(
     task="classification",
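The only change here is mechanical: the one-line `LinearHeadConfig(...)` call exceeded the 120-character limit once its trailing comment was counted, so the formatter (now `ruff-format`, black-compatible) split it into one argument per line with a trailing comma. A hypothetical call formatted the same way:

```python
def make_head_config(layers: str, dropout: float, initialization: str) -> dict:
    """A stand-in (hypothetical) for LinearHeadConfig, just to show the layout."""
    return {"layers": layers, "dropout": dropout, "initialization": initialization}


# black-style output: when a call (including its inline comment) no longer fits
# in line-length = 120, each argument moves to its own line and a trailing
# comma is added after the last one.
head_config = make_head_config(
    layers="",
    dropout=0.1,
    initialization="kaiming",  # no extra layers in the head, just a mapping to output_dim
)
```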

pyproject.toml

Lines changed: 10 additions & 6 deletions

@@ -1,16 +1,13 @@
-[tool.black]
-# https://github.com/psf/black
-line-length = 120
-exclude = "(.eggs|.git|.hg|.mypy_cache|.venv|_build|buck-out|build|dist)"
-
-
 [tool.ruff]
+target-version = "py38"
 line-length = 120
 # Enable Pyflakes `E` and `F` codes by default.
 select = [
     "E", "W",  # see: https://pypi.org/project/pycodestyle
     "F",  # see: https://pypi.org/project/pyflakes
     "I",  # isort
+    "UP",  # see: https://docs.astral.sh/ruff/rules/#pyupgrade-up
+    "RUF100",  # yesqa
     # "D",  # see: https://pypi.org/project/pydocstyle
     # "N",  # see: https://pypi.org/project/pep8-naming
 ]
@@ -45,3 +42,10 @@ ignore-init-module-imports = true
 [tool.ruff.pydocstyle]
 # Use Google-style docstrings.
 convention = "google"
+
+[tool.docformatter]
+recursive = true
+# this need to be shorter as some docstings are r"""...
+wrap-summaries = 119
+wrap-descriptions = 120
+blank = true
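The new `[tool.docformatter]` table explains most of the remaining diff: `blank = true` has docformatter insert a blank line before the closing quotes of a multi-line docstring, which matches the `+` blank lines repeated through every Python file below. A hypothetical before/after (not code from this repo):

```python
# Hypothetical before/after of docformatter with blank = true: a blank line
# is inserted before the closing quotes of any multi-line docstring.


def before(path: str) -> None:
    """Save the encoder.

    Args:
        path (str): path to save the encoder
    """


def after(path: str) -> None:
    """Save the encoder.

    Args:
        path (str): path to save the encoder

    """
```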

src/pytorch_tabular/categorical_encoders.py

Lines changed: 10 additions & 0 deletions

@@ -51,6 +51,7 @@ def transform(self, X):
         :return: encoded DataFrame of shape (n_samples, n_features), initial categorical columns are dropped, and
             replaced with encoded columns. DataFrame passed in argument is unchanged.
         :rtype: pandas.DataFrame
+
         """
         if not self._mapping:
             raise ValueError("`fit` method must be called before `transform`.")
@@ -80,6 +81,7 @@ def fit_transform(self, X, y=None):
         :return: encoded DataFrame of shape (n_samples, n_features), initial categorical columns are dropped, and
             replaced with encoded columns. DataFrame passed in argument is unchanged.
         :rtype: pandas.DataFrame
+
         """
         self.fit(X, y)
         return self.transform(X)
@@ -104,6 +106,7 @@ def save_as_object_file(self, path):

         Args:
             path (str): path to save the encoder
+
         """
         if not self._mapping:
             raise ValueError("`fit` method must be called before `save_as_object_file`.")
@@ -114,6 +117,7 @@ def load_from_object_file(self, path):

         Args:
             path (str): path to load the encoder
+
         """
         for k, v in pickle.load(open(path, "rb")).items():
             setattr(self, k, v)
@@ -131,6 +135,7 @@ def __init__(self, cols=None, handle_unseen="impute", handle_missing="impute"):
             'ignore' - skip unseen categories
             'impute' - impute new categories to a predefined value, which is same as NAN_CATEGORY
         :return: None
+
         """
         self._input_check("handle_unseen", handle_unseen, ["error", "ignore", "impute"])
         self._input_check("handle_missing", handle_missing, ["error", "impute"])
@@ -141,6 +146,7 @@ def fit(self, X, y=None):

         :param pandas.DataFrame X: DataFrame of features, shape (n_samples, n_features). Must contain columns to encode.
         :return: None
+
         """
         self._before_fit_check(X, y)
         if self.handle_missing == "error":
@@ -161,6 +167,7 @@ def __init__(self, tabular_model):

         Args:
             tabular_model (TabularModel): The trained TabularModel object
+
         """
         self._categorical_encoder = tabular_model.datamodule.categorical_encoder
         self.cols = tabular_model.model.hparams.categorical_cols
@@ -198,6 +205,7 @@ def fit(self, X, y=None):
         """Just for compatibility.

         Does not do anything
+
         """
         return self

@@ -213,6 +221,7 @@ def transform(self, X: DataFrame, y=None) -> DataFrame:

         Returns:
             DataFrame: The encoded dataframe
+
         """
         if not self._mapping:
             raise ValueError(
@@ -245,6 +254,7 @@ def fit_transform(self, X: DataFrame, y=None) -> DataFrame:

         Returns:
             DataFrame: The encoded dataframe
+
         """
         self.fit(X, y)
         return self.transform(X)

src/pytorch_tabular/config/config.py

Lines changed: 8 additions & 0 deletions

@@ -94,6 +94,7 @@ class DataConfig:

         handle_missing_values (bool): Whether to handle missing values in categorical columns as
             unknown
+
     """

     target: Optional[List[str]] = field(
@@ -201,6 +202,7 @@ class InferredConfig:
             list of tuples (cardinality, embedding_dim).

         embedded_cat_dim (int): The number of features or dimensions of the embedded categorical features
+
     """

     categorical_dim: int = field(
@@ -341,6 +343,7 @@ class TrainerConfig:

         trainer_kwargs (Dict[str, Any]): Additional kwargs to be passed to PyTorch Lightning Trainer. See
             https://pytorch-lightning.readthedocs.io/en/latest/api/pytorch_lightning.trainer.html#pytorch_lightning.trainer.Trainer
+
     """

     batch_size: int = field(default=64, metadata={"help": "Number of samples in each batch of training"})
@@ -575,6 +578,7 @@ class ExperimentConfig:
         log_logits (bool): Turn this on to log the logits as a histogram in W&B

         exp_log_freq (int): step count between logging of gradients and parameters.
+
     """

     project_name: str = field(
@@ -651,6 +655,7 @@ class OptimizerConfig:

         lr_scheduler_monitor_metric (Optional[str]): Used with ReduceLROnPlateau, where the plateau is
             decided based on this metric
+
     """

     optimizer: str = field(
@@ -703,6 +708,7 @@ def __init__(
         Args:
             exp_version_manager (str, optional): The path of the yml file which acts as version control.
                 Defaults to ".pt_tmp/exp_version_manager.yml".
+
         """
         super().__init__()
         self._exp_version_manager = exp_version_manager
@@ -776,6 +782,7 @@ class ModelConfig:
             not apply any restrictions

         seed (int): The seed for reproducibility. Defaults to 42
+
     """

     task: str = field(
@@ -956,6 +963,7 @@ class SSLModelConfig:
         learning_rate (float): The learning rate of the model. Defaults to 1e-3

         seed (int): The seed for reproducibility. Defaults to 42
+
     """

     task: str = field(init=False, default="ssl")

src/pytorch_tabular/feature_extractor.py

Lines changed: 6 additions & 0 deletions

@@ -26,6 +26,7 @@ def __init__(self, tabular_model, extract_keys=["backbone_features"], drop_origi
             tabular_model (TabularModel): The trained TabularModel object
             extract_keys (list, optional): The keys of the features to extract. Defaults to ["backbone_features"].
             drop_original (bool, optional): Whether to drop the original columns. Defaults to True.
+
         """
         assert not (
             isinstance(tabular_model.model, NODEModel)
@@ -40,6 +41,7 @@ def fit(self, X, y=None):
         """Just for compatibility.

         Does not do anything
+
         """
         return self

@@ -55,6 +57,7 @@ def transform(self, X: pd.DataFrame, y=None) -> pd.DataFrame:

         Returns:
             pd.DataFrame: The encoded dataframe
+
         """

         X_encoded = X.copy(deep=True)
@@ -99,6 +102,7 @@ def fit_transform(self, X: pd.DataFrame, y=None) -> pd.DataFrame:

         Returns:
             pd.DataFrame: The encoded dataframe
+
         """
         self.fit(X, y)
         return self.transform(X)
@@ -108,6 +112,7 @@ def save_as_object_file(self, path):

         Args:
             path (str): The path to save the file
+
         """
         if not self._mapping:
             raise ValueError("`fit` method must be called before `save_as_object_file`.")
@@ -118,6 +123,7 @@ def load_from_object_file(self, path):

         Args:
             path (str): The path to load the file from
+
         """
         for k, v in pickle.load(open(path, "rb")).items():
             setattr(self, k, v)

src/pytorch_tabular/models/autoint/autoint.py

Lines changed: 1 addition & 0 deletions

@@ -19,6 +19,7 @@ def __init__(self, config: DictConfig):

         Args:
             config (DictConfig): config of the model
+
         """
         super().__init__()
         self.hparams = config

src/pytorch_tabular/models/autoint/config.py

Lines changed: 1 addition & 0 deletions

@@ -108,6 +108,7 @@ class AutoIntConfig(ModelConfig):
             not apply any restrictions

         seed (int): The seed for reproducibility. Defaults to 42
+
     """

     attn_embed_dim: int = field(

src/pytorch_tabular/models/base_model.py

Lines changed: 12 additions & 1 deletion

@@ -49,6 +49,7 @@ def safe_merge_config(config: DictConfig, inferred_config: DictConfig) -> DictCo

     Returns:
         The merged configuration.
+
     """
     # using base config values if exist
     inferred_config.embedding_dims = config.get("embedding_dims") or inferred_config.embedding_dims
@@ -90,6 +91,7 @@ def __init__(
                 A custom optimizer as callable or string to be imported. Defaults to None.
             custom_optimizer_params (Dict, optional): A dictionary of custom optimizer parameters. Defaults to {}.
             kwargs (Dict, optional): Additional keyword arguments.
+
         """
         super().__init__()
         assert "inferred_config" in kwargs, "inferred_config not found in initialization arguments"
@@ -231,6 +233,7 @@ def calculate_loss(self, output: Dict, y: torch.Tensor, tag: str) -> torch.Tenso

         Returns:
             torch.Tensor: The loss value
+
         """
         y_hat = output["logits"]
         reg_terms = [k for k, v in output.items() if "regularization" in k]
@@ -287,6 +290,7 @@ def calculate_metrics(self, y: torch.Tensor, y_hat: torch.Tensor, tag: str) -> L

         Returns:
             List[torch.Tensor]: The list of metric values
+
         """
         metrics = []
         for metric, metric_str, prob_inp, metric_params in zip(
@@ -349,13 +353,15 @@ def embed_input(self, x: Dict) -> torch.Tensor:
         return self.embedding_layer(x)

     def apply_output_sigmoid_scaling(self, y_hat: torch.Tensor) -> torch.Tensor:
-        """Applies sigmoid scaling to the output of the model if the task is regression and the target range is defined.
+        """Applies sigmoid scaling to the output of the model if the task is regression and the target range is
+        defined.

         Args:
             y_hat (torch.Tensor): The output of the model

         Returns:
             torch.Tensor: The output of the model with sigmoid scaling applied
+
         """
         if (self.hparams.task == "regression") and (self.hparams.target_range is not None):
             for i in range(self.hparams.output_dim):
@@ -373,6 +379,7 @@ def pack_output(self, y_hat: torch.Tensor, backbone_features: torch.tensor) -> D

         Returns:
             The packed output of the model
+
         """
         # if self.head is the Identity function it means that we cannot extract backbone features,
         # because the model cannot be divide in backbone and head (i.e. TabNet)
@@ -388,6 +395,7 @@ def compute_head(self, backbone_features: Tensor) -> Dict[str, Any]:

         Returns:
             The output of the model
+
         """
         y_hat = self.head(backbone_features)
         y_hat = self.apply_output_sigmoid_scaling(y_hat)
@@ -398,6 +406,7 @@ def forward(self, x: Dict) -> Dict[str, Any]:

         Args:
             x (Dict): The input of the model with 'continuous' and 'categorical' keys
+
         """
         x = self.embed_input(x)
         x = self.compute_backbone(x)
@@ -413,6 +422,7 @@ def predict(self, x: Dict, ret_model_output: bool = False) -> Union[torch.Tensor

         Returns:
             The output of the model
+
         """
         assert self.hparams.task != "ssl", "It's not allowed to use the method predict in case of ssl task"
         ret_value = self.forward(x)
@@ -427,6 +437,7 @@ def extract_embedding(self):
         """Extracts the embedding of the model.

         This is used in `CategoricalEmbeddingTransformer`
+
         """
         if self.hparams.categorical_dim > 0:
             if not isinstance(self.embedding_layer, PreEncoded1dLayer):
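The `apply_output_sigmoid_scaling` hunk is the one place where `wrap-summaries = 119` visibly bites: a summary that fit the old 120-character budget now wraps onto a second line. A hypothetical illustration of why the limit is one below `line-length` — my reading of the `some docstings are r"""` comment in pyproject.toml:

```python
# Hypothetical illustration (not from this repo): a raw docstring's `r` prefix
# shifts the summary text one column right, so a summary budget equal to
# line-length could overflow by one character on r"""...""" docstrings.
# Capping summaries at 119 keeps both variants under 120 columns.


def plain() -> None:
    """A summary here may run out to the full line length, since the quotes open at column 5."""


def raw() -> None:
    r"""The same summary sits one column further right because of the r prefix."""
```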
