Unset Loss.reduction to prevent double-reduction in AdversarialRegularization.

csferng · tensorflow-copybara · commit b2bf5c186c3f · 2019-10-04T11:37:25.000-07:00
`AdversarialRegularization` creates a loss wrapper around the provided loss in `compile()` to handle sample weights and loss reduction (aggregation). If the provided loss is a `tf.keras.losses.Loss` object, it applies loss reduction by default, which causes an error in the loss wrapper because the wrapper expects unreduced loss values. This change disables loss reduction on a copy of the provided `Loss` object (re-created from its config, so the caller's object is not modified), so that the loss wrapper can function properly. An alternative approach would be to disable loss reduction in the loss wrapper and let the `Loss` object perform the reduction instead. However, that alternative would fail when running with `tf.distribute.Strategy`, because the `SUM_OVER_BATCH_SIZE` reduction type requires special logic outside the `Loss` object. Such logic is already implemented in the loss wrapper, so letting the wrapper handle loss reduction is cleaner. Fixes #21 PiperOrigin-RevId: 272923076
diff --git a/neural_structured_learning/keras/adversarial_regularization.py b/neural_structured_learning/keras/adversarial_regularization.py
@@ -165,14 +165,21 @@ def __init__(self, loss_fn, name, weight):
     else:
       self.batch_size_reduction = False
     super(_LossWrapper, self).__init__(name=name, reduction=reduction)
-    self.loss_fn = loss_fn
     self.weight = weight
+    if isinstance(loss_fn, tf.keras.losses.Loss) and self.batch_size_reduction:
+      self.loss_fn = loss_fn.__class__.from_config(loss_fn.get_config())
+      self.loss_fn.reduction = tf.losses.Reduction.NONE
+    else:
+      self.loss_fn = loss_fn
 
   def call(self, y_true, y_pred):
     return self.loss_fn(y_true, y_pred)
 
   def __call__(self, *args, **kwargs):
-    loss_value = super(_LossWrapper, self).__call__(*args, **kwargs)
+    if isinstance(self.loss_fn, tf.keras.losses.Loss):
+      loss_value = self.loss_fn(*args, **kwargs)
+    else:
+      loss_value = super(_LossWrapper, self).__call__(*args, **kwargs)
     if self.batch_size_reduction:
       size = tf.cast(tf.size(loss_value), dtype=loss_value.dtype)
       loss_value = tf.math.divide_no_nan(tf.math.reduce_sum(loss_value), size)
diff --git a/neural_structured_learning/keras/adversarial_regularization_multi_device_test.py b/neural_structured_learning/keras/adversarial_regularization_multi_device_test.py
@@ -89,16 +89,19 @@ def _set_up_linear_regression(self, sample_weight=1.0):
     w_new = w - learning_rate * (grad_w_labeled_loss + grad_w_adv_loss)
     return w, x0, y0, learning_rate, adv_config, w_new
 
+  def _get_mirrored_strategy(self):
+    device_type = 'GPU' if tf.test.is_gpu_available() else 'CPU'
+    devices = ['{}:{}'.format(device_type, i) for i in range(NUM_REPLICAS)]
+    return tf.distribute.MirroredStrategy(devices)
+
   def test_train_with_distribution_strategy(self):
     w, x0, y0, lr, adv_config, w_new = self._set_up_linear_regression()
     inputs = tf.data.Dataset.from_tensor_slices({
         'feature': x0,
         'label': y0
     }).batch(NUM_REPLICAS)
 
-    device_type = 'GPU' if tf.test.is_gpu_available() else 'CPU'
-    devices = ['{}:{}'.format(device_type, i) for i in range(NUM_REPLICAS)]
-    strategy = tf.distribute.MirroredStrategy(devices)
+    strategy = self._get_mirrored_strategy()
     with strategy.scope():
       # Makes sure we are running on multiple devices.
       self.assertEqual(NUM_REPLICAS, strategy.num_replicas_in_sync)
@@ -112,6 +115,25 @@ def test_train_with_distribution_strategy(self):
     # The updated weight should be the same regardless of the number of devices.
     self.assertAllClose(w_new, keras.backend.get_value(model.weights[0]))
 
+  def test_train_with_loss_object(self):
+    w, x0, y0, lr, adv_config, w_new = self._set_up_linear_regression()
+    inputs = tf.data.Dataset.from_tensor_slices({
+        'feature': x0,
+        'label': y0
+    }).batch(NUM_REPLICAS)
+
+    strategy = self._get_mirrored_strategy()
+    with strategy.scope():
+      model = build_linear_keras_functional_model(input_shape=(2,), weights=w)
+      adv_model = adversarial_regularization.AdversarialRegularization(
+          model, label_keys=['label'], adv_config=adv_config)
+      adv_model.compile(
+          optimizer=keras.optimizers.SGD(lr),
+          loss=tf.keras.losses.MeanSquaredError())
+    adv_model.fit(x=inputs)
+
+    self.assertAllClose(w_new, keras.backend.get_value(model.weights[0]))
+
 
 if __name__ == '__main__':
   tf.compat.v1.enable_v2_behavior()
diff --git a/neural_structured_learning/keras/adversarial_regularization_test.py b/neural_structured_learning/keras/adversarial_regularization_test.py
@@ -344,6 +344,20 @@ def test_train_with_distribution_strategy(self, model_fn):
 
     self.assertAllClose(w_new, keras.backend.get_value(model.weights[0]))
 
+  def test_train_with_loss_object(self):
+    w, x0, y0, lr, adv_config, w_new = self._set_up_linear_regression()
+
+    inputs = {'feature': tf.constant(x0), 'label': tf.constant(y0)}
+    model = build_linear_keras_functional_model(input_shape=(2,), weights=w)
+    adv_model = adversarial_regularization.AdversarialRegularization(
+        model, label_keys=['label'], adv_config=adv_config)
+    adv_model.compile(
+        optimizer=keras.optimizers.SGD(lr),
+        loss=tf.keras.losses.MeanSquaredError())
+    adv_model.fit(x=inputs, batch_size=1, steps_per_epoch=1)
+
+    self.assertAllClose(w_new, keras.backend.get_value(model.weights[0]))
+
   def test_train_with_metrics(self):
     w, x0, y0, lr, adv_config, _ = self._set_up_linear_regression()