Add a constructor parameter for passing label features to base model.

csferng · tensorflow-copybara · commit 36747689b4f6 · 2020-04-06T15:08:36.000-07:00
Fixes #37. PiperOrigin-RevId: 305126834
diff --git a/neural_structured_learning/keras/adversarial_regularization.py b/neural_structured_learning/keras/adversarial_regularization.py
@@ -460,7 +460,8 @@ def __init__(self,
                base_model,
                label_keys=('label',),
                sample_weight_key=None,
-               adv_config=None):
+               adv_config=None,
+               base_with_labels_in_features=False):
     """Constructor of `AdversarialRegularization` class.
 
     Args:
@@ -474,13 +475,22 @@ def __init__(self,
         the weight is 1.0 for each input example.
       adv_config: Instance of `nsl.configs.AdvRegConfig` for configuring
         adversarial regularization.
+      base_with_labels_in_features: A Boolean value indicating whether the base
+        model expects label features as input. This option is effective only
+        when the base model is a subclassed Keras model. (For functional and
+        Sequential models, the expected inputs can be inferred from the model
+        itself.) If set to true, the base model will be called with an input
+        dictionary including label and sample-weight features. If set to false,
+        label and sample-weight features will not be present in the base
+        model's input dictionary.
     """
     super(AdversarialRegularization,
           self).__init__(name='AdversarialRegularization')
     self.base_model = base_model
     self.label_keys = label_keys
     self.sample_weight_key = sample_weight_key
     self.adv_config = adv_config or nsl_configs.AdvRegConfig()
+    self._base_with_labels_in_features = base_with_labels_in_features
 
   def compile(self,
               optimizer,
@@ -585,42 +595,45 @@ def _compute_total_loss(self, labels, outputs, sample_weights=None):
                                         outputs, sample_weights)
     return loss
 
-  def _split_inputs(self, inputs):
+  def _extract_labels_and_weights(self, inputs):
     sample_weights = inputs.get(self.sample_weight_key, None)
     if sample_weights is not None:
       sample_weights = tf.stop_gradient(sample_weights)
     # Labels shouldn't be perturbed when generating adversarial examples.
     labels = [
         tf.stop_gradient(inputs[label_key]) for label_key in self.label_keys
     ]
-    # Removes labels and sample weights from the input dictionary, since they
-    # are only used in this class and base model does not need them as inputs.
+    return labels, sample_weights
+
+  def _remove_labels_and_weights(self, inputs):
     non_feature_keys = set(self.label_keys).union([self.sample_weight_key])
-    inputs = {
+    return {
         key: value
         for key, value in six.iteritems(inputs)
         if key not in non_feature_keys
     }
-    # In some cases, Sequential models are automatically compiled to graph
-    # networks with automatically generated input names. In this case, the user
-    # isn't expected to know those names, so we just flatten the inputs. But the
-    # input names are sometimes meaningful (e.g. DenseFeatures layer). We check
-    # if there is any intersection between the user-provided names and model's
-    # input names. If there is, we assume the names are meaningful and preserve
-    # the dictionary.
-    if (isinstance(self.base_model, tf.keras.Sequential) and
-        not (set(getattr(self.base_model, 'input_names', []))
-             & set(inputs.keys()))):
-      inputs = tf.nest.flatten(inputs)
-    return inputs, labels, sample_weights
 
   def _call_base_model(self, inputs, **kwargs):
-    if isinstance(inputs, dict) and self.base_model._is_graph_network:  # pylint: disable=protected-access
-      base_input_names = getattr(self.base_model, 'input_names', None)
+    base_input_names = getattr(self.base_model, 'input_names', [])
+    if (isinstance(self.base_model, tf.keras.Sequential) and
+        not set(base_input_names) & set(inputs.keys())):
+      # In some cases, Sequential models are automatically compiled to graph
+      # networks with automatically generated input names. In this case, the
+      # user isn't expected to know those names, so we just flatten the inputs.
+      # But the input names are sometimes meaningful (e.g. DenseFeatures layer).
+      # We check if there is any intersection between the user-provided names
+      # and model's input names. If there is, we assume the names are meaningful
+      # and do name-based lookup in the next branch.
+      inputs = tf.nest.flatten(self._remove_labels_and_weights(inputs))
+    elif self.base_model._is_graph_network:  # pylint: disable=protected-access
       if base_input_names:
         # Converts input dictionary to a list so it conforms with the model's
         # expected input.
         inputs = [inputs[name] for name in base_input_names]
+    elif not self._base_with_labels_in_features:
+      # Removes labels and sample weights from the input dictionary, since they
+      # are only used in this class and base model does not need them as inputs.
+      inputs = self._remove_labels_and_weights(inputs)
     return self.base_model(inputs, **kwargs)
 
   def _forward_pass(self, inputs, labels, sample_weights, base_model_kwargs):
@@ -647,7 +660,7 @@ def call(self, inputs, **kwargs):
       raise ValueError('Labels are not in the input. For predicting examples '
                        'without labels, please use the base model instead.')
 
-    inputs, labels, sample_weights = self._split_inputs(inputs)
+    labels, sample_weights = self._extract_labels_and_weights(inputs)
     outputs, labeled_loss, metrics, tape = self._forward_pass(
         inputs, labels, sample_weights, kwargs)
     self.add_loss(labeled_loss)
@@ -690,8 +703,9 @@ def perturb_on_batch(self, x, **config_kwargs):
       A dictionary of NumPy arrays, `SparseTensor`, or `RaggedTensor` objects of
       the generated adversarial examples.
     """
-    x = tf.nest.map_structure(tf.convert_to_tensor, x, expand_composites=True)
-    inputs, labels, sample_weights = self._split_inputs(x)
+    inputs = tf.nest.map_structure(
+        tf.convert_to_tensor, x, expand_composites=True)
+    labels, sample_weights = self._extract_labels_and_weights(inputs)
     _, labeled_loss, _, tape = self._forward_pass(inputs, labels,
                                                   sample_weights,
                                                   {'training': False})
diff --git a/neural_structured_learning/keras/adversarial_regularization_test.py b/neural_structured_learning/keras/adversarial_regularization_test.py
@@ -117,14 +117,14 @@ def evaluate(self, *args, **kwargs):
       # is not created until the first call to the model, so the initialization
       # is not captured in the global_variables_initializer above.
       with tf.keras.backend.get_session().as_default():
-        return super(AdversarialLossTest, self).evaluate(
-            *args, **kwargs)
+        return super(AdversarialLossTest, self).evaluate(*args, **kwargs)
     else:
-      return super(AdversarialLossTest, self).evaluate(
-          *args, **kwargs)
+      return super(AdversarialLossTest, self).evaluate(*args, **kwargs)
 
   @parameterized.named_parameters([
       ('sequential', build_linear_keras_sequential_model),
+      ('sequential_no_input_layer',
+       build_linear_keras_sequential_model_no_input_layer),
       ('functional', build_linear_keras_functional_model),
       ('subclassed', build_linear_keras_subclassed_model),
   ])
@@ -511,6 +511,37 @@ def test_train_with_2_inputs(self, name1, name2):
     self.assertAllClose(w1_new, tf.keras.backend.get_value(dense1.weights[0]))
     self.assertAllClose(w2_new, tf.keras.backend.get_value(dense2.weights[0]))
 
+  def test_train_subclassed_base_model_with_label_input(self):
+    w, x0, y0, lr, adv_config, _ = self._set_up_linear_regression()
+
+    inputs = {'feature': tf.constant(x0), 'label': tf.constant(y0)}
+
+    class BaseModel(tf.keras.Model):
+
+      def __init__(self):
+        super(BaseModel, self).__init__()
+        self.dense = tf.keras.layers.Dense(
+            w.shape[-1],
+            use_bias=False,
+            kernel_initializer=tf.keras.initializers.Constant(w))
+        self.seen_input_keys = set()
+
+      def call(self, inputs):
+        self.seen_input_keys |= set(inputs.keys())
+        return self.dense(inputs['feature'])
+
+    model = BaseModel()
+    adv_model = adversarial_regularization.AdversarialRegularization(
+        model,
+        label_keys=['label'],
+        adv_config=adv_config,
+        base_with_labels_in_features=True)
+    adv_model.compile(
+        optimizer=tf.keras.optimizers.SGD(lr), loss='MSE', metrics=['mae'])
+    adv_model.fit(x=inputs, batch_size=1, steps_per_epoch=1)
+
+    self.assertIn('label', model.seen_input_keys)
+
   def test_evaluate_binary_classification_metrics(self):
     # multi-label binary classification model
     w = np.array([[4.0, 1.0, -5.0], [-3.0, 1.0, 2.0]])
@@ -564,10 +595,17 @@ def test_evaluate_classification_metrics(self):
     self.assertAllClose(cross_entropy,
                         results['sparse_categorical_crossentropy'])
 
-  def test_perturb_on_batch(self):
+  @parameterized.named_parameters([
+      ('sequential', build_linear_keras_sequential_model),
+      ('sequential_no_input_layer',
+       build_linear_keras_sequential_model_no_input_layer),
+      ('functional', build_linear_keras_functional_model),
+      ('subclassed', build_linear_keras_subclassed_model),
+  ])
+  def test_perturb_on_batch(self, model_fn):
     w, x0, y0, lr, adv_config, _ = self._set_up_linear_regression()
     inputs = {'feature': x0, 'label': y0}
-    model = build_linear_keras_functional_model(input_shape=(2,), weights=w)
+    model = model_fn(input_shape=(2,), weights=w)
     adv_model = adversarial_regularization.AdversarialRegularization(
         model, label_keys=['label'], adv_config=adv_config)
     adv_model.compile(optimizer=tf.keras.optimizers.SGD(lr), loss=['MSE'])