
Commit 6e69973

csferng authored and tensorflow-copybara committed
Convert input dictionary to a list for functional Keras models.
Functional Keras models may expect their input features to be in a specific order, which may be different from the alphabetic order used for serializing input dictionaries. Keras `Model` class handles the different ordering by performing a name lookup before executing the model's forward pass. However, the name lookup is only performed when the model is called via high-level interfaces like `model.fit()`, but not when the model is called directly like `model(input)`. Since `nsl.keras.AdversarialRegularization` always calls its base model directly, this creates an interface discrepancy. For example, ``` input = {'a': ..., 'b': ...} model = tf.keras.Model( [tf.keras.Input(..., name='b'), tf.keras.Input(..., name='a')], ...) adv_model = nsl.keras.AdversarialRegularization(model) ... # Compiles both models model.fit(input) # works adv_model.fit(input) # error ``` This fix does the name lookup before calling the base model if the base model is a functional model. Sequential models are excluded because their feature name may not be specified. Subclassed Keras models are also excluded because some subclassed models actually expect dictionary-style input instead of a list. Fixes #27 PiperOrigin-RevId: 289938495
1 parent bfab889
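For illustration, here is a minimal standalone sketch of the reordering the fix performs (it assumes the TF 2.x-era `tf.keras` API, where functional models expose an `input_names` attribute listing inputs in declaration order):

```python
import tensorflow as tf

# A functional model whose inputs are declared in non-alphabetical order.
inp_b = tf.keras.Input(shape=(1,), name='b')
inp_a = tf.keras.Input(shape=(1,), name='a')
outputs = tf.keras.layers.Concatenate()([inp_b, inp_a])
model = tf.keras.Model(inputs=[inp_b, inp_a], outputs=outputs)

features = {'a': tf.constant([[1.0]]), 'b': tf.constant([[2.0]])}

# The fix: order the dictionary values by the model's declared input names
# ('b', 'a' here) before calling the model directly.
ordered = [features[name] for name in model.input_names]
print(model(ordered))  # [[2.0, 1.0]] -- column order matches declaration
```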

2 files changed: +65 −2 lines changed

neural_structured_learning/keras/adversarial_regularization.py

Lines changed: 14 additions & 2 deletions
```diff
@@ -585,6 +585,8 @@ def _compute_total_loss(self, labels, outputs, sample_weights=None):
 
   def _split_inputs(self, inputs):
     sample_weights = inputs.get(self.sample_weight_key, None)
+    if sample_weights is not None:
+      sample_weights = tf.stop_gradient(sample_weights)
     # Labels shouldn't be perturbed when generating adversarial examples.
     labels = [
         tf.stop_gradient(inputs[label_key]) for label_key in self.label_keys
@@ -599,11 +601,21 @@ def _split_inputs(self, inputs):
     }
     return inputs, labels, sample_weights
 
+  def _call_base_model(self, inputs, **kwargs):
+    if (self.base_model._is_graph_network and  # pylint: disable=protected-access
+        not isinstance(self.base_model, tf.keras.Sequential)):
+      base_input_names = getattr(self.base_model, 'input_names', None)
+      if base_input_names:
+        # Converts input dictionary to a list so it conforms with the model's
+        # expected input.
+        inputs = [inputs[name] for name in base_input_names]
+    return self.base_model(inputs, **kwargs)
+
   def _forward_pass(self, inputs, labels, sample_weights, base_model_kwargs):
     """Runs the usual forward pass to compute outputs, loss, and metrics."""
     with tf.GradientTape() as tape:
       tape.watch(list(inputs.values()))
-      outputs = self.base_model(inputs, **base_model_kwargs)
+      outputs = self._call_base_model(inputs, **base_model_kwargs)
       # If the base_model is a subclassed model, its output_names are not
       # available before its first call. If it is a dynamic subclassed model,
       # its output_names are not available even after its first call, so we
@@ -634,7 +646,7 @@ def call(self, inputs, **kwargs):
     adv_loss = adversarial_loss(
         inputs,
         labels,
-        self.base_model,
+        self._call_base_model,
        self._compute_total_loss,
        sample_weights=sample_weights,
        adv_config=self.adv_config,
```
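To see the change in context, here is a hedged end-to-end sketch of the scenario this unblocks (the commit-message example made concrete; shapes and layer choices are illustrative, and it assumes `neural_structured_learning` is importable as `nsl`):

```python
import numpy as np
import tensorflow as tf
import neural_structured_learning as nsl

# Input order ('b' before 'a') differs from alphabetical dictionary order.
inp_b = tf.keras.Input(shape=(2,), name='b')
inp_a = tf.keras.Input(shape=(1,), name='a')
merged = tf.keras.layers.Concatenate()([inp_b, inp_a])
model = tf.keras.Model(inputs=[inp_b, inp_a],
                       outputs=tf.keras.layers.Dense(1)(merged))

adv_model = nsl.keras.AdversarialRegularization(model, label_keys=['label'])
adv_model.compile(optimizer='sgd', loss='mse')

batch = {'a': np.ones((8, 1)), 'b': np.ones((8, 2)), 'label': np.zeros((8, 1))}
# Before this commit, fit() failed because the base model was called directly
# with a dict; _call_base_model now reorders it to ['b', 'a'] first.
adv_model.fit(batch, batch_size=4, epochs=1)
```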

neural_structured_learning/keras/adversarial_regularization_test.py

Lines changed: 51 additions & 0 deletions
```diff
@@ -39,6 +39,16 @@ def build_linear_keras_sequential_model(input_shape, weights):
   return model
 
 
+def build_linear_keras_sequential_model_no_input_layer(input_shape, weights):
+  return tf.keras.Sequential([
+      tf.keras.layers.Dense(
+          weights.shape[-1],
+          use_bias=False,
+          input_shape=input_shape,
+          kernel_initializer=tf.keras.initializers.Constant(weights)),
+  ])
+
+
 def build_linear_keras_functional_model(input_shape,
                                         weights,
                                         input_name='feature'):
@@ -276,6 +286,8 @@ def _set_up_linear_regression(self, sample_weight=1.0):
 
   @parameterized.named_parameters([
       ('sequential', build_linear_keras_sequential_model),
+      ('sequential_no_input_layer',
+       build_linear_keras_sequential_model_no_input_layer),
       ('functional', build_linear_keras_functional_model),
       ('subclassed', build_linear_keras_subclassed_model),
   ])
@@ -460,6 +472,45 @@ def test_train_with_2_outputs(self):
     self.assertAllClose(expected_metric,
                         history.history['mean_absolute_error_label2'][0])
 
+  @parameterized.named_parameters([
+      ('order_1_2', 'first', 'second'),
+      ('order_2_1', 'second', 'first'),
+  ])
+  def test_train_with_2_inputs(self, name1, name2):
+    x1, x2 = np.array([[1.]]), np.array([[4., 5.]])
+    w1, w2 = np.array([[2.]]), np.array([[3.], [6.]])
+    y = np.array([0.])
+    inputs = {name1: x1, name2: x2, 'label': y}
+    lr, adv_step_size = 0.001, 0.1
+
+    input1 = tf.keras.Input(shape=(1,), name=name1)
+    input2 = tf.keras.Input(shape=(2,), name=name2)
+    dense1 = tf.keras.layers.Dense(
+        w1.shape[-1],
+        use_bias=False,
+        kernel_initializer=tf.keras.initializers.Constant(w1))
+    dense2 = tf.keras.layers.Dense(
+        w2.shape[-1],
+        use_bias=False,
+        kernel_initializer=tf.keras.initializers.Constant(w2))
+    output = tf.keras.layers.Add()([dense1(input1), dense2(input2)])
+    model = tf.keras.Model(inputs=[input1, input2], outputs=output)
+
+    adv_config = configs.make_adv_reg_config(
+        multiplier=1.0, adv_step_size=adv_step_size, adv_grad_norm='l2')
+    adv_model = adversarial_regularization.AdversarialRegularization(
+        model, label_keys=['label'], adv_config=adv_config)
+    adv_model.compile(optimizer=tf.keras.optimizers.SGD(lr), loss='MAE')
+    adv_model.fit(x=inputs, batch_size=1, steps_per_epoch=1)
+
+    # loss = |x1 * w1 + x2 * w2|, gradient(loss, [x1, x2]) = [w1, w2]
+    w_norm = np.sqrt((np.sum(w1 * w1) + np.sum(w2 * w2)))
+    x1_adv, x2_adv = x1 + adv_step_size * w1.T / w_norm, x2 + adv_step_size * w2.T / w_norm
+    # gradient(loss, [w1, w2]) = [x1, x2]
+    w1_new, w2_new = w1 - lr * (x1 + x1_adv).T, w2 - lr * (x2 + x2_adv).T
+    self.assertAllClose(w1_new, tf.keras.backend.get_value(dense1.weights[0]))
+    self.assertAllClose(w2_new, tf.keras.backend.get_value(dense2.weights[0]))
+
   def test_evaluate_binary_classification_metrics(self):
     # multi-label binary classification model
     w = np.array([[4.0, 1.0, -5.0], [-3.0, 1.0, 2.0]])
```
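The closed-form expectations in `test_train_with_2_inputs` can be sanity-checked with plain NumPy. A minimal sketch mirroring the test's constants (the prediction 1·2 + 4·3 + 5·6 = 44 is positive, so the absolute-value loss has gradient w with respect to x and gradient x with respect to w):

```python
import numpy as np

x1, x2 = np.array([[1.]]), np.array([[4., 5.]])
w1, w2 = np.array([[2.]]), np.array([[3.], [6.]])
lr, adv_step_size = 0.001, 0.1

# L2-normalized gradient step on the inputs: ||(w1, w2)|| = sqrt(4 + 45) = 7.
w_norm = np.sqrt(np.sum(w1 * w1) + np.sum(w2 * w2))
x1_adv = x1 + adv_step_size * w1.T / w_norm   # [[1 + 0.2/7]]
x2_adv = x2 + adv_step_size * w2.T / w_norm   # [[4 + 0.3/7, 5 + 0.6/7]]

# One SGD step on the combined (original + adversarial) loss.
w1_new = w1 - lr * (x1 + x1_adv).T            # [[~1.9979714]]
w2_new = w2 - lr * (x2 + x2_adv).T
print(w1_new, w2_new, sep='\n')
```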
