
Commit 989c055

arjung authored and tensorflow-copybara committed
Restrict the expectation of neighbor features to the training mode. This updates both the Keras and the Estimator wrappers in NSL.
PiperOrigin-RevId: 285269590
1 parent 4014a76 commit 989c055

File tree

7 files changed (+288, −43 lines)


neural_structured_learning/estimator/adversarial_regularization.py

Lines changed: 1 addition & 1 deletion
@@ -80,7 +80,7 @@ def adv_model_fn(features, labels, mode, params=None, config=None):
         `num_ps_replicas`, or `model_dir`. Unused currently.

     Returns:
-      A `tf.EstimatorSpec` whose loss incorporates graph-based regularization.
+      A `tf.estimator.EstimatorSpec` with adversarial regularization.
     """

     # Uses the same variable scope for calculating the original objective and
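For context, a minimal sketch of how this wrapper is typically used. The names `base_estimator` and `train_input_fn` are hypothetical placeholders, and the config values are illustrative only, not recommendations:

    import neural_structured_learning as nsl

    # Hypothetical base estimator and input_fn; multiplier/adv_step_size are
    # illustrative values.
    adv_config = nsl.configs.make_adv_reg_config(
        multiplier=0.2, adv_step_size=0.05)
    adv_estimator = nsl.estimator.add_adversarial_regularization(
        base_estimator, adv_config=adv_config)

    # The wrapped model_fn returns a tf.estimator.EstimatorSpec whose loss
    # includes the adversarial regularization term.
    adv_estimator.train(input_fn=train_input_fn, steps=100)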

neural_structured_learning/estimator/graph_regularization.py

Lines changed: 14 additions & 5 deletions
@@ -77,7 +77,7 @@ def graph_reg_model_fn(features, labels, mode, params=None, config=None):
         as `num_ps_replicas`, or `model_dir`. Unused currently.

     Returns:
-      A `tf.EstimatorSpec` whose loss incorporates graph-based regularization.
+      A `tf.estimator.EstimatorSpec` with graph regularization.
     """

     # Uses the same variable scope for calculating the original objective and
@@ -86,10 +86,19 @@ def graph_reg_model_fn(features, labels, mode, params=None, config=None):
         tf.compat.v1.get_variable_scope(),
         reuse=tf.compat.v1.AUTO_REUSE,
         auxiliary_name_scope=False):
-      # Extract sample features, neighbor features, and neighbor weights.
-      sample_features, nbr_features, nbr_weights = (
-          utils.unpack_neighbor_features(features,
-                                         graph_reg_config.neighbor_config))
+      nbr_features = dict()
+      nbr_weights = None
+      if mode == tf.estimator.ModeKeys.TRAIN:
+        # Extract sample features, neighbor features, and neighbor weights if
+        # we are in training mode.
+        sample_features, nbr_features, nbr_weights = (
+            utils.unpack_neighbor_features(features,
+                                           graph_reg_config.neighbor_config))
+      else:
+        # Otherwise, we strip out all neighbor features and use just the
+        # sample's features.
+        sample_features = utils.strip_neighbor_features(
+            features, graph_reg_config.neighbor_config)

       # If no 'params' is passed, then it is possible for base_model_fn not to
       # accept a 'params' argument. See documentation for tf.estimator.Estimator
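To illustrate the effect of this change on callers, here is a hedged sketch of the resulting train/evaluate asymmetry. The names `base_estimator`, `embedding_fn`, `optimizer_fn`, `train_input_fn`, and `eval_input_fn` are hypothetical placeholders:

    import neural_structured_learning as nsl

    graph_reg_config = nsl.configs.make_graph_reg_config(
        max_neighbors=2, multiplier=1)
    graph_reg_estimator = nsl.estimator.add_graph_regularization(
        base_estimator, embedding_fn, optimizer_fn,
        graph_reg_config=graph_reg_config)

    # TRAIN mode: the input features are expected to contain neighbor keys
    # (e.g. 'NL_nbr_0_x'), which unpack_neighbor_features() separates into
    # sample features, neighbor features, and neighbor weights.
    graph_reg_estimator.train(input_fn=train_input_fn, steps=100)

    # EVAL/PREDICT modes: after this change, neighbor features are no longer
    # required; any neighbor keys present are stripped, and only the sample's
    # own features reach the base model.
    graph_reg_estimator.evaluate(input_fn=eval_input_fn)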

neural_structured_learning/estimator/graph_regularization_test.py

Lines changed: 105 additions & 10 deletions
@@ -174,14 +174,14 @@ def embedding_fn(features, unused_mode):
     """

     input_fn = single_example_input_fn(
-        example, input_shape=[1], max_neighbors=1)
+        example, input_shape=[1], max_neighbors=0)
     predictions = graph_reg_est.predict(input_fn=input_fn)
     predicted_scores = [x['predictions'] for x in predictions]
     self.assertAllClose([[3.0]], predicted_scores)

-  def train_and_check_params(self, example, max_neighbors, weight, bias,
-                             expected_grad_from_weight,
-                             expected_grad_from_bias):
+  def _train_and_check_params(self, example, max_neighbors, weight, bias,
+                              expected_grad_from_weight,
+                              expected_grad_from_bias):
     """Runs training for one step and verifies gradient-based updates."""

     def embedding_fn(features, unused_mode):
@@ -261,7 +261,8 @@ def test_graph_reg_wrapper_one_neighbor_with_training(self):
     # which includes the supervised loss as well as the graph loss.
     orig_pred = np.dot(x0, weight) + bias  # [9.0]

-    # Based on the implementation of embedding_fn inside train_and_check_params.
+    # Based on the implementation of embedding_fn inside
+    # _train_and_check_params.
     x0_embedding = np.dot(x0, weight)
     neighbor0_embedding = np.dot(neighbor0, weight)

@@ -271,8 +272,8 @@ def test_graph_reg_wrapper_one_neighbor_with_training(self):
                               neighbor0).T  # [[2.5], [1.5]]
     orig_grad_b = 2 * (orig_pred - y0).reshape((1,))  # [2.0]

-    self.train_and_check_params(example, 1, weight, bias, orig_grad_w,
-                                orig_grad_b)
+    self._train_and_check_params(example, 1, weight, bias, orig_grad_w,
+                                 orig_grad_b)

   @test_util.run_v1_only('Requires tf.get_variable')
   def test_graph_reg_wrapper_two_neighbors_with_training(self):
@@ -318,7 +319,8 @@ def test_graph_reg_wrapper_two_neighbors_with_training(self):
     # which includes the supervised loss as well as the graph loss.
     orig_pred = np.dot(x0, weight) + bias  # [9.0]

-    # Based on the implementation of embedding_fn inside train_and_check_params.
+    # Based on the implementation of embedding_fn inside
+    # _train_and_check_params.
     x0_embedding = np.dot(x0, weight)
     neighbor0_embedding = np.dot(neighbor0, weight)
     neighbor1_embedding = np.dot(neighbor1, weight)
@@ -338,8 +340,101 @@ def test_graph_reg_wrapper_two_neighbors_with_training(self):
     orig_grad_w = grad_w_supervised_loss + grad_w_graph_loss
     orig_grad_b = 2 * (orig_pred - y0).reshape((1,))  # [2.0]

-    self.train_and_check_params(example, 2, weight, bias, orig_grad_w,
-                                orig_grad_b)
+    self._train_and_check_params(example, 2, weight, bias, orig_grad_w,
+                                 orig_grad_b)
+
+  def _train_and_check_eval_results(self, train_example, test_example,
+                                    max_neighbors, weight, bias):
+    """Verifies evaluation results for the graph-regularized model."""
+
+    def embedding_fn(features, unused_mode):
+      # Computes y = w*x
+      with tf.variable_scope(
+          tf.get_variable_scope(),
+          reuse=tf.AUTO_REUSE,
+          auxiliary_name_scope=False):
+        weight_tensor = tf.reshape(
+            tf.get_variable(
+                WEIGHT_VARIABLE,
+                shape=[2, 1],
+                partitioner=tf.fixed_size_partitioner(1)),
+            shape=[-1, 2])
+
+        x_tensor = tf.reshape(features[FEATURE_NAME], shape=[-1, 2])
+        return tf.reduce_sum(
+            tf.multiply(weight_tensor, x_tensor), 1, keep_dims=True)
+
+    def optimizer_fn():
+      return tf.train.GradientDescentOptimizer(LEARNING_RATE)
+
+    base_est = self.build_linear_regressor(
+        weight=weight, weight_shape=[2, 1], bias=bias, bias_shape=[1])
+
+    graph_reg_config = nsl_configs.make_graph_reg_config(
+        max_neighbors=max_neighbors, multiplier=1)
+    graph_reg_est = nsl_estimator.add_graph_regularization(
+        base_est, embedding_fn, optimizer_fn, graph_reg_config=graph_reg_config)
+
+    train_input_fn = single_example_input_fn(
+        train_example, input_shape=[2], max_neighbors=max_neighbors)
+    graph_reg_est.train(input_fn=train_input_fn, steps=1)
+
+    # Evaluating the graph-regularized model should yield the same results
+    # as evaluating the base model because model parameters are shared.
+    eval_input_fn = single_example_input_fn(
+        test_example, input_shape=[2], max_neighbors=0)
+    graph_eval_results = graph_reg_est.evaluate(input_fn=eval_input_fn)
+    base_eval_results = base_est.evaluate(input_fn=eval_input_fn)
+    self.assertAllClose(base_eval_results, graph_eval_results)
+
+  @test_util.run_v1_only('Requires tf.get_variable')
+  def test_graph_reg_model_evaluate(self):
+    weight = np.array([[4.0], [-3.0]])
+    bias = np.array([0.0], dtype=np.float32)
+
+    train_example = """
+        features {
+          feature {
+            key: "x"
+            value: { float_list { value: [ 2.0, 3.0 ] } }
+          }
+          feature {
+            key: "NL_nbr_0_x"
+            value: { float_list { value: [ 2.5, 3.0 ] } }
+          }
+          feature {
+            key: "NL_nbr_0_weight"
+            value: { float_list { value: 1.0 } }
+          }
+          feature {
+            key: "NL_nbr_1_x"
+            value: { float_list { value: [ 2.0, 2.0 ] } }
+          }
+          feature {
+            key: "NL_nbr_1_weight"
+            value: { float_list { value: 1.0 } }
+          }
+          feature {
+            key: "y"
+            value: { float_list { value: 0.0 } }
+          }
+        }
+    """
+
+    test_example = """
+        features {
+          feature {
+            key: "x"
+            value: { float_list { value: [ 4.0, 2.0 ] } }
+          }
+          feature {
+            key: "y"
+            value: { float_list { value: 4.0 } }
+          }
+        }
+    """
+    self._train_and_check_eval_results(
+        train_example, test_example, max_neighbors=2, weight=weight, bias=bias)


 if __name__ == '__main__':
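These tests rely on a `single_example_input_fn` helper that this diff does not show. Below is a minimal sketch of how such a helper might be implemented, assuming the `NL_nbr_<i>_<feature>` naming convention used in the example protos above; the signature matches the calls in the tests, but the parsing details are assumptions, not the actual test utility:

    import tensorflow as tf
    from google.protobuf import text_format

    def single_example_input_fn(example_proto_text, input_shape, max_neighbors):
      """Returns an input_fn yielding one parsed tf.Example (hypothetical)."""
      example = text_format.Parse(example_proto_text, tf.train.Example())
      serialized = example.SerializeToString()

      def input_fn():
        feature_spec = {
            'x': tf.io.FixedLenFeature(input_shape, tf.float32),
            'y': tf.io.FixedLenFeature([1], tf.float32),
        }
        # With max_neighbors=0 (the evaluation case in this commit), no
        # neighbor keys are requested, so only the sample's own features
        # are parsed.
        for i in range(max_neighbors):
          feature_spec['NL_nbr_{}_x'.format(i)] = tf.io.FixedLenFeature(
              input_shape, tf.float32)
          feature_spec['NL_nbr_{}_weight'.format(i)] = tf.io.FixedLenFeature(
              [1], tf.float32)

        features = tf.io.parse_single_example(serialized, feature_spec)
        labels = features.pop('y')
        return tf.data.Dataset.from_tensors((features, labels)).batch(1)

      return input_fn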

neural_structured_learning/keras/BUILD

Lines changed: 2 additions & 0 deletions
@@ -81,12 +81,14 @@ py_library(
     deps = [
         "//neural_structured_learning/configs",
         "//neural_structured_learning/keras/layers",
+        "//neural_structured_learning/lib",
         # package tensorflow
     ],
 )

 py_test(
     name = "graph_regularization_test",
+    timeout = "long",
     srcs = ["graph_regularization_test.py"],
     srcs_version = "PY2AND3",
     deps = [

neural_structured_learning/keras/graph_regularization.py

Lines changed: 29 additions & 21 deletions
@@ -84,6 +84,20 @@ def compile(self, *args, **kwargs):

   compile.__doc__ = tf.keras.Model.compile.__doc__

+  # Override the evaluate and the predict methods so that we can use the base
+  # model for evaluation/prediction rather than the graph-regularized model.
+  # This is because once the graph-regularized Keras model is built, it expects
+  # neighbor features as input for all modes, not just for training.
+  def evaluate(self, *args, **kwargs):
+    return self.base_model.evaluate(*args, **kwargs)
+
+  evaluate.__doc__ = tf.keras.Model.evaluate.__doc__
+
+  def predict(self, *args, **kwargs):
+    return self.base_model.predict(*args, **kwargs)
+
+  predict.__doc__ = tf.keras.Model.predict.__doc__
+
   def call(self, inputs, training=False, **kwargs):
     """Incorporates graph regularization into the loss of `base_model`.

@@ -99,30 +113,24 @@ def call(self, inputs, training=False, **kwargs):
     Returns:
       The output tensors for the wrapped graph-regularized model.
     """
-    sample_features, nbr_features, nbr_weights = self.nbr_features_layer(inputs)
+    # Invoke the call() function of the neighbor features layer directly
+    # instead of invoking it as a callable, to prevent Keras from wrapping
+    # placeholder tensors with the tf.identity() op.
+    sample_features, nbr_features, nbr_weights = self.nbr_features_layer.call(
+        inputs)
     base_output = self.base_model(sample_features, training=training, **kwargs)

+    # For evaluation and prediction, we use the base model. So, this overridden
+    # call function will get invoked only for training.
     has_nbr_inputs = nbr_weights is not None and nbr_features
-
-    # 'training' is a boolean or boolean tensor. So, we have to use the tf.cond
-    # op to be able to write conditional code based on its value.
-
-    def graph_loss_with_regularization():
-      if (has_nbr_inputs and self.graph_reg_config.multiplier > 0):
-        # Use logits for regularization.
-        sample_logits = base_output
-        nbr_logits = self.base_model(nbr_features, training=training, **kwargs)
-        return self.regularizer(
-            sources=sample_logits, targets=nbr_logits, weights=nbr_weights)
-      else:
-        return tf.constant(0, dtype=tf.float32)
-
-    def graph_loss_without_regularization():
-      return tf.constant(0, dtype=tf.float32)
-
-    graph_loss = tf.cond(
-        tf.equal(training, tf.constant(True)), graph_loss_with_regularization,
-        graph_loss_without_regularization)
+    if has_nbr_inputs and self.graph_reg_config.multiplier > 0:
+      # Use logits for regularization.
+      sample_logits = base_output
+      nbr_logits = self.base_model(nbr_features, training=training, **kwargs)
+      graph_loss = self.regularizer(
+          sources=sample_logits, targets=nbr_logits, weights=nbr_weights)
+    else:
+      graph_loss = tf.constant(0, dtype=tf.float32)

     # Note that add_metric() cannot be invoked in a control flow branch.
     self.add_metric(graph_loss, name='graph_loss', aggregation='mean')
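A hedged usage sketch of the new behavior on the Keras side: training still consumes neighbor features, while evaluate() and predict() now delegate to the base model and accept plain inputs. The helpers `make_train_dataset_with_neighbors` and `make_plain_dataset` are hypothetical input pipelines, and the model architecture is illustrative only:

    import neural_structured_learning as nsl
    import tensorflow as tf

    base_model = tf.keras.Sequential([
        tf.keras.layers.Dense(16, activation='relu', input_shape=(10,)),
        tf.keras.layers.Dense(1),
    ])

    graph_reg_config = nsl.configs.make_graph_reg_config(
        max_neighbors=2, multiplier=0.1)
    graph_reg_model = nsl.keras.GraphRegularization(base_model,
                                                    graph_reg_config)
    graph_reg_model.compile(optimizer='adam', loss='mse')

    # Training consumes neighbor features (e.g. 'NL_nbr_0_x' keys) and adds
    # the graph loss to the objective via the overridden call().
    graph_reg_model.fit(make_train_dataset_with_neighbors(), epochs=1)

    # Evaluation and prediction delegate to base_model, so inputs without
    # neighbor features work in these modes.
    graph_reg_model.evaluate(make_plain_dataset())
    graph_reg_model.predict(make_plain_dataset())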
