|
1 | | -{ |
| 1 | +{ |
2 | 2 | "cells": [ |
3 | 3 | { |
4 | 4 | "cell_type": "markdown", |
|
794 | 794 | }, |
795 | 795 | "outputs": [], |
796 | 796 | "source": [ |
797 | | - "def pad_sequence(sequence, max_seq_length):\n", |
798 | | - " \"\"\"Pads the input sequence (a `tf.SparseTensor`) to `max_seq_length`.\"\"\"\n", |
799 | | - " pad_size = tf.maximum([0], max_seq_length - tf.shape(sequence)[0])\n", |
800 | | - " padded = tf.concat(\n", |
801 | | - " [sequence.values,\n", |
802 | | - " tf.fill((pad_size), tf.cast(0, sequence.dtype))],\n", |
803 | | - " axis=0)\n", |
804 | | - " # The input sequence may be larger than max_seq_length. Truncate down if\n", |
805 | | - " # necessary.\n", |
806 | | - " return tf.slice(padded, [0], [max_seq_length])\n", |
807 | | - "\n", |
808 | | - "def parse_example(example_proto):\n", |
809 | | - " \"\"\"Extracts relevant fields from the `example_proto`.\n", |
810 | | - "\n", |
811 | | - " Args:\n", |
812 | | - " example_proto: An instance of `tf.train.Example`.\n", |
813 | | - "\n", |
814 | | - " Returns:\n", |
815 | | - " A pair whose first value is a dictionary containing relevant features\n", |
816 | | - " and whose second value contains the ground truth labels.\n", |
817 | | - " \"\"\"\n", |
818 | | - " # The 'words' feature is a variable length word ID vector.\n", |
819 | | - " feature_spec = {\n", |
820 | | - " 'words': tf.io.VarLenFeature(tf.int64),\n", |
821 | | - " 'label': tf.io.FixedLenFeature((), tf.int64, default_value=-1),\n", |
822 | | - " }\n", |
823 | | - " # We also extract corresponding neighbor features in a similar manner to\n", |
824 | | - " # the features above.\n", |
825 | | - " for i in range(HPARAMS.num_neighbors):\n", |
826 | | - " nbr_feature_key = '{}{}_{}'.format(NBR_FEATURE_PREFIX, i, 'words')\n", |
827 | | - " nbr_weight_key = '{}{}{}'.format(NBR_FEATURE_PREFIX, i, NBR_WEIGHT_SUFFIX)\n", |
828 | | - " feature_spec[nbr_feature_key] = tf.io.VarLenFeature(tf.int64)\n", |
829 | | - "\n", |
830 | | - " # We assign a default value of 0.0 for the neighbor weight so that\n", |
831 | | - " # graph regularization is done on samples based on their exact number\n", |
832 | | - " # of neighbors. In other words, non-existent neighbors are discounted.\n", |
833 | | - " feature_spec[nbr_weight_key] = tf.io.FixedLenFeature(\n", |
834 | | - " [1], tf.float32, default_value=tf.constant([0.0]))\n", |
835 | | - "\n", |
836 | | - " features = tf.io.parse_single_example(example_proto, feature_spec)\n", |
837 | | - "\n", |
838 | | - " # Since the 'words' feature is a variable length word vector, we pad it to a\n", |
839 | | - " # constant maximum length based on HPARAMS.max_seq_length\n", |
840 | | - " features['words'] = pad_sequence(features['words'], HPARAMS.max_seq_length)\n", |
841 | | - " for i in range(HPARAMS.num_neighbors):\n", |
842 | | - " nbr_feature_key = '{}{}_{}'.format(NBR_FEATURE_PREFIX, i, 'words')\n", |
843 | | - " features[nbr_feature_key] = pad_sequence(features[nbr_feature_key],\n", |
844 | | - " HPARAMS.max_seq_length)\n", |
845 | | - "\n", |
846 | | - " labels = features.pop('label')\n", |
847 | | - " return features, labels\n", |
848 | | - "\n", |
849 | 797 | "def make_dataset(file_path, training=False):\n", |
850 | 798 | " \"\"\"Creates a `tf.data.TFRecordDataset`.\n", |
851 | 799 | "\n", |
|
858 | 806 | " An instance of `tf.data.TFRecordDataset` containing the `tf.train.Example`\n", |
859 | 807 | " objects.\n", |
860 | 808 | " \"\"\"\n", |
| 809 | + "\n", |
| 810 | + " def pad_sequence(sequence, max_seq_length):\n", |
| 811 | + " \"\"\"Pads the input sequence (a `tf.SparseTensor`) to `max_seq_length`.\"\"\"\n", |
| 812 | + " pad_size = tf.maximum([0], max_seq_length - tf.shape(sequence)[0])\n", |
| 813 | + " padded = tf.concat(\n", |
| 814 | + " [sequence.values,\n", |
| 815 | + " tf.fill((pad_size), tf.cast(0, sequence.dtype))],\n", |
| 816 | + " axis=0)\n", |
| 817 | + " # The input sequence may be larger than max_seq_length. Truncate down if\n", |
| 818 | + " # necessary.\n", |
| 819 | + " return tf.slice(padded, [0], [max_seq_length])\n", |
| 820 | + "\n", |
| 821 | + " def parse_example(example_proto):\n", |
| 822 | + " \"\"\"Extracts relevant fields from the `example_proto`.\n", |
| 823 | + "\n", |
| 824 | + " Args:\n", |
| 825 | + " example_proto: An instance of `tf.train.Example`.\n", |
| 826 | + "\n", |
| 827 | + " Returns:\n", |
| 828 | + " A pair whose first value is a dictionary containing relevant features\n", |
| 829 | + " and whose second value contains the ground truth labels.\n", |
| 830 | + " \"\"\"\n", |
| 831 | + " # The 'words' feature is a variable length word ID vector.\n", |
| 832 | + " feature_spec = {\n", |
| 833 | + " 'words': tf.io.VarLenFeature(tf.int64),\n", |
| 834 | + " 'label': tf.io.FixedLenFeature((), tf.int64, default_value=-1),\n", |
| 835 | + " }\n", |
| 836 | + " # We also extract corresponding neighbor features in a similar manner to\n", |
| 837 | + " # the features above during training.\n", |
| 838 | + " if training:\n", |
| 839 | + " for i in range(HPARAMS.num_neighbors):\n", |
| 840 | + " nbr_feature_key = '{}{}_{}'.format(NBR_FEATURE_PREFIX, i, 'words')\n", |
| 841 | + " nbr_weight_key = '{}{}{}'.format(NBR_FEATURE_PREFIX, i,\n", |
| 842 | + " NBR_WEIGHT_SUFFIX)\n", |
| 843 | + " feature_spec[nbr_feature_key] = tf.io.VarLenFeature(tf.int64)\n", |
| 844 | + "\n", |
| 845 | + " # We assign a default value of 0.0 for the neighbor weight so that\n", |
| 846 | + " # graph regularization is done on samples based on their exact number\n", |
| 847 | + " # of neighbors. In other words, non-existent neighbors are discounted.\n", |
| 848 | + " feature_spec[nbr_weight_key] = tf.io.FixedLenFeature(\n", |
| 849 | + " [1], tf.float32, default_value=tf.constant([0.0]))\n", |
| 850 | + "\n", |
| 851 | + " features = tf.io.parse_single_example(example_proto, feature_spec)\n", |
| 852 | + "\n", |
| 853 | + " # Since the 'words' feature is a variable length word vector, we pad it to a\n", |
| 854 | + " # constant maximum length based on HPARAMS.max_seq_length\n", |
| 855 | + " features['words'] = pad_sequence(features['words'], HPARAMS.max_seq_length)\n", |
| 856 | + " if training:\n", |
| 857 | + " for i in range(HPARAMS.num_neighbors):\n", |
| 858 | + " nbr_feature_key = '{}{}_{}'.format(NBR_FEATURE_PREFIX, i, 'words')\n", |
| 859 | + " features[nbr_feature_key] = pad_sequence(features[nbr_feature_key],\n", |
| 860 | + " HPARAMS.max_seq_length)\n", |
| 861 | + "\n", |
| 862 | + " labels = features.pop('label')\n", |
| 863 | + " return features, labels\n", |
| 864 | + "\n", |
861 | 865 | " dataset = tf.data.TFRecordDataset([file_path])\n", |
862 | 866 | " if training:\n", |
863 | 867 | " dataset = dataset.shuffle(10000)\n", |
864 | 868 | " dataset = dataset.map(parse_example)\n", |
865 | 869 | " dataset = dataset.batch(HPARAMS.batch_size)\n", |
866 | 870 | " return dataset\n", |
867 | 871 | "\n", |
| 872 | + "\n", |
868 | 873 | "train_dataset = make_dataset('/tmp/imdb/nsl_train_data.tfr', True)\n", |
869 | 874 | "test_dataset = make_dataset('/tmp/imdb/test_data.tfr')" |
870 | 875 | ] |
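
Aside for readers skimming this hunk: the change nests `pad_sequence` and `parse_example` inside `make_dataset` and gates the neighbor-feature parsing on `training`, since only the NSL-augmented training records carry neighbor features; the plain test records have just `words` and `label`. Below is a minimal standalone sketch of the pad/truncate behavior, with made-up word-ID vectors and an arbitrary `max_seq_length` of 5 (the helper is copied out of the hunk above purely for illustration):

```python
import tensorflow as tf

# Copied from the hunk above so it can run standalone; in the notebook it is
# nested inside make_dataset and applied within dataset.map.
def pad_sequence(sequence, max_seq_length):
  """Pads the input sequence (a `tf.SparseTensor`) to `max_seq_length`."""
  pad_size = tf.maximum([0], max_seq_length - tf.shape(sequence)[0])
  padded = tf.concat(
      [sequence.values,
       tf.fill((pad_size), tf.cast(0, sequence.dtype))],
      axis=0)
  # The input may be longer than max_seq_length; truncate if necessary.
  return tf.slice(padded, [0], [max_seq_length])

# Made-up word-ID vectors of lengths 3 and 7 (no zeros, since zero is the pad
# value and would be dropped by the sparse conversion).
short_seq = tf.sparse.from_dense(tf.constant([3, 7, 11], dtype=tf.int64))
long_seq = tf.sparse.from_dense(
    tf.constant([1, 2, 3, 4, 5, 6, 7], dtype=tf.int64))
print(pad_sequence(short_seq, 5).numpy())  # [ 3  7 11  0  0] -- zero-padded
print(pad_sequence(long_seq, 5).numpy())   # [1 2 3 4 5]      -- truncated
```

And a quick, hypothetical peek at one batch to confirm that the `training=False` path yields only the `words` feature:

```python
features, labels = next(iter(test_dataset))
print(list(features.keys()))    # expected: ['words'] -- no neighbor features
print(features['words'].shape)  # (batch_size, max_seq_length)
```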
|
1357 | 1362 | "id": "yBrp0Y0jHu5k" |
1358 | 1363 | }, |
1359 | 1364 | "source": [ |
1360 | | - "There are six entries: one for each monitored metric -- loss, graph loss, and\n", |
1361 | | - "accuracy -- during training and validation. We can use these to plot the\n", |
1362 | | - "training, graph, and validation losses for comparison, as well as the training\n", |
1363 | | - "and validation accuracy. Note that the graph loss is only computed during\n", |
1364 | | - "training; so its value will be 0 during validation." |
| 1365 | + "There are five entries in total in the dictionary: training loss, training\n", |
| 1366 | + "accuracy, training graph loss, validation loss, and validation accuracy. We can\n", |
| 1367 | + "plot them all together for comparison. Note that the graph loss is only computed\n", |
| 1368 | + "during training." |
1365 | 1369 | ] |
1366 | 1370 | }, |
1367 | 1371 | { |
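
A quick way to verify the five-entry claim before plotting is to print the keys of the history dictionary. This is a hypothetical check; the exact key names assume the model was compiled with `metrics=['accuracy']`, so that `accuracy`/`val_accuracy` appear alongside the losses:

```python
print(sorted(graph_reg_history_dict.keys()))
# Expected, per the text above:
# ['accuracy', 'graph_loss', 'loss', 'val_accuracy', 'val_loss']
```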
|
1379 | 1383 | "loss = graph_reg_history_dict['loss']\n", |
1380 | 1384 | "graph_loss = graph_reg_history_dict['graph_loss']\n", |
1381 | 1385 | "val_loss = graph_reg_history_dict['val_loss']\n", |
1382 | | - "val_graph_loss = graph_reg_history_dict['val_graph_loss']\n", |
1383 | 1386 | "\n", |
1384 | 1387 | "epochs = range(1, len(acc) + 1)\n", |
1385 | 1388 | "\n", |
|
1391 | 1394 | "plt.plot(epochs, graph_loss, '-gD', label='Training graph loss')\n", |
1392 | 1395 | "# \"-bo\" is for solid blue line with circle markers.\n", |
1393 | 1396 | "plt.plot(epochs, val_loss, '-bo', label='Validation loss')\n", |
1394 | | - "# \"-ms\" is for solid magenta line with square markers.\n", |
1395 | | - "plt.plot(epochs, val_graph_loss, '-ms', label='Validation graph loss')\n", |
1396 | 1397 | "plt.title('Training and validation loss')\n", |
1397 | 1398 | "plt.xlabel('Epochs')\n", |
1398 | 1399 | "plt.ylabel('Loss')\n", |
|