Skip to content

Commit 8e1a1bc

Browse files
author
Florian Westphal
committed
netfilter: nf_tables: avoid chain re-validation if possible
Hamza Mahfooz reports cpu soft lock-ups in nft_chain_validate(): watchdog: BUG: soft lockup - CPU#1 stuck for 27s! [iptables-nft-re:37547] [..] RIP: 0010:nft_chain_validate+0xcb/0x110 [nf_tables] [..] nft_immediate_validate+0x36/0x50 [nf_tables] nft_chain_validate+0xc9/0x110 [nf_tables] nft_immediate_validate+0x36/0x50 [nf_tables] nft_chain_validate+0xc9/0x110 [nf_tables] nft_immediate_validate+0x36/0x50 [nf_tables] nft_chain_validate+0xc9/0x110 [nf_tables] nft_immediate_validate+0x36/0x50 [nf_tables] nft_chain_validate+0xc9/0x110 [nf_tables] nft_immediate_validate+0x36/0x50 [nf_tables] nft_chain_validate+0xc9/0x110 [nf_tables] nft_immediate_validate+0x36/0x50 [nf_tables] nft_chain_validate+0xc9/0x110 [nf_tables] nft_table_validate+0x6b/0xb0 [nf_tables] nf_tables_validate+0x8b/0xa0 [nf_tables] nf_tables_commit+0x1df/0x1eb0 [nf_tables] [..] Currently nf_tables will traverse the entire table (chain graph), starting from the entry points (base chains), exploring all possible paths (chain jumps). But there are cases where we could avoid revalidation. Consider: 1 input -> j2 -> j3 2 input -> j2 -> j3 3 input -> j1 -> j2 -> j3 Then the second rule does not need to revalidate j2, and, by extension j3, because this was already checked during validation of the first rule. We need to validate it only for rule 3. This is needed because chain loop detection also ensures we do not exceed the jump stack: Just because we know that j2 is cycle free, its last jump might now exceed the allowed stack size. We also need to update all reachable chains with the new largest observed call depth. Care has to be taken to revalidate even if the chain depth won't be an issue: chain validation also ensures that expressions are not called from invalid base chains. For example, the masquerade expression can only be called from NAT postrouting base chains. Therefore we also need to keep record of the base chain context (type, hooknum) and revalidate if the chain becomes reachable from a different hook location. Reported-by: Hamza Mahfooz <hamzamahfooz@linux.microsoft.com> Closes: https://lore.kernel.org/netfilter-devel/20251118221735.GA5477@linuxonhyperv3.guj3yctzbm1etfxqx2vob5hsef.xx.internal.cloudapp.net/ Tested-by: Hamza Mahfooz <hamzamahfooz@linux.microsoft.com> Signed-off-by: Florian Westphal <fw@strlen.de>
1 parent a67fd55 commit 8e1a1bc

File tree

2 files changed

+91
-12
lines changed

2 files changed

+91
-12
lines changed

include/net/netfilter/nf_tables.h

Lines changed: 26 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1091,6 +1091,29 @@ struct nft_rule_blob {
10911091
__attribute__((aligned(__alignof__(struct nft_rule_dp))));
10921092
};
10931093

1094+
enum nft_chain_types {
1095+
NFT_CHAIN_T_DEFAULT = 0,
1096+
NFT_CHAIN_T_ROUTE,
1097+
NFT_CHAIN_T_NAT,
1098+
NFT_CHAIN_T_MAX
1099+
};
1100+
1101+
/**
1102+
* struct nft_chain_validate_state - validation state
1103+
*
1104+
* If a chain is encountered again during table validation it is
1105+
* possible to avoid revalidation provided the calling context is
1106+
* compatible. This structure stores relevant calling context of
1107+
* previous validations.
1108+
*
1109+
* @hook_mask: the hook numbers and locations the chain is linked to
1110+
* @depth: the deepest call chain level the chain is linked to
1111+
*/
1112+
struct nft_chain_validate_state {
1113+
u8 hook_mask[NFT_CHAIN_T_MAX];
1114+
u8 depth;
1115+
};
1116+
10941117
/**
10951118
* struct nft_chain - nf_tables chain
10961119
*
@@ -1109,6 +1132,7 @@ struct nft_rule_blob {
11091132
* @udlen: user data length
11101133
* @udata: user data in the chain
11111134
* @blob_next: rule blob pointer to the next in the chain
1135+
* @vstate: validation state
11121136
*/
11131137
struct nft_chain {
11141138
struct nft_rule_blob __rcu *blob_gen_0;
@@ -1128,23 +1152,17 @@ struct nft_chain {
11281152

11291153
/* Only used during control plane commit phase: */
11301154
struct nft_rule_blob *blob_next;
1155+
struct nft_chain_validate_state vstate;
11311156
};
11321157

1133-
int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain);
1158+
int nft_chain_validate(const struct nft_ctx *ctx, struct nft_chain *chain);
11341159
int nft_setelem_validate(const struct nft_ctx *ctx, struct nft_set *set,
11351160
const struct nft_set_iter *iter,
11361161
struct nft_elem_priv *elem_priv);
11371162
int nft_set_catchall_validate(const struct nft_ctx *ctx, struct nft_set *set);
11381163
int nf_tables_bind_chain(const struct nft_ctx *ctx, struct nft_chain *chain);
11391164
void nf_tables_unbind_chain(const struct nft_ctx *ctx, struct nft_chain *chain);
11401165

1141-
enum nft_chain_types {
1142-
NFT_CHAIN_T_DEFAULT = 0,
1143-
NFT_CHAIN_T_ROUTE,
1144-
NFT_CHAIN_T_NAT,
1145-
NFT_CHAIN_T_MAX
1146-
};
1147-
11481166
/**
11491167
* struct nft_chain_type - nf_tables chain type info
11501168
*

net/netfilter/nf_tables_api.c

Lines changed: 65 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,29 @@ static void nft_validate_state_update(struct nft_table *table, u8 new_validate_s
123123

124124
table->validate_state = new_validate_state;
125125
}
126+
127+
static bool nft_chain_vstate_valid(const struct nft_ctx *ctx,
128+
const struct nft_chain *chain)
129+
{
130+
const struct nft_base_chain *base_chain;
131+
enum nft_chain_types type;
132+
u8 hooknum;
133+
134+
if (WARN_ON_ONCE(!nft_is_base_chain(ctx->chain)))
135+
return false;
136+
137+
base_chain = nft_base_chain(ctx->chain);
138+
hooknum = base_chain->ops.hooknum;
139+
type = base_chain->type->type;
140+
141+
/* chain is already validated for this call depth */
142+
if (chain->vstate.depth >= ctx->level &&
143+
chain->vstate.hook_mask[type] & BIT(hooknum))
144+
return true;
145+
146+
return false;
147+
}
148+
126149
static void nf_tables_trans_destroy_work(struct work_struct *w);
127150

128151
static void nft_trans_gc_work(struct work_struct *work);
@@ -4079,6 +4102,29 @@ static void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *r
40794102
nf_tables_rule_destroy(ctx, rule);
40804103
}
40814104

4105+
static void nft_chain_vstate_update(const struct nft_ctx *ctx, struct nft_chain *chain)
4106+
{
4107+
const struct nft_base_chain *base_chain;
4108+
enum nft_chain_types type;
4109+
u8 hooknum;
4110+
4111+
/* ctx->chain must hold the calling base chain. */
4112+
if (WARN_ON_ONCE(!nft_is_base_chain(ctx->chain))) {
4113+
memset(&chain->vstate, 0, sizeof(chain->vstate));
4114+
return;
4115+
}
4116+
4117+
base_chain = nft_base_chain(ctx->chain);
4118+
hooknum = base_chain->ops.hooknum;
4119+
type = base_chain->type->type;
4120+
4121+
BUILD_BUG_ON(BIT(NF_INET_NUMHOOKS) > U8_MAX);
4122+
4123+
chain->vstate.hook_mask[type] |= BIT(hooknum);
4124+
if (chain->vstate.depth < ctx->level)
4125+
chain->vstate.depth = ctx->level;
4126+
}
4127+
40824128
/** nft_chain_validate - loop detection and hook validation
40834129
*
40844130
* @ctx: context containing call depth and base chain
@@ -4088,15 +4134,25 @@ static void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *r
40884134
* and set lookups until either the jump limit is hit or all reachable
40894135
* chains have been validated.
40904136
*/
4091-
int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain)
4137+
int nft_chain_validate(const struct nft_ctx *ctx, struct nft_chain *chain)
40924138
{
40934139
struct nft_expr *expr, *last;
40944140
struct nft_rule *rule;
40954141
int err;
40964142

4143+
BUILD_BUG_ON(NFT_JUMP_STACK_SIZE > 255);
40974144
if (ctx->level == NFT_JUMP_STACK_SIZE)
40984145
return -EMLINK;
40994146

4147+
if (ctx->level > 0) {
4148+
/* jumps to base chains are not allowed. */
4149+
if (nft_is_base_chain(chain))
4150+
return -ELOOP;
4151+
4152+
if (nft_chain_vstate_valid(ctx, chain))
4153+
return 0;
4154+
}
4155+
41004156
list_for_each_entry(rule, &chain->rules, list) {
41014157
if (fatal_signal_pending(current))
41024158
return -EINTR;
@@ -4117,6 +4173,7 @@ int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain)
41174173
}
41184174
}
41194175

4176+
nft_chain_vstate_update(ctx, chain);
41204177
return 0;
41214178
}
41224179
EXPORT_SYMBOL_GPL(nft_chain_validate);
@@ -4128,7 +4185,7 @@ static int nft_table_validate(struct net *net, const struct nft_table *table)
41284185
.net = net,
41294186
.family = table->family,
41304187
};
4131-
int err;
4188+
int err = 0;
41324189

41334190
list_for_each_entry(chain, &table->chains, list) {
41344191
if (!nft_is_base_chain(chain))
@@ -4137,12 +4194,16 @@ static int nft_table_validate(struct net *net, const struct nft_table *table)
41374194
ctx.chain = chain;
41384195
err = nft_chain_validate(&ctx, chain);
41394196
if (err < 0)
4140-
return err;
4197+
goto err;
41414198

41424199
cond_resched();
41434200
}
41444201

4145-
return 0;
4202+
err:
4203+
list_for_each_entry(chain, &table->chains, list)
4204+
memset(&chain->vstate, 0, sizeof(chain->vstate));
4205+
4206+
return err;
41464207
}
41474208

41484209
int nft_setelem_validate(const struct nft_ctx *ctx, struct nft_set *set,

0 commit comments

Comments
 (0)