Commit d88e822

powerpc64/bpf: Add support for bpf trampolines
JIRA: https://issues.redhat.com/browse/RHEL-24555

commit d243b62
Author: Naveen N Rao <naveen@kernel.org>
Date: Wed Oct 30 12:38:50 2024 +0530

powerpc64/bpf: Add support for bpf trampolines

Add support for bpf_arch_text_poke() and arch_prepare_bpf_trampoline() for 64-bit powerpc. While the code is generic, BPF trampolines are only enabled on 64-bit powerpc. 32-bit powerpc will need testing and some updates.

BPF trampolines adhere to the existing ftrace ABI utilizing a two-instruction profiling sequence, as well as the newer ABI utilizing a three-instruction profiling sequence enabling return with a 'blr'. The trampoline code itself closely follows the x86 implementation.

BPF prog JIT is extended to mimic the 64-bit powerpc approach for ftrace, having a single nop at function entry, followed by the function profiling sequence out-of-line and a separate long branch stub for calls to trampolines that are out of range. A dummy_tramp is provided to simplify synchronization, similar to arm64.

When attaching a bpf trampoline to a bpf prog, we can patch up to three things:
- the nop at bpf prog entry, to go to the out-of-line stub
- the instruction in the out-of-line stub, to either call the bpf trampoline directly or branch to the long_branch stub
- the trampoline address before the long_branch stub

We do not need any synchronization here since we always have a valid branch target regardless of the order in which the above stores are seen. dummy_tramp ensures that the long_branch stub goes to a valid destination on other cpus, even when the branch to the long_branch stub is seen before the updated trampoline address.

However, when detaching a bpf trampoline from a bpf prog, or if changing the bpf trampoline address, we need synchronization to ensure that other cpus can no longer branch into the older trampoline so that it can be safely freed. bpf_tramp_image_put() uses rcu_tasks to ensure all cpus make forward progress, but we still need to ensure that other cpus execute isync (or some CSI) so that they don't go back into the trampoline again.

While here, update the stale comment that describes the redzone usage in the ppc64 BPF JIT.

Signed-off-by: Naveen N Rao <naveen@kernel.org>
Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://patch.msgid.link/20241030070850.1361304-18-hbathini@linux.ibm.com
Signed-off-by: Viktor Malik <vmalik@redhat.com>
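
The patching described above is driven through the generic bpf_arch_text_poke() hook that this commit implements for powerpc64. As a minimal sketch of the caller's side only, assuming an attach to a prog whose entry nop has nothing installed yet (the helper name and call site below are illustrative, not the actual kernel code):

#include <linux/bpf.h>

/*
 * Illustrative sketch, not the actual kernel call site: install a call to a
 * freshly prepared trampoline at a BPF prog's patchable entry instruction.
 * 'ip' points at the nop the JIT emitted at prog entry; 'tramp' is the
 * trampoline produced by arch_prepare_bpf_trampoline().
 */
static int attach_tramp_example(void *ip, void *tramp)
{
	/*
	 * old_addr is NULL because no trampoline is attached yet; the arch
	 * hook validates the existing instruction before patching in the call.
	 */
	return bpf_arch_text_poke(ip, BPF_MOD_CALL, NULL, tramp);
}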
1 parent b60d35a commit d88e822

5 files changed: +891 -5 lines changed

arch/powerpc/include/asm/ppc-opcode.h

Lines changed: 14 additions & 0 deletions
@@ -587,12 +587,26 @@
 #define PPC_RAW_MTSPR(spr, d)		(0x7c0003a6 | ___PPC_RS(d) | __PPC_SPR(spr))
 #define PPC_RAW_EIEIO()			(0x7c0006ac)
 
+/* bcl 20,31,$+4 */
+#define PPC_RAW_BCL4()			(0x429f0005)
 #define PPC_RAW_BRANCH(offset)		(0x48000000 | PPC_LI(offset))
 #define PPC_RAW_BL(offset)		(0x48000001 | PPC_LI(offset))
 #define PPC_RAW_TW(t0, a, b)		(0x7c000008 | ___PPC_RS(t0) | ___PPC_RA(a) | ___PPC_RB(b))
 #define PPC_RAW_TRAP()			PPC_RAW_TW(31, 0, 0)
 #define PPC_RAW_SETB(t, bfa)		(0x7c000100 | ___PPC_RT(t) | ___PPC_RA((bfa) << 2))
 
+#ifdef CONFIG_PPC32
+#define PPC_RAW_STL			PPC_RAW_STW
+#define PPC_RAW_STLU			PPC_RAW_STWU
+#define PPC_RAW_LL			PPC_RAW_LWZ
+#define PPC_RAW_CMPLI			PPC_RAW_CMPWI
+#else
+#define PPC_RAW_STL			PPC_RAW_STD
+#define PPC_RAW_STLU			PPC_RAW_STDU
+#define PPC_RAW_LL			PPC_RAW_LD
+#define PPC_RAW_CMPLI			PPC_RAW_CMPDI
+#endif
+
 /* Deal with instructions that older assemblers aren't aware of */
 #define PPC_BCCTR_FLUSH		stringify_in_c(.long PPC_INST_BCCTR_FLUSH)
 #define PPC_CP_ABORT		stringify_in_c(.long PPC_RAW_CP_ABORT)
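
The new PPC_RAW_STL/PPC_RAW_STLU/PPC_RAW_LL/PPC_RAW_CMPLI aliases resolve to the 32-bit or 64-bit instruction forms at build time, so generic JIT code can emit word-sized loads and stores without CONFIG_PPC32/CONFIG_PPC64 checks at each use. A sketch of the kind of emission this enables, assuming the JIT's EMIT() helper from bpf_jit.h and the usual LR save slot (the helper name and register/offset choices are illustrative, not taken from this patch):

/*
 * Illustrative sketch, not code from this patch: save and restore the link
 * register around emitted code that clobbers it, using the word-size-agnostic
 * macros (stw/lwz on ppc32, std/ld on ppc64).
 */
static void emit_lr_save_restore(u32 *image, struct codegen_context *ctx)
{
	EMIT(PPC_RAW_MFLR(_R0));
	EMIT(PPC_RAW_STL(_R0, _R1, PPC_LR_STKOFF));	/* spill LR to the stack */

	/* ... emit instructions that may clobber LR ... */

	EMIT(PPC_RAW_LL(_R0, _R1, PPC_LR_STKOFF));	/* reload LR */
	EMIT(PPC_RAW_MTLR(_R0));
}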

arch/powerpc/net/bpf_jit.h

Lines changed: 17 additions & 0 deletions
@@ -12,6 +12,7 @@
 
 #include <asm/types.h>
 #include <asm/ppc-opcode.h>
+#include <linux/build_bug.h>
 
 #ifdef CONFIG_PPC64_ELF_ABI_V1
 #define FUNCTION_DESCR_SIZE	24
@@ -21,6 +22,9 @@
 
 #define CTX_NIA(ctx)	((unsigned long)ctx->idx * 4)
 
+#define SZL			sizeof(unsigned long)
+#define BPF_INSN_SAFETY		64
+
 #define PLANT_INSTR(d, idx, instr)					      \
 	do { if (d) { (d)[idx] = instr; } idx++; } while (0)
 #define EMIT(instr)		PLANT_INSTR(image, ctx->idx, instr)
@@ -81,6 +85,18 @@
 		EMIT(PPC_RAW_ORI(d, d, (uintptr_t)(i) &		      \
 					0xffff));			      \
 		} } while (0)
+#define PPC_LI_ADDR	PPC_LI64
+
+#ifndef CONFIG_PPC_KERNEL_PCREL
+#define PPC64_LOAD_PACA()						      \
+	EMIT(PPC_RAW_LD(_R2, _R13, offsetof(struct paca_struct, kernel_toc)))
+#else
+#define PPC64_LOAD_PACA() do {} while (0)
+#endif
+#else
+#define PPC_LI64(d, i)	BUILD_BUG()
+#define PPC_LI_ADDR	PPC_LI32
+#define PPC64_LOAD_PACA() BUILD_BUG()
 #endif
 
 /*
@@ -165,6 +181,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code
 		       u32 *addrs, int pass, bool extra_pass);
 void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx);
 void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx);
+void bpf_jit_build_fentry_stubs(u32 *image, struct codegen_context *ctx);
 void bpf_jit_realloc_regs(struct codegen_context *ctx);
 int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx, int tmp_reg, long exit_addr);
 
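PPC_LI_ADDR materializes a kernel address into a register on either word size, and PPC64_LOAD_PACA() reloads the kernel TOC pointer (r2) from the PACA after an indirect call on 64-bit non-PCREL kernels, compiling away otherwise. A sketch of how emitted call sequences can combine these helpers (the function name below is illustrative; the actual emission code in this series may differ):

/*
 * Illustrative sketch, not code from this patch: emit an indirect call to
 * 'func' from JITed code and restore the kernel TOC afterwards.
 */
static void emit_indirect_call(u32 *image, struct codegen_context *ctx,
			       unsigned long func)
{
	PPC_LI_ADDR(_R12, func);	/* load the call target address */
	EMIT(PPC_RAW_MTCTR(_R12));
	EMIT(PPC_RAW_BCTRL());		/* call through CTR */
	PPC64_LOAD_PACA();		/* reload r2 (kernel TOC) when needed */
}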