loongarch64: backport kernel BPF trampoline

Enable xdp-loader to attach multiple XDP programs to a single interface by
backporting the BPF trampoline implementation from Linux kernel 6.17 to
6.12 for LoongArch64.

The xdp-loader utility relies on libxdp, whose multi-program dispatcher in
turn requires kernel support for BPF trampoline. While x86_64 and other
architectures have had this feature for some time, LoongArch64 only gained
it in kernel 6.17. Without this backport, xdp-loader fails on LoongArch64
systems running kernel 6.12.
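
As an illustration of the dependency, here is a minimal libxdp sketch of
what xdp-loader does when attaching two programs to one interface, the
path that exercises the freplace/trampoline-based dispatcher. "prog1.o",
"prog2.o" and "eth0" are placeholders, and error handling is omitted:

  #include <net/if.h>
  #include <xdp/libxdp.h>

  int main(void)
  {
  	int ifindex = if_nametoindex("eth0");

  	/* Placeholders: any two XDP object files will do. */
  	struct xdp_program *p1 = xdp_program__open_file("prog1.o", NULL, NULL);
  	struct xdp_program *p2 = xdp_program__open_file("prog2.o", NULL, NULL);

  	/* Both programs attach behind libxdp's dispatcher program; the
  	 * dispatcher links them via freplace, which needs kernel BPF
  	 * trampoline support on the running architecture. */
  	xdp_program__attach(p1, ifindex, XDP_MODE_NATIVE, 0);
  	xdp_program__attach(p2, ifindex, XDP_MODE_NATIVE, 0);

  	return 0;
  }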

Changes backported include:
- BPF trampoline infrastructure for LoongArch64
- Necessary JIT compiler updates
- Related BPF subsystem changes

This restores full compatibility with the xdp-tools ecosystem on
LoongArch64 systems running the 6.12 kernel.

Reference: https://github.com/xdp-project/xdp-tools/tree/main/lib/libxdp

Signed-off-by: Vincent Li <[email protected]>
Link: https://github.com/openwrt/openwrt/pull/21077
Signed-off-by: Christian Marangi <[email protected]>
Vincent Li, 3 weeks ago
commit ed5cefb037

+ 68 - 0
target/linux/loongarch64/patches-6.12/001-v6.17-LoongArch-Add-larch_insn_gen_beq_bne-helpers.patch

@@ -0,0 +1,68 @@
+From 6ab55e0a9eac638ca390bfaef6408c10c127e623 Mon Sep 17 00:00:00 2001
+From: Chenghao Duan <[email protected]>
+Date: Sun, 3 Aug 2025 22:49:50 +0800
+Subject: [PATCH 01/12] LoongArch: Add larch_insn_gen_{beq,bne} helpers
+
+Add larch_insn_gen_beq() and larch_insn_gen_bne() helpers which will be
+used in BPF trampoline implementation.
+
+Reviewed-by: Hengqi Chen <[email protected]>
+Co-developed-by: George Guo <[email protected]>
+Signed-off-by: George Guo <[email protected]>
+Co-developed-by: Youling Tang <[email protected]>
+Signed-off-by: Youling Tang <[email protected]>
+Signed-off-by: Chenghao Duan <[email protected]>
+Signed-off-by: Huacai Chen <[email protected]>
+---
+ arch/loongarch/include/asm/inst.h |  2 ++
+ arch/loongarch/kernel/inst.c      | 28 ++++++++++++++++++++++++++++
+ 2 files changed, 30 insertions(+)
+
+--- a/arch/loongarch/include/asm/inst.h
++++ b/arch/loongarch/include/asm/inst.h
+@@ -515,6 +515,8 @@ u32 larch_insn_gen_move(enum loongarch_g
+ u32 larch_insn_gen_lu12iw(enum loongarch_gpr rd, int imm);
+ u32 larch_insn_gen_lu32id(enum loongarch_gpr rd, int imm);
+ u32 larch_insn_gen_lu52id(enum loongarch_gpr rd, enum loongarch_gpr rj, int imm);
++u32 larch_insn_gen_beq(enum loongarch_gpr rd, enum loongarch_gpr rj, int imm);
++u32 larch_insn_gen_bne(enum loongarch_gpr rd, enum loongarch_gpr rj, int imm);
+ u32 larch_insn_gen_jirl(enum loongarch_gpr rd, enum loongarch_gpr rj, int imm);
+ 
+ static inline bool signed_imm_check(long val, unsigned int bit)
+--- a/arch/loongarch/kernel/inst.c
++++ b/arch/loongarch/kernel/inst.c
+@@ -335,6 +335,34 @@ u32 larch_insn_gen_lu52id(enum loongarch
+ 	return insn.word;
+ }
+ 
++u32 larch_insn_gen_beq(enum loongarch_gpr rd, enum loongarch_gpr rj, int imm)
++{
++	union loongarch_instruction insn;
++
++	if ((imm & 3) || imm < -SZ_128K || imm >= SZ_128K) {
++		pr_warn("The generated beq instruction is out of range.\n");
++		return INSN_BREAK;
++	}
++
++	emit_beq(&insn, rj, rd, imm >> 2);
++
++	return insn.word;
++}
++
++u32 larch_insn_gen_bne(enum loongarch_gpr rd, enum loongarch_gpr rj, int imm)
++{
++	union loongarch_instruction insn;
++
++	if ((imm & 3) || imm < -SZ_128K || imm >= SZ_128K) {
++		pr_warn("The generated bne instruction is out of range.\n");
++		return INSN_BREAK;
++	}
++
++	emit_bne(&insn, rj, rd, imm >> 2);
++
++	return insn.word;
++}
++
+ u32 larch_insn_gen_jirl(enum loongarch_gpr rd, enum loongarch_gpr rj, int imm)
+ {
+ 	union loongarch_instruction insn;

+ 52 - 0
target/linux/loongarch64/patches-6.12/002-v6.17-LoongArch-BPF-Update-the-code-to-rename-validate_code-to-validate_ctx.patch

@@ -0,0 +1,52 @@
+From ed1a1fe6ec5e73b23b310b434ace07d1e5060657 Mon Sep 17 00:00:00 2001
+From: Chenghao Duan <[email protected]>
+Date: Tue, 5 Aug 2025 19:00:18 +0800
+Subject: [PATCH 02/12] LoongArch: BPF: Rename and refactor validate_code()
+
+1. Rename the existing validate_code() to validate_ctx()
+2. Factor out the code validation handling into a new helper
+   validate_code()
+
+Then:
+
+* validate_code() is used to check the validity of code.
+* validate_ctx() is used to check both code validity and table entry
+  correctness.
+
+The new validate_code() will be used in subsequent changes.
+
+Reviewed-by: Hengqi Chen <[email protected]>
+Co-developed-by: George Guo <[email protected]>
+Signed-off-by: George Guo <[email protected]>
+Signed-off-by: Chenghao Duan <[email protected]>
+Signed-off-by: Huacai Chen <[email protected]>
+---
+ arch/loongarch/net/bpf_jit.c | 10 +++++++++-
+ 1 file changed, 9 insertions(+), 1 deletion(-)
+
+--- a/arch/loongarch/net/bpf_jit.c
++++ b/arch/loongarch/net/bpf_jit.c
+@@ -1170,6 +1170,14 @@ static int validate_code(struct jit_ctx
+ 			return -1;
+ 	}
+ 
++	return 0;
++}
++
++static int validate_ctx(struct jit_ctx *ctx)
++{
++	if (validate_code(ctx))
++		return -1;
++
+ 	if (WARN_ON_ONCE(ctx->num_exentries != ctx->prog->aux->num_exentries))
+ 		return -1;
+ 
+@@ -1278,7 +1286,7 @@ skip_init_ctx:
+ 	build_epilogue(&ctx);
+ 
+ 	/* 3. Extra pass to validate JITed code */
+-	if (validate_code(&ctx)) {
++	if (validate_ctx(&ctx)) {
+ 		bpf_jit_binary_free(header);
+ 		prog = orig_prog;
+ 		goto out_offset;

+ 240 - 0
target/linux/loongarch64/patches-6.12/003-v6.17-loongArch-BPF-Add-dynamic-code-modification-support.patch

@@ -0,0 +1,240 @@
+From 9fbd18cf4c69f512f7de3ab73235078f3e32ecec Mon Sep 17 00:00:00 2001
+From: Chenghao Duan <[email protected]>
+Date: Tue, 5 Aug 2025 19:00:18 +0800
+Subject: [PATCH 03/12] LoongArch: BPF: Add dynamic code modification support
+
+This commit adds support for BPF dynamic code modification on the
+LoongArch architecture:
+1. Add bpf_arch_text_copy() for instruction block copying.
+2. Add bpf_arch_text_poke() for runtime instruction patching.
+3. Add bpf_arch_text_invalidate() for code invalidation.
+
+On LoongArch, since symbol addresses in the direct mapping region can't
+be reached via relative jump instructions from the paged mapping region,
+we use the move_imm+jirl instruction pair as absolute jump instructions.
+These require 2-5 instructions, so we reserve 5 NOP instructions in the
+program as placeholders for function jumps.
+
+The larch_insn_text_copy() function is solely used for BPF. And the use
+of larch_insn_text_copy() requires PAGE_SIZE alignment. Currently, only
+the size of the BPF trampoline is page-aligned.
+
+Co-developed-by: George Guo <[email protected]>
+Signed-off-by: George Guo <[email protected]>
+Signed-off-by: Chenghao Duan <[email protected]>
+Signed-off-by: Huacai Chen <[email protected]>
+---
+ arch/loongarch/include/asm/inst.h |   1 +
+ arch/loongarch/kernel/inst.c      |  46 +++++++++++++
+ arch/loongarch/net/bpf_jit.c      | 105 +++++++++++++++++++++++++++++-
+ 3 files changed, 151 insertions(+), 1 deletion(-)
+
+--- a/arch/loongarch/include/asm/inst.h
++++ b/arch/loongarch/include/asm/inst.h
+@@ -502,6 +502,7 @@ void arch_simulate_insn(union loongarch_
+ int larch_insn_read(void *addr, u32 *insnp);
+ int larch_insn_write(void *addr, u32 insn);
+ int larch_insn_patch_text(void *addr, u32 insn);
++int larch_insn_text_copy(void *dst, void *src, size_t len);
+ 
+ u32 larch_insn_gen_nop(void);
+ u32 larch_insn_gen_b(unsigned long pc, unsigned long dest);
+--- a/arch/loongarch/kernel/inst.c
++++ b/arch/loongarch/kernel/inst.c
+@@ -4,6 +4,8 @@
+  */
+ #include <linux/sizes.h>
+ #include <linux/uaccess.h>
++#include <linux/set_memory.h>
++#include <linux/stop_machine.h>
+ 
+ #include <asm/cacheflush.h>
+ #include <asm/inst.h>
+@@ -229,6 +231,50 @@ int larch_insn_patch_text(void *addr, u3
+ 
+ 	return ret;
+ }
++
++struct insn_copy {
++	void *dst;
++	void *src;
++	size_t len;
++	unsigned int cpu;
++};
++
++static int text_copy_cb(void *data)
++{
++	int ret = 0;
++	struct insn_copy *copy = data;
++
++	if (smp_processor_id() == copy->cpu) {
++		ret = copy_to_kernel_nofault(copy->dst, copy->src, copy->len);
++		if (ret)
++			pr_err("%s: operation failed\n", __func__);
++	}
++
++	flush_icache_range((unsigned long)copy->dst, (unsigned long)copy->dst + copy->len);
++
++	return ret;
++}
++
++int larch_insn_text_copy(void *dst, void *src, size_t len)
++{
++	int ret = 0;
++	size_t start, end;
++	struct insn_copy copy = {
++		.dst = dst,
++		.src = src,
++		.len = len,
++		.cpu = smp_processor_id(),
++	};
++
++	start = round_down((size_t)dst, PAGE_SIZE);
++	end   = round_up((size_t)dst + len, PAGE_SIZE);
++
++	set_memory_rw(start, (end - start) / PAGE_SIZE);
++	ret = stop_machine(text_copy_cb, &copy, cpu_online_mask);
++	set_memory_rox(start, (end - start) / PAGE_SIZE);
++
++	return ret;
++}
+ 
+ u32 larch_insn_gen_nop(void)
+ {
+--- a/arch/loongarch/net/bpf_jit.c
++++ b/arch/loongarch/net/bpf_jit.c
+@@ -4,8 +4,12 @@
+  *
+  * Copyright (C) 2022 Loongson Technology Corporation Limited
+  */
++#include <linux/memory.h>
+ #include "bpf_jit.h"
+ 
++#define LOONGARCH_LONG_JUMP_NINSNS 5
++#define LOONGARCH_LONG_JUMP_NBYTES (LOONGARCH_LONG_JUMP_NINSNS * 4)
++
+ #define REG_TCC		LOONGARCH_GPR_A6
+ #define TCC_SAVED	LOONGARCH_GPR_S5
+ 
+@@ -88,7 +92,7 @@ static u8 tail_call_reg(struct jit_ctx *
+  */
+ static void build_prologue(struct jit_ctx *ctx)
+ {
+-	int stack_adjust = 0, store_offset, bpf_stack_adjust;
++	int i, stack_adjust = 0, store_offset, bpf_stack_adjust;
+ 
+ 	bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16);
+ 
+@@ -98,6 +102,10 @@ static void build_prologue(struct jit_ct
+ 	stack_adjust = round_up(stack_adjust, 16);
+ 	stack_adjust += bpf_stack_adjust;
+ 
++	/* Reserve space for the move_imm + jirl instruction */
++	for (i = 0; i < LOONGARCH_LONG_JUMP_NINSNS; i++)
++		emit_insn(ctx, nop);
++
+ 	/*
+ 	 * First instruction initializes the tail call count (TCC).
+ 	 * On tail call we skip this instruction, and the TCC is
+@@ -1184,6 +1192,101 @@ static int validate_ctx(struct jit_ctx *
+ 	return 0;
+ }
+ 
++static int emit_jump_and_link(struct jit_ctx *ctx, u8 rd, u64 target)
++{
++	if (!target) {
++		pr_err("bpf_jit: jump target address is error\n");
++		return -EFAULT;
++	}
++
++	move_imm(ctx, LOONGARCH_GPR_T1, target, false);
++	emit_insn(ctx, jirl, rd, LOONGARCH_GPR_T1, 0);
++
++	return 0;
++}
++
++static int emit_jump_or_nops(void *target, void *ip, u32 *insns, bool is_call)
++{
++	int i;
++	struct jit_ctx ctx;
++
++	ctx.idx = 0;
++	ctx.image = (union loongarch_instruction *)insns;
++
++	if (!target) {
++		for (i = 0; i < LOONGARCH_LONG_JUMP_NINSNS; i++)
++			emit_insn((&ctx), nop);
++		return 0;
++	}
++
++	return emit_jump_and_link(&ctx, is_call ? LOONGARCH_GPR_T0 : LOONGARCH_GPR_ZERO, (u64)target);
++}
++
++void *bpf_arch_text_copy(void *dst, void *src, size_t len)
++{
++	int ret;
++
++	mutex_lock(&text_mutex);
++	ret = larch_insn_text_copy(dst, src, len);
++	mutex_unlock(&text_mutex);
++
++	return ret ? ERR_PTR(-EINVAL) : dst;
++}
++
++int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type,
++		       void *old_addr, void *new_addr)
++{
++	int ret;
++	bool is_call = (poke_type == BPF_MOD_CALL);
++	u32 old_insns[LOONGARCH_LONG_JUMP_NINSNS] = {[0 ... 4] = INSN_NOP};
++	u32 new_insns[LOONGARCH_LONG_JUMP_NINSNS] = {[0 ... 4] = INSN_NOP};
++
++	if (!is_kernel_text((unsigned long)ip) &&
++		!is_bpf_text_address((unsigned long)ip))
++		return -ENOTSUPP;
++
++	ret = emit_jump_or_nops(old_addr, ip, old_insns, is_call);
++	if (ret)
++		return ret;
++
++	if (memcmp(ip, old_insns, LOONGARCH_LONG_JUMP_NBYTES))
++		return -EFAULT;
++
++	ret = emit_jump_or_nops(new_addr, ip, new_insns, is_call);
++	if (ret)
++		return ret;
++
++	mutex_lock(&text_mutex);
++	if (memcmp(ip, new_insns, LOONGARCH_LONG_JUMP_NBYTES))
++		ret = larch_insn_text_copy(ip, new_insns, LOONGARCH_LONG_JUMP_NBYTES);
++	mutex_unlock(&text_mutex);
++
++	return ret;
++}
++
++int bpf_arch_text_invalidate(void *dst, size_t len)
++{
++	int i;
++	int ret = 0;
++	u32 *inst;
++
++	inst = kvmalloc(len, GFP_KERNEL);
++	if (!inst)
++		return -ENOMEM;
++
++	for (i = 0; i < (len / sizeof(u32)); i++)
++		inst[i] = INSN_BREAK;
++
++	mutex_lock(&text_mutex);
++	if (larch_insn_text_copy(dst, inst, len))
++		ret = -EINVAL;
++	mutex_unlock(&text_mutex);
++
++	kvfree(inst);
++
++	return ret;
++}
++
+ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
+ {
+ 	bool tmp_blinded = false, extra_pass = false;

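The poke hook follows the contract the generic BPF core expects of every
architecture: ip points at a patch site holding either nops or a jump/call,
old_addr describes what is assumed to be there, and new_addr what should
replace it, with NULL meaning "nops" on either side. A hedged sketch of
hypothetical caller code (patch_site, tramp and new_tramp are illustrative
variables, not part of this series):

  	/* ip: the reserved nop slot at the traced function's entry */
  	void *ip = patch_site;

  	/* install: nops -> call trampoline */
  	bpf_arch_text_poke(ip, BPF_MOD_CALL, NULL, tramp);

  	/* replace: call old trampoline -> call new trampoline */
  	bpf_arch_text_poke(ip, BPF_MOD_CALL, tramp, new_tramp);

  	/* remove: call trampoline -> nops */
  	bpf_arch_text_poke(ip, BPF_MOD_CALL, tramp, NULL);
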
+ 467 - 0
target/linux/loongarch64/patches-6.12/004-v6.17-LoongArch-BPF-Add-basic-bpf-trampoline-support.patch

@@ -0,0 +1,467 @@
+From f9b6b41f0cf31791541cea9644ddbedb46465801 Mon Sep 17 00:00:00 2001
+From: Chenghao Duan <[email protected]>
+Date: Tue, 5 Aug 2025 19:00:18 +0800
+Subject: [PATCH 04/12] LoongArch: BPF: Add basic bpf trampoline support
+
+BPF trampoline is the critical infrastructure of the BPF subsystem,
+acting as a mediator between kernel functions and BPF programs. Numerous
+important features, such as using BPF program for zero overhead kernel
+introspection, rely on this key component.
+
+The related tests have passed, including the following technical points:
+1. fentry
+2. fmod_ret
+3. fexit
+
+The following related testcases passed on LoongArch:
+sudo ./test_progs -a fentry_test/fentry
+sudo ./test_progs -a fexit_test/fexit
+sudo ./test_progs -a fentry_fexit
+sudo ./test_progs -a modify_return
+sudo ./test_progs -a fexit_sleep
+sudo ./test_progs -a test_overhead
+sudo ./test_progs -a trampoline_count
+
+This issue was first reported by Geliang Tang in June 2024 while
+debugging MPTCP BPF selftests on a LoongArch machine (see commit
+eef0532e900c "selftests/bpf: Null checks for links in bpf_tcp_ca").
+Geliang, Huacai, and Tiezhu then worked together to drive the
+implementation of this feature, encouraging broader collaboration among
+Chinese kernel engineers.
+
+Reported-by: kernel test robot <[email protected]>
+Closes: https://lore.kernel.org/oe-kbuild-all/[email protected]/
+Reported-by: Geliang Tang <[email protected]>
+Tested-by: Tiezhu Yang <[email protected]>
+Tested-by: Vincent Li <[email protected]>
+Co-developed-by: George Guo <[email protected]>
+Signed-off-by: George Guo <[email protected]>
+Signed-off-by: Chenghao Duan <[email protected]>
+Signed-off-by: Huacai Chen <[email protected]>
+---
+ arch/loongarch/net/bpf_jit.c | 377 +++++++++++++++++++++++++++++++++++
+ arch/loongarch/net/bpf_jit.h |   6 +
+ 2 files changed, 383 insertions(+)
+
+--- a/arch/loongarch/net/bpf_jit.c
++++ b/arch/loongarch/net/bpf_jit.c
+@@ -7,9 +7,15 @@
+ #include <linux/memory.h>
+ #include "bpf_jit.h"
+ 
++#define LOONGARCH_MAX_REG_ARGS 8
++
+ #define LOONGARCH_LONG_JUMP_NINSNS 5
+ #define LOONGARCH_LONG_JUMP_NBYTES (LOONGARCH_LONG_JUMP_NINSNS * 4)
+ 
++#define LOONGARCH_FENTRY_NINSNS 2
++#define LOONGARCH_FENTRY_NBYTES (LOONGARCH_FENTRY_NINSNS * 4)
++#define LOONGARCH_BPF_FENTRY_NBYTES (LOONGARCH_LONG_JUMP_NINSNS * 4)
++
+ #define REG_TCC		LOONGARCH_GPR_A6
+ #define TCC_SAVED	LOONGARCH_GPR_S5
+ 
+@@ -1222,6 +1228,11 @@ static int emit_jump_or_nops(void *targe
+ 	return emit_jump_and_link(&ctx, is_call ? LOONGARCH_GPR_T0 : LOONGARCH_GPR_ZERO, (u64)target);
+ }
+ 
++static int emit_call(struct jit_ctx *ctx, u64 addr)
++{
++	return emit_jump_and_link(ctx, LOONGARCH_GPR_RA, addr);
++}
++
+ void *bpf_arch_text_copy(void *dst, void *src, size_t len)
+ {
+ 	int ret;
+@@ -1287,6 +1298,372 @@ int bpf_arch_text_invalidate(void *dst,
+ 	return ret;
+ }
+ 
++static void store_args(struct jit_ctx *ctx, int nargs, int args_off)
++{
++	int i;
++
++	for (i = 0; i < nargs; i++) {
++		emit_insn(ctx, std, LOONGARCH_GPR_A0 + i, LOONGARCH_GPR_FP, -args_off);
++		args_off -= 8;
++	}
++}
++
++static void restore_args(struct jit_ctx *ctx, int nargs, int args_off)
++{
++	int i;
++
++	for (i = 0; i < nargs; i++) {
++		emit_insn(ctx, ldd, LOONGARCH_GPR_A0 + i, LOONGARCH_GPR_FP, -args_off);
++		args_off -= 8;
++	}
++}
++
++static int invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l,
++			   int args_off, int retval_off, int run_ctx_off, bool save_ret)
++{
++	int ret;
++	u32 *branch;
++	struct bpf_prog *p = l->link.prog;
++	int cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie);
++
++	if (l->cookie) {
++		move_imm(ctx, LOONGARCH_GPR_T1, l->cookie, false);
++		emit_insn(ctx, std, LOONGARCH_GPR_T1, LOONGARCH_GPR_FP, -run_ctx_off + cookie_off);
++	} else {
++		emit_insn(ctx, std, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_FP, -run_ctx_off + cookie_off);
++	}
++
++	/* arg1: prog */
++	move_imm(ctx, LOONGARCH_GPR_A0, (const s64)p, false);
++	/* arg2: &run_ctx */
++	emit_insn(ctx, addid, LOONGARCH_GPR_A1, LOONGARCH_GPR_FP, -run_ctx_off);
++	ret = emit_call(ctx, (const u64)bpf_trampoline_enter(p));
++	if (ret)
++		return ret;
++
++	/* store prog start time */
++	move_reg(ctx, LOONGARCH_GPR_S1, LOONGARCH_GPR_A0);
++
++	/*
++	 * if (__bpf_prog_enter(prog) == 0)
++	 *      goto skip_exec_of_prog;
++	 */
++	branch = (u32 *)ctx->image + ctx->idx;
++	/* nop reserved for conditional jump */
++	emit_insn(ctx, nop);
++
++	/* arg1: &args_off */
++	emit_insn(ctx, addid, LOONGARCH_GPR_A0, LOONGARCH_GPR_FP, -args_off);
++	if (!p->jited)
++		move_imm(ctx, LOONGARCH_GPR_A1, (const s64)p->insnsi, false);
++	ret = emit_call(ctx, (const u64)p->bpf_func);
++	if (ret)
++		return ret;
++
++	if (save_ret) {
++		emit_insn(ctx, std, LOONGARCH_GPR_A0, LOONGARCH_GPR_FP, -retval_off);
++		emit_insn(ctx, std, regmap[BPF_REG_0], LOONGARCH_GPR_FP, -(retval_off - 8));
++	}
++
++	/* update branch with beqz */
++	if (ctx->image) {
++		int offset = (void *)(&ctx->image[ctx->idx]) - (void *)branch;
++		*branch = larch_insn_gen_beq(LOONGARCH_GPR_A0, LOONGARCH_GPR_ZERO, offset);
++	}
++
++	/* arg1: prog */
++	move_imm(ctx, LOONGARCH_GPR_A0, (const s64)p, false);
++	/* arg2: prog start time */
++	move_reg(ctx, LOONGARCH_GPR_A1, LOONGARCH_GPR_S1);
++	/* arg3: &run_ctx */
++	emit_insn(ctx, addid, LOONGARCH_GPR_A2, LOONGARCH_GPR_FP, -run_ctx_off);
++	ret = emit_call(ctx, (const u64)bpf_trampoline_exit(p));
++
++	return ret;
++}
++
++static void invoke_bpf_mod_ret(struct jit_ctx *ctx, struct bpf_tramp_links *tl,
++			       int args_off, int retval_off, int run_ctx_off, u32 **branches)
++{
++	int i;
++
++	emit_insn(ctx, std, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_FP, -retval_off);
++	for (i = 0; i < tl->nr_links; i++) {
++		invoke_bpf_prog(ctx, tl->links[i], args_off, retval_off, run_ctx_off, true);
++		emit_insn(ctx, ldd, LOONGARCH_GPR_T1, LOONGARCH_GPR_FP, -retval_off);
++		branches[i] = (u32 *)ctx->image + ctx->idx;
++		emit_insn(ctx, nop);
++	}
++}
++
++void *arch_alloc_bpf_trampoline(unsigned int size)
++{
++	return bpf_prog_pack_alloc(size, jit_fill_hole);
++}
++
++void arch_free_bpf_trampoline(void *image, unsigned int size)
++{
++	bpf_prog_pack_free(image, size);
++}
++
++static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
++					 const struct btf_func_model *m, struct bpf_tramp_links *tlinks,
++					 void *func_addr, u32 flags)
++{
++	int i, ret, save_ret;
++	int stack_size = 0, nargs = 0;
++	int retval_off, args_off, nargs_off, ip_off, run_ctx_off, sreg_off;
++	void *orig_call = func_addr;
++	struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
++	struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
++	struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
++	u32 **branches = NULL;
++
++	if (flags & (BPF_TRAMP_F_ORIG_STACK | BPF_TRAMP_F_SHARE_IPMODIFY))
++		return -ENOTSUPP;
++
++	/*
++	 * FP + 8       [ RA to parent func ] return address to parent
++	 *                    function
++	 * FP + 0       [ FP of parent func ] frame pointer of parent
++	 *                    function
++	 * FP - 8       [ T0 to traced func ] return address of traced
++	 *                    function
++	 * FP - 16      [ FP of traced func ] frame pointer of traced
++	 *                    function
++	 *
++	 * FP - retval_off  [ return value      ] BPF_TRAMP_F_CALL_ORIG or
++	 *                    BPF_TRAMP_F_RET_FENTRY_RET
++	 *                  [ argN              ]
++	 *                  [ ...               ]
++	 * FP - args_off    [ arg1              ]
++	 *
++	 * FP - nargs_off   [ regs count        ]
++	 *
++	 * FP - ip_off      [ traced func   ] BPF_TRAMP_F_IP_ARG
++	 *
++	 * FP - run_ctx_off [ bpf_tramp_run_ctx ]
++	 *
++	 * FP - sreg_off    [ callee saved reg  ]
++	 *
++	 */
++
++	if (m->nr_args > LOONGARCH_MAX_REG_ARGS)
++		return -ENOTSUPP;
++
++	if (flags & (BPF_TRAMP_F_ORIG_STACK | BPF_TRAMP_F_SHARE_IPMODIFY))
++		return -ENOTSUPP;
++
++	stack_size = 0;
++
++	/* Room of trampoline frame to store return address and frame pointer */
++	stack_size += 16;
++
++	save_ret = flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET);
++	if (save_ret) {
++		/* Save BPF R0 and A0 */
++		stack_size += 16;
++		retval_off = stack_size;
++	}
++
++	/* Room of trampoline frame to store args */
++	nargs = m->nr_args;
++	stack_size += nargs * 8;
++	args_off = stack_size;
++
++	/* Room of trampoline frame to store args number */
++	stack_size += 8;
++	nargs_off = stack_size;
++
++	/* Room of trampoline frame to store ip address */
++	if (flags & BPF_TRAMP_F_IP_ARG) {
++		stack_size += 8;
++		ip_off = stack_size;
++	}
++
++	/* Room of trampoline frame to store struct bpf_tramp_run_ctx */
++	stack_size += round_up(sizeof(struct bpf_tramp_run_ctx), 8);
++	run_ctx_off = stack_size;
++
++	stack_size += 8;
++	sreg_off = stack_size;
++
++	stack_size = round_up(stack_size, 16);
++
++	/* For the trampoline called from function entry */
++	/* RA and FP for parent function */
++	emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, -16);
++	emit_insn(ctx, std, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, 8);
++	emit_insn(ctx, std, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, 0);
++	emit_insn(ctx, addid, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, 16);
++
++	/* RA and FP for traced function */
++	emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, -stack_size);
++	emit_insn(ctx, std, LOONGARCH_GPR_T0, LOONGARCH_GPR_SP, stack_size - 8);
++	emit_insn(ctx, std, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_size - 16);
++	emit_insn(ctx, addid, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_size);
++
++	/* callee saved register S1 to pass start time */
++	emit_insn(ctx, std, LOONGARCH_GPR_S1, LOONGARCH_GPR_FP, -sreg_off);
++
++	/* store ip address of the traced function */
++	if (flags & BPF_TRAMP_F_IP_ARG) {
++		move_imm(ctx, LOONGARCH_GPR_T1, (const s64)func_addr, false);
++		emit_insn(ctx, std, LOONGARCH_GPR_T1, LOONGARCH_GPR_FP, -ip_off);
++	}
++
++	/* store nargs number */
++	move_imm(ctx, LOONGARCH_GPR_T1, nargs, false);
++	emit_insn(ctx, std, LOONGARCH_GPR_T1, LOONGARCH_GPR_FP, -nargs_off);
++
++	store_args(ctx, nargs, args_off);
++
++	/* To traced function */
++	/* Ftrace jump skips 2 NOP instructions */
++	if (is_kernel_text((unsigned long)orig_call))
++		orig_call += LOONGARCH_FENTRY_NBYTES;
++	/* Direct jump skips 5 NOP instructions */
++	else if (is_bpf_text_address((unsigned long)orig_call))
++		orig_call += LOONGARCH_BPF_FENTRY_NBYTES;
++
++	if (flags & BPF_TRAMP_F_CALL_ORIG) {
++		move_imm(ctx, LOONGARCH_GPR_A0, (const s64)im, false);
++		ret = emit_call(ctx, (const u64)__bpf_tramp_enter);
++		if (ret)
++			return ret;
++	}
++
++	for (i = 0; i < fentry->nr_links; i++) {
++		ret = invoke_bpf_prog(ctx, fentry->links[i], args_off, retval_off,
++				      run_ctx_off, flags & BPF_TRAMP_F_RET_FENTRY_RET);
++		if (ret)
++			return ret;
++	}
++	if (fmod_ret->nr_links) {
++		branches  = kcalloc(fmod_ret->nr_links, sizeof(u32 *), GFP_KERNEL);
++		if (!branches)
++			return -ENOMEM;
++
++		invoke_bpf_mod_ret(ctx, fmod_ret, args_off, retval_off, run_ctx_off, branches);
++	}
++
++	if (flags & BPF_TRAMP_F_CALL_ORIG) {
++		restore_args(ctx, m->nr_args, args_off);
++		ret = emit_call(ctx, (const u64)orig_call);
++		if (ret)
++			goto out;
++		emit_insn(ctx, std, LOONGARCH_GPR_A0, LOONGARCH_GPR_FP, -retval_off);
++		emit_insn(ctx, std, regmap[BPF_REG_0], LOONGARCH_GPR_FP, -(retval_off - 8));
++		im->ip_after_call = ctx->ro_image + ctx->idx;
++		/* Reserve space for the move_imm + jirl instruction */
++		for (i = 0; i < LOONGARCH_LONG_JUMP_NINSNS; i++)
++			emit_insn(ctx, nop);
++	}
++
++	for (i = 0; ctx->image && i < fmod_ret->nr_links; i++) {
++		int offset = (void *)(&ctx->image[ctx->idx]) - (void *)branches[i];
++		*branches[i] = larch_insn_gen_bne(LOONGARCH_GPR_T1, LOONGARCH_GPR_ZERO, offset);
++	}
++
++	for (i = 0; i < fexit->nr_links; i++) {
++		ret = invoke_bpf_prog(ctx, fexit->links[i], args_off, retval_off, run_ctx_off, false);
++		if (ret)
++			goto out;
++	}
++
++	if (flags & BPF_TRAMP_F_CALL_ORIG) {
++		im->ip_epilogue = ctx->ro_image + ctx->idx;
++		move_imm(ctx, LOONGARCH_GPR_A0, (const s64)im, false);
++		ret = emit_call(ctx, (const u64)__bpf_tramp_exit);
++		if (ret)
++			goto out;
++	}
++
++	if (flags & BPF_TRAMP_F_RESTORE_REGS)
++		restore_args(ctx, m->nr_args, args_off);
++
++	if (save_ret) {
++		emit_insn(ctx, ldd, LOONGARCH_GPR_A0, LOONGARCH_GPR_FP, -retval_off);
++		emit_insn(ctx, ldd, regmap[BPF_REG_0], LOONGARCH_GPR_FP, -(retval_off - 8));
++	}
++
++	emit_insn(ctx, ldd, LOONGARCH_GPR_S1, LOONGARCH_GPR_FP, -sreg_off);
++
++	/* trampoline called from function entry */
++	emit_insn(ctx, ldd, LOONGARCH_GPR_T0, LOONGARCH_GPR_SP, stack_size - 8);
++	emit_insn(ctx, ldd, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_size - 16);
++	emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, stack_size);
++
++	emit_insn(ctx, ldd, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, 8);
++	emit_insn(ctx, ldd, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, 0);
++	emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, 16);
++
++	if (flags & BPF_TRAMP_F_SKIP_FRAME)
++		/* return to parent function */
++		emit_insn(ctx, jirl, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_RA, 0);
++	else
++		/* return to traced function */
++		emit_insn(ctx, jirl, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_T0, 0);
++
++	ret = ctx->idx;
++out:
++	kfree(branches);
++
++	return ret;
++}
++
++int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image,
++				void *ro_image_end, const struct btf_func_model *m,
++				u32 flags, struct bpf_tramp_links *tlinks, void *func_addr)
++{
++	int ret, size;
++	void *image, *tmp;
++	struct jit_ctx ctx;
++
++	size = ro_image_end - ro_image;
++	image = kvmalloc(size, GFP_KERNEL);
++	if (!image)
++		return -ENOMEM;
++
++	ctx.image = (union loongarch_instruction *)image;
++	ctx.ro_image = (union loongarch_instruction *)ro_image;
++	ctx.idx = 0;
++
++	jit_fill_hole(image, (unsigned int)(ro_image_end - ro_image));
++	ret = __arch_prepare_bpf_trampoline(&ctx, im, m, tlinks, func_addr, flags);
++	if (ret > 0 && validate_code(&ctx) < 0) {
++		ret = -EINVAL;
++		goto out;
++	}
++
++	tmp = bpf_arch_text_copy(ro_image, image, size);
++	if (IS_ERR(tmp)) {
++		ret = PTR_ERR(tmp);
++		goto out;
++	}
++
++	bpf_flush_icache(ro_image, ro_image_end);
++out:
++	kvfree(image);
++	return ret < 0 ? ret : size;
++}
++
++int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
++			     struct bpf_tramp_links *tlinks, void *func_addr)
++{
++	int ret;
++	struct jit_ctx ctx;
++	struct bpf_tramp_image im;
++
++	ctx.image = NULL;
++	ctx.idx = 0;
++
++	ret = __arch_prepare_bpf_trampoline(&ctx, &im, m, tlinks, func_addr, flags);
++
++	/* Page align */
++	return ret < 0 ? ret : round_up(ret * LOONGARCH_INSN_SIZE, PAGE_SIZE);
++}
++
+ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
+ {
+ 	bool tmp_blinded = false, extra_pass = false;
+--- a/arch/loongarch/net/bpf_jit.h
++++ b/arch/loongarch/net/bpf_jit.h
+@@ -18,6 +18,7 @@ struct jit_ctx {
+ 	u32 *offset;
+ 	int num_exentries;
+ 	union loongarch_instruction *image;
++	union loongarch_instruction *ro_image;
+ 	u32 stack_size;
+ };
+ 
+@@ -308,3 +309,8 @@ static inline int emit_tailcall_jmp(stru
+ 
+ 	return -EINVAL;
+ }
++
++static inline void bpf_flush_icache(void *start, void *end)
++{
++	flush_icache_range((unsigned long)start, (unsigned long)end);
++}