
generic: copy backport, hack, pending patch and config from 5.15 to 6.1

Copy backport, hack, pending patch and config from 5.15 to 6.1.

Signed-off-by: Christian Marangi <[email protected]>
Christian Marangi, 3 years ago
parent
commit fa79baf4a6
100 files changed, 25539 additions and 0 deletions. For each file below, the two numbers are the lines added and the lines removed:
  1. 73 0
      target/linux/generic/backport-6.1/005-v5.17-01-Kbuild-use-Wdeclaration-after-statement.patch
  2. 60 0
      target/linux/generic/backport-6.1/005-v5.17-02-Kbuild-move-to-std-gnu11.patch
  3. 43 0
      target/linux/generic/backport-6.1/005-v5.17-03-Kbuild-use-std-gnu11-for-KBUILD_USERCFLAGS.patch
  4. 425 0
      target/linux/generic/backport-6.1/020-v6.1-01-mm-x86-arm64-add-arch_has_hw_pte_young.patch
  5. 153 0
      target/linux/generic/backport-6.1/020-v6.1-02-mm-x86-add-CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG.patch
  6. 275 0
      target/linux/generic/backport-6.1/020-v6.1-03-mm-vmscan.c-refactor-shrink_node.patch
  7. 82 0
      target/linux/generic/backport-6.1/020-v6.1-04-Revert-include-linux-mm_inline.h-fold-__update_lru_s.patch
  8. 807 0
      target/linux/generic/backport-6.1/020-v6.1-05-mm-multi-gen-LRU-groundwork.patch
  9. 1447 0
      target/linux/generic/backport-6.1/020-v6.1-06-mm-multi-gen-LRU-minimal-implementation.patch
  10. 491 0
      target/linux/generic/backport-6.1/020-v6.1-07-mm-multi-gen-LRU-exploit-locality-in-rmap.patch
  11. 1687 0
      target/linux/generic/backport-6.1/020-v6.1-08-mm-multi-gen-LRU-support-page-table-walks.patch
  12. 315 0
      target/linux/generic/backport-6.1/020-v6.1-09-mm-multi-gen-LRU-optimize-multiple-memcgs.patch
  13. 498 0
      target/linux/generic/backport-6.1/020-v6.1-10-mm-multi-gen-LRU-kill-switch.patch
  14. 226 0
      target/linux/generic/backport-6.1/020-v6.1-11-mm-multi-gen-LRU-thrashing-prevention.patch
  15. 579 0
      target/linux/generic/backport-6.1/020-v6.1-12-mm-multi-gen-LRU-debugfs-interface.patch
  16. 32 0
      target/linux/generic/backport-6.1/020-v6.1-13-mm-mglru-don-t-sync-disk-for-each-aging-cycle.patch
  17. 124 0
      target/linux/generic/backport-6.1/020-v6.1-14-mm-multi-gen-LRU-retry-pages-written-back-while-isol.patch
  18. 49 0
      target/linux/generic/backport-6.1/020-v6.1-15-mm-multi-gen-LRU-move-lru_gen_add_mm-out-of-IRQ-off-.patch
  19. 96 0
      target/linux/generic/backport-6.1/020-v6.1-17-mm-add-dummy-pmd_young-for-architectures-not-having-.patch
  20. 113 0
      target/linux/generic/backport-6.1/020-v6.1-18-mm-introduce-arch_has_hw_nonleaf_pmd_young.patch
  21. 56 0
      target/linux/generic/backport-6.1/020-v6.2-16-mm-multi-gen-LRU-fix-crash-during-cgroup-migration.patch
  22. 196 0
      target/linux/generic/backport-6.1/020-v6.3-19-mm-add-vma_has_recency.patch
  23. 125 0
      target/linux/generic/backport-6.1/020-v6.3-20-mm-support-POSIX_FADV_NOREUSE.patch
  24. 348 0
      target/linux/generic/backport-6.1/020-v6.3-21-mm-multi-gen-LRU-rename-lru_gen_struct-to-lru_gen_pa.patch
  25. 162 0
      target/linux/generic/backport-6.1/020-v6.3-22-mm-multi-gen-LRU-rename-lrugen-lists-to-lrugen-pages.patch
  26. 188 0
      target/linux/generic/backport-6.1/020-v6.3-23-mm-multi-gen-LRU-remove-eviction-fairness-safeguard.patch
  27. 287 0
      target/linux/generic/backport-6.1/020-v6.3-24-mm-multi-gen-LRU-remove-aging-fairness-safeguard.patch
  28. 161 0
      target/linux/generic/backport-6.1/020-v6.3-25-mm-multi-gen-LRU-shuffle-should_run_aging.patch
  29. 868 0
      target/linux/generic/backport-6.1/020-v6.3-26-mm-multi-gen-LRU-per-node-lru_gen_page-lists.patch
  30. 196 0
      target/linux/generic/backport-6.1/020-v6.3-27-mm-multi-gen-LRU-clarify-scan_control-flags.patch
  31. 34 0
      target/linux/generic/backport-6.1/020-v6.3-28-mm-multi-gen-LRU-simplify-arch_has_hw_pte_young-chec.patch
  32. 88 0
      target/linux/generic/backport-6.1/020-v6.3-29-mm-multi-gen-LRU-avoid-futile-retries.patch
  33. 65 0
      target/linux/generic/backport-6.1/050-v5.16-00-MIPS-uasm-Enable-muhu-opcode-for-MIPS-R6.patch
  34. 31 0
      target/linux/generic/backport-6.1/050-v5.16-01-mips-uasm-Add-workaround-for-Loongson-2F-nop-CPU-err.patch
  35. 3078 0
      target/linux/generic/backport-6.1/050-v5.16-02-mips-bpf-Add-eBPF-JIT-for-32-bit-MIPS.patch
  36. 1005 0
      target/linux/generic/backport-6.1/050-v5.16-03-mips-bpf-Add-new-eBPF-JIT-for-64-bit-MIPS.patch
  37. 120 0
      target/linux/generic/backport-6.1/050-v5.16-04-mips-bpf-Add-JIT-workarounds-for-CPU-errata.patch
  38. 61 0
      target/linux/generic/backport-6.1/050-v5.16-05-mips-bpf-Enable-eBPF-JITs.patch
  39. 387 0
      target/linux/generic/backport-6.1/050-v5.16-06-mips-bpf-Remove-old-BPF-JIT-implementations.patch
  40. 105 0
      target/linux/generic/backport-6.1/080-v5.17-clk-gate-Add-devm_clk_hw_register_gate.patch
  41. 52 0
      target/linux/generic/backport-6.1/081-v5.17-regmap-allow-to-define-reg_update_bits-for-no-bus.patch
  42. 37 0
      target/linux/generic/backport-6.1/100-v5.18-tty-serial-bcm63xx-use-more-precise-Kconfig-symbol.patch
  43. 49 0
      target/linux/generic/backport-6.1/200-v5.18-tools-resolve_btfids-Build-with-host-flags.patch
  44. 997 0
      target/linux/generic/backport-6.1/201-v5.16-scripts-dtc-Update-to-upstream-version-v1.6.1-19-g0a.patch
  45. 48 0
      target/linux/generic/backport-6.1/300-v5.18-pinctrl-qcom-Return--EINVAL-for-setting-affinity-if-no-IRQ-parent.patch
  46. 166 0
      target/linux/generic/backport-6.1/301-v5.16-soc-qcom-smem-Support-reserved-memory-description.patch
  47. 33 0
      target/linux/generic/backport-6.1/302-v5.16-watchdog-bcm63xx_wdt-fix-fallthrough-warning.patch
  48. 162 0
      target/linux/generic/backport-6.1/330-v5.16-01-MIPS-kernel-proc-add-CPU-option-reporting.patch
  49. 62 0
      target/linux/generic/backport-6.1/330-v5.16-02-MIPS-Fix-using-smp_processor_id-in-preemptible-in-sh.patch
  50. 186 0
      target/linux/generic/backport-6.1/331-v5.19-mtd-spinand-Add-support-for-XTX-XT26G0xA.patch
  51. 219 0
      target/linux/generic/backport-6.1/344-v5.18-01-phy-marvell-phy-mvebu-a3700-comphy-Remove-port-from-.patch
  52. 1552 0
      target/linux/generic/backport-6.1/344-v5.18-02-phy-marvell-phy-mvebu-a3700-comphy-Add-native-kernel.patch
  53. 32 0
      target/linux/generic/backport-6.1/345-v5.17-arm64-dts-marvell-armada-37xx-Add-xtal-clock-to-comp.patch
  54. 64 0
      target/linux/generic/backport-6.1/346-v5.18-01-Revert-ata-ahci-mvebu-Make-SATA-PHY-optional-for-Arm.patch
  55. 166 0
      target/linux/generic/backport-6.1/346-v5.18-02-Revert-usb-host-xhci-mvebu-make-USB-3.0-PHY-optional.patch
  56. 39 0
      target/linux/generic/backport-6.1/346-v5.18-03-Revert-PCI-aardvark-Fix-initialization-with-old-Marv.patch
  57. 194 0
      target/linux/generic/backport-6.1/347-v6.0-phy-marvell-phy-mvebu-a3700-comphy-Remove-broken-res.patch
  58. 90 0
      target/linux/generic/backport-6.1/350-v5.18-regmap-add-configurable-downshift-for-addresses.patch
  59. 95 0
      target/linux/generic/backport-6.1/351-v5.18-regmap-allow-a-defined-reg_base-to-be-added-to-every.patch
  60. 57 0
      target/linux/generic/backport-6.1/352-v6.3-regmap-apply-reg_base-and-reg_downshift-for-single-r.patch
  61. 72 0
      target/linux/generic/backport-6.1/400-v5.19-mtd-call-of_platform_populate-for-MTD-partitions.patch
  62. 302 0
      target/linux/generic/backport-6.1/401-v6.0-mtd-parsers-add-support-for-Sercomm-partitions.patch
  63. 106 0
      target/linux/generic/backport-6.1/402-v6.0-mtd-next-mtd-core-introduce-of-support-for-dynamic-partitions.patch
  64. 72 0
      target/linux/generic/backport-6.1/403-v6.1-mtd-allow-getting-MTD-device-associated-with-a-speci.patch
  65. 30 0
      target/linux/generic/backport-6.1/404-v6.0-mtd-core-check-partition-before-dereference.patch
  66. 101 0
      target/linux/generic/backport-6.1/405-v6.1-mtd-core-add-missing-of_node_get-in-dynamic-partitio.patch
  67. 65 0
      target/linux/generic/backport-6.1/406-v6.2-0001-mtd-core-simplify-a-bit-code-find-partition-matching.patch
  68. 84 0
      target/linux/generic/backport-6.1/406-v6.2-0002-mtd-core-try-to-find-OF-node-for-every-MTD-partition.patch
  69. 32 0
      target/linux/generic/backport-6.1/407-v5.17-mtd-parsers-qcom-Don-t-print-error-message-on-EPROBE.patch
  70. 47 0
      target/linux/generic/backport-6.1/408-v6.2-mtd-core-set-ROOT_DEV-for-partitions-marked-as-rootf.patch
  71. 33 0
      target/linux/generic/backport-6.1/410-v5.18-mtd-parsers-trx-allow-to-use-on-MediaTek-MIPS-SoCs.patch
  72. 58 0
      target/linux/generic/backport-6.1/420-v5.19-02-mtd-spinand-gigadevice-add-support-for-GD5FxGQ4xExxG.patch
  73. 33 0
      target/linux/generic/backport-6.1/420-v5.19-03-mtd-spinand-gigadevice-add-support-for-GD5F1GQ5RExxG.patch
  74. 84 0
      target/linux/generic/backport-6.1/420-v5.19-04-mtd-spinand-gigadevice-add-support-for-GD5F-2-4-GQ5x.patch
  75. 91 0
      target/linux/generic/backport-6.1/420-v5.19-05-mtd-spinand-gigadevice-add-support-for-GD5FxGM7xExxG.patch
  76. 229 0
      target/linux/generic/backport-6.1/421-v6.2-mtd-parsers-add-TP-Link-SafeLoader-partitions-table-.patch
  77. 49 0
      target/linux/generic/backport-6.1/422-v5.19-mtd-spi-nor-support-eon-en25qh256a.patch
  78. 73 0
      target/linux/generic/backport-6.1/423-v6.1-0001-mtd-track-maximum-number-of-bitflips-for-each-read-r.patch
  79. 325 0
      target/linux/generic/backport-6.1/423-v6.1-0002-mtd-always-initialize-stats-in-struct-mtd_oob_ops.patch
  80. 172 0
      target/linux/generic/backport-6.1/423-v6.1-0003-mtd-add-ECC-error-accounting-for-each-read-request.patch
  81. 321 0
      target/linux/generic/backport-6.1/423-v6.1-0004-mtdchar-add-MEMREAD-ioctl.patch
  82. 35 0
      target/linux/generic/backport-6.1/423-v6.3-mtd-spinand-macronix-use-scratch-buffer-for-DMA-oper.patch
  83. 47 0
      target/linux/generic/backport-6.1/424-v6.4-0004-mtd-core-prepare-mtd_otp_nvmem_add-to-handle-EPROBE_.patch
  84. 165 0
      target/linux/generic/backport-6.1/600-v5.18-page_pool-Add-allocation-stats.patch
  85. 140 0
      target/linux/generic/backport-6.1/601-v5.18-page_pool-Add-recycle-stats.patch
  86. 77 0
      target/linux/generic/backport-6.1/602-v5.18-page_pool-Add-function-to-batch-and-return-stats.patch
  87. 55 0
      target/linux/generic/backport-6.1/603-v5.19-page_pool-Add-recycle-stats-to-page_pool_put_page_bu.patch
  88. 147 0
      target/linux/generic/backport-6.1/604-v5.19-net-page_pool-introduce-ethtool-stats.patch
  89. 99 0
      target/linux/generic/backport-6.1/605-v5.18-xdp-introduce-flags-field-in-xdp_buff-xdp_frame.patch
  90. 137 0
      target/linux/generic/backport-6.1/606-v5.18-xdp-add-frags-support-to-xdp_return_-buff-frame.patch
  91. 31 0
      target/linux/generic/backport-6.1/607-v5.18-net-skbuff-add-size-metadata-to-skb_shared_info-for-.patch
  92. 65 0
      target/linux/generic/backport-6.1/608-v5.18-net-veth-Account-total-xdp_frame-len-running-ndo_xdp.patch
  93. 40 0
      target/linux/generic/backport-6.1/609-v5.18-veth-Allow-jumbo-frames-in-xdp-mode.patch
  94. 56 0
      target/linux/generic/backport-6.1/610-v6.3-net-page_pool-use-in_softirq-instead.patch
  95. 41 0
      target/linux/generic/backport-6.1/611-v6.3-net-add-helper-eth_addr_add.patch
  96. 279 0
      target/linux/generic/backport-6.1/700-v5.17-net-dsa-introduce-tagger-owned-storage-for-private.patch
  97. 274 0
      target/linux/generic/backport-6.1/701-v5.17-dsa-make-tagging-protocols-connect-to-individual-switches.patch
  98. 327 0
      target/linux/generic/backport-6.1/702-v5.19-00-net-ethernet-mtk_eth_soc-add-support-for-coherent-DM.patch
  99. 30 0
      target/linux/generic/backport-6.1/702-v5.19-01-arm64-dts-mediatek-mt7622-add-support-for-coherent-D.patch
  100. 1679 0
      target/linux/generic/backport-6.1/702-v5.19-02-net-ethernet-mtk_eth_soc-add-support-for-Wireless-Et.patch

+ 73 - 0
target/linux/generic/backport-6.1/005-v5.17-01-Kbuild-use-Wdeclaration-after-statement.patch

@@ -0,0 +1,73 @@
+From 2fd7e7f9317d3048a14026816d081b08ba98ea8e Mon Sep 17 00:00:00 2001
+From: Mark Rutland <[email protected]>
+Date: Tue, 8 Mar 2022 22:56:13 +0100
+Subject: [PATCH 1/3] Kbuild: use -Wdeclaration-after-statement
+
+The kernel is moving from using `-std=gnu89` to `-std=gnu11`, permitting
+the use of additional C11 features such as for-loop initial declarations.
+
+One contentious aspect of C99 is that it permits mixed declarations and
+code, and for now at least, it seems preferable to enforce that
+declarations must come first.
+
+These warnings were already enabled in the kernel itself, but not
+for KBUILD_USERCFLAGS or the compat VDSO on arch/arm64, which uses
+a separate set of CFLAGS.
+
+This patch fixes an existing violation in modpost.c, which is not
+reported because of the missing flag in KBUILD_USERCFLAGS:
+
+| scripts/mod/modpost.c: In function ‘match’:
+| scripts/mod/modpost.c:837:3: warning: ISO C90 forbids mixed declarations and code [-Wdeclaration-after-statement]
+|   837 |   const char *endp = p + strlen(p) - 1;
+|       |   ^~~~~
+
+Signed-off-by: Mark Rutland <[email protected]>
+[arnd: don't add a duplicate flag to the default set, update changelog]
+Signed-off-by: Arnd Bergmann <[email protected]>
+Reviewed-by: Nathan Chancellor <[email protected]>
+Reviewed-by: Nick Desaulniers <[email protected]>
+Tested-by: Sedat Dilek <[email protected]> # LLVM/Clang v13.0.0 (x86-64)
+Signed-off-by: Masahiro Yamada <[email protected]>
+---
+ Makefile                          | 3 ++-
+ arch/arm64/kernel/vdso32/Makefile | 1 +
+ scripts/mod/modpost.c             | 4 +++-
+ 3 files changed, 6 insertions(+), 2 deletions(-)
+
+--- a/Makefile
++++ b/Makefile
+@@ -440,7 +440,8 @@ endif
+ HOSTPKG_CONFIG	= pkg-config
+ 
+ export KBUILD_USERCFLAGS := -Wall -Wmissing-prototypes -Wstrict-prototypes \
+-			      -O2 -fomit-frame-pointer -std=gnu89
++			      -O2 -fomit-frame-pointer -std=gnu89 \
++			      -Wdeclaration-after-statement
+ export KBUILD_USERLDFLAGS :=
+ 
+ KBUILD_HOSTCFLAGS   := $(KBUILD_USERCFLAGS) $(HOST_LFS_CFLAGS) $(HOSTCFLAGS)
+--- a/arch/arm64/kernel/vdso32/Makefile
++++ b/arch/arm64/kernel/vdso32/Makefile
+@@ -76,6 +76,7 @@ VDSO_CFLAGS += -Wall -Wundef -Wstrict-pr
+                -fno-strict-aliasing -fno-common \
+                -Werror-implicit-function-declaration \
+                -Wno-format-security \
++               -Wdeclaration-after-statement \
+                -std=gnu89
+ VDSO_CFLAGS  += -O2
+ # Some useful compiler-dependent flags from top-level Makefile
+--- a/scripts/mod/modpost.c
++++ b/scripts/mod/modpost.c
+@@ -833,8 +833,10 @@ static int match(const char *sym, const
+ {
+ 	const char *p;
+ 	while (*pat) {
++		const char *endp;
++
+ 		p = *pat++;
+-		const char *endp = p + strlen(p) - 1;
++		endp = p + strlen(p) - 1;
+ 
+ 		/* "*foo*" */
+ 		if (*p == '*' && *endp == '*') {

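As a stand-alone illustration of what the new -Wdeclaration-after-statement flag catches (a sketch modeled on the modpost.c hunk above, not code from the patch), the first function below triggers the warning and the second, with the declaration hoisted, does not:

    /* Build with: gcc -c -Wdeclaration-after-statement example.c
     * The first function mixes a declaration into the statement flow and
     * warns ("ISO C90 forbids mixed declarations and code"); the second
     * follows the corrected style with declarations first. */
    #include <string.h>

    int ends_with_star_warns(const char *p)
    {
            while (*p == ' ')
                    p++;
            const char *endp = p + strlen(p) - 1;  /* declaration after a statement */
            return *endp == '*';
    }

    int ends_with_star_clean(const char *p)
    {
            const char *endp;                      /* declared before any statement */

            while (*p == ' ')
                    p++;
            endp = p + strlen(p) - 1;
            return *endp == '*';
    }
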
+ 60 - 0
target/linux/generic/backport-6.1/005-v5.17-02-Kbuild-move-to-std-gnu11.patch

@@ -0,0 +1,60 @@
+From b810c8e719ea082e47c7a8f7cf878bc84fa2455d Mon Sep 17 00:00:00 2001
+From: Arnd Bergmann <[email protected]>
+Date: Tue, 8 Mar 2022 22:56:14 +0100
+Subject: [PATCH 2/3] Kbuild: move to -std=gnu11
+
+During a patch discussion, Linus brought up the option of changing
+the C standard version from gnu89 to gnu99, which allows using variable
+declaration inside of a for() loop. While the C99, C11 and later standards
+introduce many other features, most of these are already available in
+gnu89 as GNU extensions as well.
+
+An earlier attempt to do this when gcc-5 started defaulting to
+-std=gnu11 failed because at the time that caused warnings about
+designated initializers with older compilers. Now that gcc-5.1 is
+the minimum compiler version used for building kernels, that is no
+longer a concern. Similarly, the behavior of 'inline' functions changes
+between gnu89 using gnu_inline behavior and gnu11 using standard c99+
+behavior, but this was taken care of by defining 'inline' to include
+__attribute__((gnu_inline)) in order to allow building with clang a
+while ago.
+
+Nathan Chancellor reported a new -Wdeclaration-after-statement
+warning that appears in a system header on arm, this still needs a
+workaround.
+
+The differences between gnu99, gnu11, gnu1x and gnu17 are fairly
+minimal and mainly impact warnings at the -Wpedantic level that the
+kernel never enables. Between these, gnu11 is the newest version
+that is supported by all supported compiler versions, though it is
+only the default on gcc-5, while all other supported versions of
+gcc or clang default to gnu1x/gnu17.
+
+Link: https://lore.kernel.org/lkml/CAHk-=wiyCH7xeHcmiFJ-YgXUy2Jaj7pnkdKpcovt8fYbVFW3TA@mail.gmail.com/
+Link: https://github.com/ClangBuiltLinux/linux/issues/1603
+Suggested-by: Linus Torvalds <[email protected]>
+Acked-by: Marco Elver <[email protected]>
+Acked-by: Jani Nikula <[email protected]>
+Acked-by: David Sterba <[email protected]>
+Tested-by: Sedat Dilek <[email protected]>
+Reviewed-by: Alex Shi <[email protected]>
+Reviewed-by: Nick Desaulniers <[email protected]>
+Reviewed-by: Miguel Ojeda <[email protected]>
+Signed-off-by: Arnd Bergmann <[email protected]>
+Reviewed-by: Nathan Chancellor <[email protected]>
+Signed-off-by: Masahiro Yamada <[email protected]>
+---
+ Makefile | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/Makefile
++++ b/Makefile
+@@ -524,7 +524,7 @@ KBUILD_CFLAGS   := -Wall -Wundef -Werror
+ 		   -fno-strict-aliasing -fno-common -fshort-wchar -fno-PIE \
+ 		   -Werror=implicit-function-declaration -Werror=implicit-int \
+ 		   -Werror=return-type -Wno-format-security \
+-		   -std=gnu89
++		   -std=gnu11
+ KBUILD_CPPFLAGS := -D__KERNEL__
+ KBUILD_AFLAGS_KERNEL :=
+ KBUILD_CFLAGS_KERNEL :=

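A small stand-alone sketch, not part of the patch, of the main practical difference the changelog mentions: -std=gnu89 rejects the loop-scoped declaration below, while -std=gnu11 accepts it.

    /* gcc -std=gnu89 -c loop.c fails ("'for' loop initial declarations are
     * only allowed in C99 or C11 mode"); gcc -std=gnu11 -c loop.c builds. */
    int sum_first_n(const int *v, int n)
    {
            int sum = 0;

            for (int i = 0; i < n; i++)    /* loop-scoped declaration, new in C99/C11 */
                    sum += v[i];

            return sum;
    }
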
+ 43 - 0
target/linux/generic/backport-6.1/005-v5.17-03-Kbuild-use-std-gnu11-for-KBUILD_USERCFLAGS.patch

@@ -0,0 +1,43 @@
+From 40337d6f3d677aee7ad3052ae662d3f53dd4d5cb Mon Sep 17 00:00:00 2001
+From: Arnd Bergmann <[email protected]>
+Date: Tue, 8 Mar 2022 22:56:15 +0100
+Subject: [PATCH 3/3] Kbuild: use -std=gnu11 for KBUILD_USERCFLAGS
+
+As we change the C language standard for the kernel from gnu89 to
+gnu11, it makes sense to also update the version for user space
+compilation.
+
+Some users have older native compilers than what they use for
+kernel builds, so I considered using gnu99 as the default version
+for wider compatibility with gcc-4.6 and earlier.
+
+However, testing with older compilers showed that we already require
+HOSTCC version 5.1 as well because a lot of host tools include
+linux/compiler.h that uses __has_attribute():
+
+  CC      tools/objtool/exec-cmd.o
+In file included from tools/include/linux/compiler_types.h:36:0,
+                 from tools/include/linux/compiler.h:5,
+                 from exec-cmd.c:2:
+tools/include/linux/compiler-gcc.h:19:5: error: "__has_attribute" is not defined [-Werror=undef]
+
+Signed-off-by: Arnd Bergmann <[email protected]>
+Reviewed-by: Nathan Chancellor <[email protected]>
+Reviewed-by: Nick Desaulniers <[email protected]>
+Tested-by: Sedat Dilek <[email protected]>
+Signed-off-by: Masahiro Yamada <[email protected]>
+---
+ Makefile | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/Makefile
++++ b/Makefile
+@@ -440,7 +440,7 @@ endif
+ HOSTPKG_CONFIG	= pkg-config
+ 
+ export KBUILD_USERCFLAGS := -Wall -Wmissing-prototypes -Wstrict-prototypes \
+-			      -O2 -fomit-frame-pointer -std=gnu89 \
++			      -O2 -fomit-frame-pointer -std=gnu11 \
+ 			      -Wdeclaration-after-statement
+ export KBUILD_USERLDFLAGS :=
+ 

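The changelog's point about HOSTCC 5.1 can be seen with a stand-alone sketch like the one below (EXAMPLE_NORETURN is a hypothetical name, not a kernel macro): a host compiler without the __has_attribute() built-in cannot evaluate the #if line, which is the failure the quoted -Werror=undef error reports.

    /* On a host compiler that lacks the __has_attribute() built-in
     * (gcc < 5) this fails to preprocess; the changelog above quotes the
     * -Werror=undef form of that failure. gcc 5.1+ and clang accept it. */
    #if __has_attribute(__noreturn__)
    # define EXAMPLE_NORETURN __attribute__((__noreturn__))
    #else
    # define EXAMPLE_NORETURN
    #endif

    EXAMPLE_NORETURN void fatal_error(void);

    int main(void)
    {
            return 0;
    }
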
+ 425 - 0
target/linux/generic/backport-6.1/020-v6.1-01-mm-x86-arm64-add-arch_has_hw_pte_young.patch

@@ -0,0 +1,425 @@
+From a4103262b01a1b8704b37c01c7c813df91b7b119 Mon Sep 17 00:00:00 2001
+From: Yu Zhao <[email protected]>
+Date: Sun, 18 Sep 2022 01:59:58 -0600
+Subject: [PATCH 01/29] mm: x86, arm64: add arch_has_hw_pte_young()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Patch series "Multi-Gen LRU Framework", v14.
+
+What's new
+==========
+1. OpenWrt, in addition to Android, Arch Linux Zen, Armbian, ChromeOS,
+   Liquorix, post-factum and XanMod, is now shipping MGLRU on 5.15.
+2. Fixed long-tailed direct reclaim latency seen on high-memory (TBs)
+   machines. The old direct reclaim backoff, which tries to enforce a
+   minimum fairness among all eligible memcgs, over-swapped by about
+   (total_mem>>DEF_PRIORITY)-nr_to_reclaim. The new backoff, which
+   pulls the plug on swapping once the target is met, trades some
+   fairness for curtailed latency:
+   https://lore.kernel.org/r/[email protected]/
+3. Fixed minior build warnings and conflicts. More comments and nits.
+
+TLDR
+====
+The current page reclaim is too expensive in terms of CPU usage and it
+often makes poor choices about what to evict. This patchset offers an
+alternative solution that is performant, versatile and
+straightforward.
+
+Patchset overview
+=================
+The design and implementation overview is in patch 14:
+https://lore.kernel.org/r/[email protected]/
+
+01. mm: x86, arm64: add arch_has_hw_pte_young()
+02. mm: x86: add CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG
+Take advantage of hardware features when trying to clear the accessed
+bit in many PTEs.
+
+03. mm/vmscan.c: refactor shrink_node()
+04. Revert "include/linux/mm_inline.h: fold __update_lru_size() into
+    its sole caller"
+Minor refactors to improve readability for the following patches.
+
+05. mm: multi-gen LRU: groundwork
+Adds the basic data structure and the functions that insert pages to
+and remove pages from the multi-gen LRU (MGLRU) lists.
+
+06. mm: multi-gen LRU: minimal implementation
+A minimal implementation without optimizations.
+
+07. mm: multi-gen LRU: exploit locality in rmap
+Exploits spatial locality to improve efficiency when using the rmap.
+
+08. mm: multi-gen LRU: support page table walks
+Further exploits spatial locality by optionally scanning page tables.
+
+09. mm: multi-gen LRU: optimize multiple memcgs
+Optimizes the overall performance for multiple memcgs running mixed
+types of workloads.
+
+10. mm: multi-gen LRU: kill switch
+Adds a kill switch to enable or disable MGLRU at runtime.
+
+11. mm: multi-gen LRU: thrashing prevention
+12. mm: multi-gen LRU: debugfs interface
+Provide userspace with features like thrashing prevention, working set
+estimation and proactive reclaim.
+
+13. mm: multi-gen LRU: admin guide
+14. mm: multi-gen LRU: design doc
+Add an admin guide and a design doc.
+
+Benchmark results
+=================
+Independent lab results
+-----------------------
+Based on the popularity of searches [01] and the memory usage in
+Google's public cloud, the most popular open-source memory-hungry
+applications, in alphabetical order, are:
+      Apache Cassandra      Memcached
+      Apache Hadoop         MongoDB
+      Apache Spark          PostgreSQL
+      MariaDB (MySQL)       Redis
+
+An independent lab evaluated MGLRU with the most widely used benchmark
+suites for the above applications. They posted 960 data points along
+with kernel metrics and perf profiles collected over more than 500
+hours of total benchmark time. Their final reports show that, with 95%
+confidence intervals (CIs), the above applications all performed
+significantly better for at least part of their benchmark matrices.
+
+On 5.14:
+1. Apache Spark [02] took 95% CIs [9.28, 11.19]% and [12.20, 14.93]%
+   less wall time to sort three billion random integers, respectively,
+   under the medium- and the high-concurrency conditions, when
+   overcommitting memory. There were no statistically significant
+   changes in wall time for the rest of the benchmark matrix.
+2. MariaDB [03] achieved 95% CIs [5.24, 10.71]% and [20.22, 25.97]%
+   more transactions per minute (TPM), respectively, under the medium-
+   and the high-concurrency conditions, when overcommitting memory.
+   There were no statistically significant changes in TPM for the rest
+   of the benchmark matrix.
+3. Memcached [04] achieved 95% CIs [23.54, 32.25]%, [20.76, 41.61]%
+   and [21.59, 30.02]% more operations per second (OPS), respectively,
+   for sequential access, random access and Gaussian (distribution)
+   access, when THP=always; 95% CIs [13.85, 15.97]% and
+   [23.94, 29.92]% more OPS, respectively, for random access and
+   Gaussian access, when THP=never. There were no statistically
+   significant changes in OPS for the rest of the benchmark matrix.
+4. MongoDB [05] achieved 95% CIs [2.23, 3.44]%, [6.97, 9.73]% and
+   [2.16, 3.55]% more operations per second (OPS), respectively, for
+   exponential (distribution) access, random access and Zipfian
+   (distribution) access, when underutilizing memory; 95% CIs
+   [8.83, 10.03]%, [21.12, 23.14]% and [5.53, 6.46]% more OPS,
+   respectively, for exponential access, random access and Zipfian
+   access, when overcommitting memory.
+
+On 5.15:
+5. Apache Cassandra [06] achieved 95% CIs [1.06, 4.10]%, [1.94, 5.43]%
+   and [4.11, 7.50]% more operations per second (OPS), respectively,
+   for exponential (distribution) access, random access and Zipfian
+   (distribution) access, when swap was off; 95% CIs [0.50, 2.60]%,
+   [6.51, 8.77]% and [3.29, 6.75]% more OPS, respectively, for
+   exponential access, random access and Zipfian access, when swap was
+   on.
+6. Apache Hadoop [07] took 95% CIs [5.31, 9.69]% and [2.02, 7.86]%
+   less average wall time to finish twelve parallel TeraSort jobs,
+   respectively, under the medium- and the high-concurrency
+   conditions, when swap was on. There were no statistically
+   significant changes in average wall time for the rest of the
+   benchmark matrix.
+7. PostgreSQL [08] achieved 95% CI [1.75, 6.42]% more transactions per
+   minute (TPM) under the high-concurrency condition, when swap was
+   off; 95% CIs [12.82, 18.69]% and [22.70, 46.86]% more TPM,
+   respectively, under the medium- and the high-concurrency
+   conditions, when swap was on. There were no statistically
+   significant changes in TPM for the rest of the benchmark matrix.
+8. Redis [09] achieved 95% CIs [0.58, 5.94]%, [6.55, 14.58]% and
+   [11.47, 19.36]% more total operations per second (OPS),
+   respectively, for sequential access, random access and Gaussian
+   (distribution) access, when THP=always; 95% CIs [1.27, 3.54]%,
+   [10.11, 14.81]% and [8.75, 13.64]% more total OPS, respectively,
+   for sequential access, random access and Gaussian access, when
+   THP=never.
+
+Our lab results
+---------------
+To supplement the above results, we ran the following benchmark suites
+on 5.16-rc7 and found no regressions [10].
+      fs_fio_bench_hdd_mq      pft
+      fs_lmbench               pgsql-hammerdb
+      fs_parallelio            redis
+      fs_postmark              stream
+      hackbench                sysbenchthread
+      kernbench                tpcc_spark
+      memcached                unixbench
+      multichase               vm-scalability
+      mutilate                 will-it-scale
+      nginx
+
+[01] https://trends.google.com
+[02] https://lore.kernel.org/r/[email protected]/
+[03] https://lore.kernel.org/r/[email protected]/
+[04] https://lore.kernel.org/r/[email protected]/
+[05] https://lore.kernel.org/r/[email protected]/
+[06] https://lore.kernel.org/r/[email protected]/
+[07] https://lore.kernel.org/r/[email protected]/
+[08] https://lore.kernel.org/r/[email protected]/
+[09] https://lore.kernel.org/r/[email protected]/
+[10] https://lore.kernel.org/r/[email protected]/
+
+Read-world applications
+=======================
+Third-party testimonials
+------------------------
+Konstantin reported [11]:
+   I have Archlinux with 8G RAM + zswap + swap. While developing, I
+   have lots of apps opened such as multiple LSP-servers for different
+   langs, chats, two browsers, etc... Usually, my system gets quickly
+   to a point of SWAP-storms, where I have to kill LSP-servers,
+   restart browsers to free memory, etc, otherwise the system lags
+   heavily and is barely usable.
+
+   1.5 day ago I migrated from 5.11.15 kernel to 5.12 + the LRU
+   patchset, and I started up by opening lots of apps to create memory
+   pressure, and worked for a day like this. Till now I had not a
+   single SWAP-storm, and mind you I got 3.4G in SWAP. I was never
+   getting to the point of 3G in SWAP before without a single
+   SWAP-storm.
+
+Vaibhav from IBM reported [12]:
+   In a synthetic MongoDB Benchmark, seeing an average of ~19%
+   throughput improvement on POWER10(Radix MMU + 64K Page Size) with
+   MGLRU patches on top of 5.16 kernel for MongoDB + YCSB across
+   three different request distributions, namely, Exponential, Uniform
+   and Zipfan.
+
+Shuang from U of Rochester reported [13]:
+   With the MGLRU, fio achieved 95% CIs [38.95, 40.26]%, [4.12, 6.64]%
+   and [9.26, 10.36]% higher throughput, respectively, for random
+   access, Zipfian (distribution) access and Gaussian (distribution)
+   access, when the average number of jobs per CPU is 1; 95% CIs
+   [42.32, 49.15]%, [9.44, 9.89]% and [20.99, 22.86]% higher
+   throughput, respectively, for random access, Zipfian access and
+   Gaussian access, when the average number of jobs per CPU is 2.
+
+Daniel from Michigan Tech reported [14]:
+   With Memcached allocating ~100GB of byte-addressable Optante,
+   performance improvement in terms of throughput (measured as queries
+   per second) was about 10% for a series of workloads.
+
+Large-scale deployments
+-----------------------
+We've rolled out MGLRU to tens of millions of ChromeOS users and
+about a million Android users. Google's fleetwide profiling [15] shows
+an overall 40% decrease in kswapd CPU usage, in addition to
+improvements in other UX metrics, e.g., an 85% decrease in the number
+of low-memory kills at the 75th percentile and an 18% decrease in
+app launch time at the 50th percentile.
+
+The downstream kernels that have been using MGLRU include:
+1. Android [16]
+2. Arch Linux Zen [17]
+3. Armbian [18]
+4. ChromeOS [19]
+5. Liquorix [20]
+6. OpenWrt [21]
+7. post-factum [22]
+8. XanMod [23]
+
+[11] https://lore.kernel.org/r/[email protected]/
+[12] https://lore.kernel.org/r/[email protected]/
+[13] https://lore.kernel.org/r/[email protected]/
+[14] https://lore.kernel.org/r/CA+4-3vksGvKd18FgRinxhqHetBS1hQekJE2gwco8Ja-bJWKtFw@mail.gmail.com/
+[15] https://dl.acm.org/doi/10.1145/2749469.2750392
+[16] https://android.com
+[17] https://archlinux.org
+[18] https://armbian.com
+[19] https://chromium.org
+[20] https://liquorix.net
+[21] https://openwrt.org
+[22] https://codeberg.org/pf-kernel
+[23] https://xanmod.org
+
+Summary
+=======
+The facts are:
+1. The independent lab results and the real-world applications
+   indicate substantial improvements; there are no known regressions.
+2. Thrashing prevention, working set estimation and proactive reclaim
+   work out of the box; there are no equivalent solutions.
+3. There is a lot of new code; no smaller changes have been
+   demonstrated similar effects.
+
+Our options, accordingly, are:
+1. Given the amount of evidence, the reported improvements will likely
+   materialize for a wide range of workloads.
+2. Gauging the interest from the past discussions, the new features
+   will likely be put to use for both personal computers and data
+   centers.
+3. Based on Google's track record, the new code will likely be well
+   maintained in the long term. It'd be more difficult if not
+   impossible to achieve similar effects with other approaches.
+
+This patch (of 14):
+
+Some architectures automatically set the accessed bit in PTEs, e.g., x86
+and arm64 v8.2.  On architectures that do not have this capability,
+clearing the accessed bit in a PTE usually triggers a page fault following
+the TLB miss of this PTE (to emulate the accessed bit).
+
+Being aware of this capability can help make better decisions, e.g.,
+whether to spread the work out over a period of time to reduce bursty page
+faults when trying to clear the accessed bit in many PTEs.
+
+Note that theoretically this capability can be unreliable, e.g.,
+hotplugged CPUs might be different from builtin ones.  Therefore it should
+not be used in architecture-independent code that involves correctness,
+e.g., to determine whether TLB flushes are required (in combination with
+the accessed bit).
+
+Link: https://lkml.kernel.org/r/[email protected]
+Link: https://lkml.kernel.org/r/[email protected]
+Signed-off-by: Yu Zhao <[email protected]>
+Reviewed-by: Barry Song <[email protected]>
+Acked-by: Brian Geffon <[email protected]>
+Acked-by: Jan Alexander Steffens (heftig) <[email protected]>
+Acked-by: Oleksandr Natalenko <[email protected]>
+Acked-by: Steven Barrett <[email protected]>
+Acked-by: Suleiman Souhlal <[email protected]>
+Acked-by: Will Deacon <[email protected]>
+Tested-by: Daniel Byrne <[email protected]>
+Tested-by: Donald Carr <[email protected]>
+Tested-by: Holger Hoffstätte <[email protected]>
+Tested-by: Konstantin Kharlamov <[email protected]>
+Tested-by: Shuang Zhai <[email protected]>
+Tested-by: Sofia Trinh <[email protected]>
+Tested-by: Vaibhav Jain <[email protected]>
+Cc: Andi Kleen <[email protected]>
+Cc: Aneesh Kumar K.V <[email protected]>
+Cc: Catalin Marinas <[email protected]>
+Cc: Dave Hansen <[email protected]>
+Cc: Hillf Danton <[email protected]>
+Cc: Jens Axboe <[email protected]>
+Cc: Johannes Weiner <[email protected]>
+Cc: Jonathan Corbet <[email protected]>
+Cc: Linus Torvalds <[email protected]>
+Cc: [email protected]
+Cc: Matthew Wilcox <[email protected]>
+Cc: Mel Gorman <[email protected]>
+Cc: Michael Larabel <[email protected]>
+Cc: Michal Hocko <[email protected]>
+Cc: Mike Rapoport <[email protected]>
+Cc: Peter Zijlstra <[email protected]>
+Cc: Tejun Heo <[email protected]>
+Cc: Vlastimil Babka <[email protected]>
+Cc: Miaohe Lin <[email protected]>
+Cc: Mike Rapoport <[email protected]>
+Cc: Qi Zheng <[email protected]>
+Signed-off-by: Andrew Morton <[email protected]>
+---
+ arch/arm64/include/asm/pgtable.h | 14 ++------------
+ arch/x86/include/asm/pgtable.h   |  6 +++---
+ include/linux/pgtable.h          | 13 +++++++++++++
+ mm/memory.c                      | 14 +-------------
+ 4 files changed, 19 insertions(+), 28 deletions(-)
+
+--- a/arch/arm64/include/asm/pgtable.h
++++ b/arch/arm64/include/asm/pgtable.h
+@@ -999,23 +999,13 @@ static inline void update_mmu_cache(stru
+  * page after fork() + CoW for pfn mappings. We don't always have a
+  * hardware-managed access flag on arm64.
+  */
+-static inline bool arch_faults_on_old_pte(void)
+-{
+-	WARN_ON(preemptible());
+-
+-	return !cpu_has_hw_af();
+-}
+-#define arch_faults_on_old_pte		arch_faults_on_old_pte
++#define arch_has_hw_pte_young		cpu_has_hw_af
+ 
+ /*
+  * Experimentally, it's cheap to set the access flag in hardware and we
+  * benefit from prefaulting mappings as 'old' to start with.
+  */
+-static inline bool arch_wants_old_prefaulted_pte(void)
+-{
+-	return !arch_faults_on_old_pte();
+-}
+-#define arch_wants_old_prefaulted_pte	arch_wants_old_prefaulted_pte
++#define arch_wants_old_prefaulted_pte	cpu_has_hw_af
+ 
+ #endif /* !__ASSEMBLY__ */
+ 
+--- a/arch/x86/include/asm/pgtable.h
++++ b/arch/x86/include/asm/pgtable.h
+@@ -1397,10 +1397,10 @@ static inline bool arch_has_pfn_modify_c
+ 	return boot_cpu_has_bug(X86_BUG_L1TF);
+ }
+ 
+-#define arch_faults_on_old_pte arch_faults_on_old_pte
+-static inline bool arch_faults_on_old_pte(void)
++#define arch_has_hw_pte_young arch_has_hw_pte_young
++static inline bool arch_has_hw_pte_young(void)
+ {
+-	return false;
++	return true;
+ }
+ 
+ #endif	/* __ASSEMBLY__ */
+--- a/include/linux/pgtable.h
++++ b/include/linux/pgtable.h
+@@ -259,6 +259,19 @@ static inline int pmdp_clear_flush_young
+ #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+ #endif
+ 
++#ifndef arch_has_hw_pte_young
++/*
++ * Return whether the accessed bit is supported on the local CPU.
++ *
++ * This stub assumes accessing through an old PTE triggers a page fault.
++ * Architectures that automatically set the access bit should overwrite it.
++ */
++static inline bool arch_has_hw_pte_young(void)
++{
++	return false;
++}
++#endif
++
+ #ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR
+ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
+ 				       unsigned long address,
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -121,18 +121,6 @@ int randomize_va_space __read_mostly =
+ 					2;
+ #endif
+ 
+-#ifndef arch_faults_on_old_pte
+-static inline bool arch_faults_on_old_pte(void)
+-{
+-	/*
+-	 * Those arches which don't have hw access flag feature need to
+-	 * implement their own helper. By default, "true" means pagefault
+-	 * will be hit on old pte.
+-	 */
+-	return true;
+-}
+-#endif
+-
+ #ifndef arch_wants_old_prefaulted_pte
+ static inline bool arch_wants_old_prefaulted_pte(void)
+ {
+@@ -2782,7 +2770,7 @@ static inline bool cow_user_page(struct
+ 	 * On architectures with software "accessed" bits, we would
+ 	 * take a double page fault, so mark it accessed here.
+ 	 */
+-	if (arch_faults_on_old_pte() && !pte_young(vmf->orig_pte)) {
++	if (!arch_has_hw_pte_young() && !pte_young(vmf->orig_pte)) {
+ 		pte_t entry;
+ 
+ 		vmf->pte = pte_offset_map_lock(mm, vmf->pmd, addr, &vmf->ptl);

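To make the intent concrete, the following is a stand-alone user-space sketch, with a stubbed helper and illustrative batch sizes rather than kernel code, of the kind of decision arch_has_hw_pte_young() enables: clear the accessed bit in large batches when hardware maintains it, and spread the work out otherwise to avoid bursty page faults, as the changelog describes.

    /* Stand-alone sketch; arch_has_hw_pte_young() is stubbed here and the
     * batch sizes are illustrative, not values used by the patchset. */
    #include <stdbool.h>
    #include <stddef.h>

    bool arch_has_hw_pte_young(void)
    {
            return false;            /* e.g. a CPU with no hardware-managed access flag */
    }

    /* How many PTEs to age (clear the accessed bit on) per pass. */
    size_t aging_batch(size_t nr_ptes)
    {
            if (arch_has_hw_pte_young())
                    return nr_ptes;          /* cheap: hardware re-sets the bit on access */
            return nr_ptes / 8 + 1;          /* costly: each cleared PTE means a later fault */
    }
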
+ 153 - 0
target/linux/generic/backport-6.1/020-v6.1-02-mm-x86-add-CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG.patch

@@ -0,0 +1,153 @@
+From 493de1c4b0f2cd909169401da8c445f6c8a7e29d Mon Sep 17 00:00:00 2001
+From: Yu Zhao <[email protected]>
+Date: Sun, 18 Sep 2022 01:59:59 -0600
+Subject: [PATCH 02/29] mm: x86: add CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Some architectures support the accessed bit in non-leaf PMD entries, e.g.,
+x86 sets the accessed bit in a non-leaf PMD entry when using it as part of
+linear address translation [1].  Page table walkers that clear the
+accessed bit may use this capability to reduce their search space.
+
+Note that:
+1. Although an inline function is preferable, this capability is added
+   as a configuration option for consistency with the existing macros.
+2. Due to the little interest in other varieties, this capability was
+   only tested on Intel and AMD CPUs.
+
+Thanks to the following developers for their efforts [2][3].
+  Randy Dunlap <[email protected]>
+  Stephen Rothwell <[email protected]>
+
+[1]: Intel 64 and IA-32 Architectures Software Developer's Manual
+     Volume 3 (June 2021), section 4.8
+[2] https://lore.kernel.org/r/[email protected]/
+[3] https://lore.kernel.org/r/[email protected]/
+
+Link: https://lkml.kernel.org/r/[email protected]
+Signed-off-by: Yu Zhao <[email protected]>
+Reviewed-by: Barry Song <[email protected]>
+Acked-by: Brian Geffon <[email protected]>
+Acked-by: Jan Alexander Steffens (heftig) <[email protected]>
+Acked-by: Oleksandr Natalenko <[email protected]>
+Acked-by: Steven Barrett <[email protected]>
+Acked-by: Suleiman Souhlal <[email protected]>
+Tested-by: Daniel Byrne <[email protected]>
+Tested-by: Donald Carr <[email protected]>
+Tested-by: Holger Hoffstätte <[email protected]>
+Tested-by: Konstantin Kharlamov <[email protected]>
+Tested-by: Shuang Zhai <[email protected]>
+Tested-by: Sofia Trinh <[email protected]>
+Tested-by: Vaibhav Jain <[email protected]>
+Cc: Andi Kleen <[email protected]>
+Cc: Aneesh Kumar K.V <[email protected]>
+Cc: Catalin Marinas <[email protected]>
+Cc: Dave Hansen <[email protected]>
+Cc: Hillf Danton <[email protected]>
+Cc: Jens Axboe <[email protected]>
+Cc: Johannes Weiner <[email protected]>
+Cc: Jonathan Corbet <[email protected]>
+Cc: Linus Torvalds <[email protected]>
+Cc: Matthew Wilcox <[email protected]>
+Cc: Mel Gorman <[email protected]>
+Cc: Miaohe Lin <[email protected]>
+Cc: Michael Larabel <[email protected]>
+Cc: Michal Hocko <[email protected]>
+Cc: Mike Rapoport <[email protected]>
+Cc: Mike Rapoport <[email protected]>
+Cc: Peter Zijlstra <[email protected]>
+Cc: Qi Zheng <[email protected]>
+Cc: Tejun Heo <[email protected]>
+Cc: Vlastimil Babka <[email protected]>
+Cc: Will Deacon <[email protected]>
+Signed-off-by: Andrew Morton <[email protected]>
+---
+ arch/Kconfig                   | 8 ++++++++
+ arch/x86/Kconfig               | 1 +
+ arch/x86/include/asm/pgtable.h | 3 ++-
+ arch/x86/mm/pgtable.c          | 5 ++++-
+ include/linux/pgtable.h        | 4 ++--
+ 5 files changed, 17 insertions(+), 4 deletions(-)
+
+--- a/arch/Kconfig
++++ b/arch/Kconfig
+@@ -1295,6 +1295,14 @@ config ARCH_HAS_ELFCORE_COMPAT
+ config ARCH_HAS_PARANOID_L1D_FLUSH
+ 	bool
+ 
++config ARCH_HAS_NONLEAF_PMD_YOUNG
++	bool
++	help
++	  Architectures that select this option are capable of setting the
++	  accessed bit in non-leaf PMD entries when using them as part of linear
++	  address translations. Page table walkers that clear the accessed bit
++	  may use this capability to reduce their search space.
++
+ source "kernel/gcov/Kconfig"
+ 
+ source "scripts/gcc-plugins/Kconfig"
+--- a/arch/x86/Kconfig
++++ b/arch/x86/Kconfig
+@@ -84,6 +84,7 @@ config X86
+ 	select ARCH_HAS_PMEM_API		if X86_64
+ 	select ARCH_HAS_PTE_DEVMAP		if X86_64
+ 	select ARCH_HAS_PTE_SPECIAL
++	select ARCH_HAS_NONLEAF_PMD_YOUNG	if PGTABLE_LEVELS > 2
+ 	select ARCH_HAS_UACCESS_FLUSHCACHE	if X86_64
+ 	select ARCH_HAS_COPY_MC			if X86_64
+ 	select ARCH_HAS_SET_MEMORY
+--- a/arch/x86/include/asm/pgtable.h
++++ b/arch/x86/include/asm/pgtable.h
+@@ -817,7 +817,8 @@ static inline unsigned long pmd_page_vad
+ 
+ static inline int pmd_bad(pmd_t pmd)
+ {
+-	return (pmd_flags(pmd) & ~_PAGE_USER) != _KERNPG_TABLE;
++	return (pmd_flags(pmd) & ~(_PAGE_USER | _PAGE_ACCESSED)) !=
++	       (_KERNPG_TABLE & ~_PAGE_ACCESSED);
+ }
+ 
+ static inline unsigned long pages_to_mb(unsigned long npg)
+--- a/arch/x86/mm/pgtable.c
++++ b/arch/x86/mm/pgtable.c
+@@ -550,7 +550,7 @@ int ptep_test_and_clear_young(struct vm_
+ 	return ret;
+ }
+ 
+-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
++#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG)
+ int pmdp_test_and_clear_young(struct vm_area_struct *vma,
+ 			      unsigned long addr, pmd_t *pmdp)
+ {
+@@ -562,6 +562,9 @@ int pmdp_test_and_clear_young(struct vm_
+ 
+ 	return ret;
+ }
++#endif
++
++#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ int pudp_test_and_clear_young(struct vm_area_struct *vma,
+ 			      unsigned long addr, pud_t *pudp)
+ {
+--- a/include/linux/pgtable.h
++++ b/include/linux/pgtable.h
+@@ -212,7 +212,7 @@ static inline int ptep_test_and_clear_yo
+ #endif
+ 
+ #ifndef __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
+-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
++#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG)
+ static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
+ 					    unsigned long address,
+ 					    pmd_t *pmdp)
+@@ -233,7 +233,7 @@ static inline int pmdp_test_and_clear_yo
+ 	BUILD_BUG();
+ 	return 0;
+ }
+-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
++#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG */
+ #endif
+ 
+ #ifndef __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH

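The changelog's use case, page table walkers pruning their search space, can be sketched in stand-alone form as below; pmd_entry_t, PMD_ACCESSED, pmd_young() and clear_young() are simplified stand-ins for the real kernel types and helpers, not the patchset's implementation.

    /* Sketch only: descend into a page table only if its non-leaf PMD has
     * the accessed bit set, since the CPU sets that bit when translating
     * through the table (see the changelog above). */
    #include <stdbool.h>
    #include <stddef.h>

    typedef struct { unsigned long flags; } pmd_entry_t;
    #define PMD_ACCESSED 0x20UL              /* illustrative bit value */

    bool pmd_young(const pmd_entry_t *pmd)   { return pmd->flags & PMD_ACCESSED; }
    void clear_young(pmd_entry_t *pmd)       { pmd->flags &= ~PMD_ACCESSED; }

    size_t walk_pmds(pmd_entry_t *pmds, size_t n, void (*scan_ptes)(size_t idx))
    {
            size_t scanned = 0;

            for (size_t i = 0; i < n; i++) {
                    if (!pmd_young(&pmds[i]))
                            continue;        /* no access through this table since last clear */
                    clear_young(&pmds[i]);
                    scan_ptes(i);            /* only now scan the PTEs underneath */
                    scanned++;
            }
            return scanned;
    }
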
+ 275 - 0
target/linux/generic/backport-6.1/020-v6.1-03-mm-vmscan.c-refactor-shrink_node.patch

@@ -0,0 +1,275 @@
+From 9e17efd11450d3d2069adaa3c58db9ac8ebd1c66 Mon Sep 17 00:00:00 2001
+From: Yu Zhao <[email protected]>
+Date: Sun, 18 Sep 2022 02:00:00 -0600
+Subject: [PATCH 03/29] mm/vmscan.c: refactor shrink_node()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+This patch refactors shrink_node() to improve readability for the upcoming
+changes to mm/vmscan.c.
+
+Link: https://lkml.kernel.org/r/[email protected]
+Signed-off-by: Yu Zhao <[email protected]>
+Reviewed-by: Barry Song <[email protected]>
+Reviewed-by: Miaohe Lin <[email protected]>
+Acked-by: Brian Geffon <[email protected]>
+Acked-by: Jan Alexander Steffens (heftig) <[email protected]>
+Acked-by: Oleksandr Natalenko <[email protected]>
+Acked-by: Steven Barrett <[email protected]>
+Acked-by: Suleiman Souhlal <[email protected]>
+Tested-by: Daniel Byrne <[email protected]>
+Tested-by: Donald Carr <[email protected]>
+Tested-by: Holger Hoffstätte <[email protected]>
+Tested-by: Konstantin Kharlamov <[email protected]>
+Tested-by: Shuang Zhai <[email protected]>
+Tested-by: Sofia Trinh <[email protected]>
+Tested-by: Vaibhav Jain <[email protected]>
+Cc: Andi Kleen <[email protected]>
+Cc: Aneesh Kumar K.V <[email protected]>
+Cc: Catalin Marinas <[email protected]>
+Cc: Dave Hansen <[email protected]>
+Cc: Hillf Danton <[email protected]>
+Cc: Jens Axboe <[email protected]>
+Cc: Johannes Weiner <[email protected]>
+Cc: Jonathan Corbet <[email protected]>
+Cc: Linus Torvalds <[email protected]>
+Cc: Matthew Wilcox <[email protected]>
+Cc: Mel Gorman <[email protected]>
+Cc: Michael Larabel <[email protected]>
+Cc: Michal Hocko <[email protected]>
+Cc: Mike Rapoport <[email protected]>
+Cc: Mike Rapoport <[email protected]>
+Cc: Peter Zijlstra <[email protected]>
+Cc: Qi Zheng <[email protected]>
+Cc: Tejun Heo <[email protected]>
+Cc: Vlastimil Babka <[email protected]>
+Cc: Will Deacon <[email protected]>
+Signed-off-by: Andrew Morton <[email protected]>
+---
+ mm/vmscan.c | 198 +++++++++++++++++++++++++++-------------------------
+ 1 file changed, 104 insertions(+), 94 deletions(-)
+
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -2497,6 +2497,109 @@ enum scan_balance {
+ 	SCAN_FILE,
+ };
+ 
++static void prepare_scan_count(pg_data_t *pgdat, struct scan_control *sc)
++{
++	unsigned long file;
++	struct lruvec *target_lruvec;
++
++	target_lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat);
++
++	/*
++	 * Flush the memory cgroup stats, so that we read accurate per-memcg
++	 * lruvec stats for heuristics.
++	 */
++	mem_cgroup_flush_stats();
++
++	/*
++	 * Determine the scan balance between anon and file LRUs.
++	 */
++	spin_lock_irq(&target_lruvec->lru_lock);
++	sc->anon_cost = target_lruvec->anon_cost;
++	sc->file_cost = target_lruvec->file_cost;
++	spin_unlock_irq(&target_lruvec->lru_lock);
++
++	/*
++	 * Target desirable inactive:active list ratios for the anon
++	 * and file LRU lists.
++	 */
++	if (!sc->force_deactivate) {
++		unsigned long refaults;
++
++		refaults = lruvec_page_state(target_lruvec,
++				WORKINGSET_ACTIVATE_ANON);
++		if (refaults != target_lruvec->refaults[0] ||
++			inactive_is_low(target_lruvec, LRU_INACTIVE_ANON))
++			sc->may_deactivate |= DEACTIVATE_ANON;
++		else
++			sc->may_deactivate &= ~DEACTIVATE_ANON;
++
++		/*
++		 * When refaults are being observed, it means a new
++		 * workingset is being established. Deactivate to get
++		 * rid of any stale active pages quickly.
++		 */
++		refaults = lruvec_page_state(target_lruvec,
++				WORKINGSET_ACTIVATE_FILE);
++		if (refaults != target_lruvec->refaults[1] ||
++		    inactive_is_low(target_lruvec, LRU_INACTIVE_FILE))
++			sc->may_deactivate |= DEACTIVATE_FILE;
++		else
++			sc->may_deactivate &= ~DEACTIVATE_FILE;
++	} else
++		sc->may_deactivate = DEACTIVATE_ANON | DEACTIVATE_FILE;
++
++	/*
++	 * If we have plenty of inactive file pages that aren't
++	 * thrashing, try to reclaim those first before touching
++	 * anonymous pages.
++	 */
++	file = lruvec_page_state(target_lruvec, NR_INACTIVE_FILE);
++	if (file >> sc->priority && !(sc->may_deactivate & DEACTIVATE_FILE))
++		sc->cache_trim_mode = 1;
++	else
++		sc->cache_trim_mode = 0;
++
++	/*
++	 * Prevent the reclaimer from falling into the cache trap: as
++	 * cache pages start out inactive, every cache fault will tip
++	 * the scan balance towards the file LRU.  And as the file LRU
++	 * shrinks, so does the window for rotation from references.
++	 * This means we have a runaway feedback loop where a tiny
++	 * thrashing file LRU becomes infinitely more attractive than
++	 * anon pages.  Try to detect this based on file LRU size.
++	 */
++	if (!cgroup_reclaim(sc)) {
++		unsigned long total_high_wmark = 0;
++		unsigned long free, anon;
++		int z;
++
++		free = sum_zone_node_page_state(pgdat->node_id, NR_FREE_PAGES);
++		file = node_page_state(pgdat, NR_ACTIVE_FILE) +
++			   node_page_state(pgdat, NR_INACTIVE_FILE);
++
++		for (z = 0; z < MAX_NR_ZONES; z++) {
++			struct zone *zone = &pgdat->node_zones[z];
++
++			if (!managed_zone(zone))
++				continue;
++
++			total_high_wmark += high_wmark_pages(zone);
++		}
++
++		/*
++		 * Consider anon: if that's low too, this isn't a
++		 * runaway file reclaim problem, but rather just
++		 * extreme pressure. Reclaim as per usual then.
++		 */
++		anon = node_page_state(pgdat, NR_INACTIVE_ANON);
++
++		sc->file_is_tiny =
++			file + free <= total_high_wmark &&
++			!(sc->may_deactivate & DEACTIVATE_ANON) &&
++			anon >> sc->priority;
++	}
++}
++
+ /*
+  * Determine how aggressively the anon and file LRU lists should be
+  * scanned.  The relative value of each set of LRU lists is determined
+@@ -2965,109 +3068,16 @@ static void shrink_node(pg_data_t *pgdat
+ 	unsigned long nr_reclaimed, nr_scanned;
+ 	struct lruvec *target_lruvec;
+ 	bool reclaimable = false;
+-	unsigned long file;
+ 
+ 	target_lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat);
+ 
+ again:
+-	/*
+-	 * Flush the memory cgroup stats, so that we read accurate per-memcg
+-	 * lruvec stats for heuristics.
+-	 */
+-	mem_cgroup_flush_stats();
+-
+ 	memset(&sc->nr, 0, sizeof(sc->nr));
+ 
+ 	nr_reclaimed = sc->nr_reclaimed;
+ 	nr_scanned = sc->nr_scanned;
+ 
+-	/*
+-	 * Determine the scan balance between anon and file LRUs.
+-	 */
+-	spin_lock_irq(&target_lruvec->lru_lock);
+-	sc->anon_cost = target_lruvec->anon_cost;
+-	sc->file_cost = target_lruvec->file_cost;
+-	spin_unlock_irq(&target_lruvec->lru_lock);
+-
+-	/*
+-	 * Target desirable inactive:active list ratios for the anon
+-	 * and file LRU lists.
+-	 */
+-	if (!sc->force_deactivate) {
+-		unsigned long refaults;
+-
+-		refaults = lruvec_page_state(target_lruvec,
+-				WORKINGSET_ACTIVATE_ANON);
+-		if (refaults != target_lruvec->refaults[0] ||
+-			inactive_is_low(target_lruvec, LRU_INACTIVE_ANON))
+-			sc->may_deactivate |= DEACTIVATE_ANON;
+-		else
+-			sc->may_deactivate &= ~DEACTIVATE_ANON;
+-
+-		/*
+-		 * When refaults are being observed, it means a new
+-		 * workingset is being established. Deactivate to get
+-		 * rid of any stale active pages quickly.
+-		 */
+-		refaults = lruvec_page_state(target_lruvec,
+-				WORKINGSET_ACTIVATE_FILE);
+-		if (refaults != target_lruvec->refaults[1] ||
+-		    inactive_is_low(target_lruvec, LRU_INACTIVE_FILE))
+-			sc->may_deactivate |= DEACTIVATE_FILE;
+-		else
+-			sc->may_deactivate &= ~DEACTIVATE_FILE;
+-	} else
+-		sc->may_deactivate = DEACTIVATE_ANON | DEACTIVATE_FILE;
+-
+-	/*
+-	 * If we have plenty of inactive file pages that aren't
+-	 * thrashing, try to reclaim those first before touching
+-	 * anonymous pages.
+-	 */
+-	file = lruvec_page_state(target_lruvec, NR_INACTIVE_FILE);
+-	if (file >> sc->priority && !(sc->may_deactivate & DEACTIVATE_FILE))
+-		sc->cache_trim_mode = 1;
+-	else
+-		sc->cache_trim_mode = 0;
+-
+-	/*
+-	 * Prevent the reclaimer from falling into the cache trap: as
+-	 * cache pages start out inactive, every cache fault will tip
+-	 * the scan balance towards the file LRU.  And as the file LRU
+-	 * shrinks, so does the window for rotation from references.
+-	 * This means we have a runaway feedback loop where a tiny
+-	 * thrashing file LRU becomes infinitely more attractive than
+-	 * anon pages.  Try to detect this based on file LRU size.
+-	 */
+-	if (!cgroup_reclaim(sc)) {
+-		unsigned long total_high_wmark = 0;
+-		unsigned long free, anon;
+-		int z;
+-
+-		free = sum_zone_node_page_state(pgdat->node_id, NR_FREE_PAGES);
+-		file = node_page_state(pgdat, NR_ACTIVE_FILE) +
+-			   node_page_state(pgdat, NR_INACTIVE_FILE);
+-
+-		for (z = 0; z < MAX_NR_ZONES; z++) {
+-			struct zone *zone = &pgdat->node_zones[z];
+-			if (!managed_zone(zone))
+-				continue;
+-
+-			total_high_wmark += high_wmark_pages(zone);
+-		}
+-
+-		/*
+-		 * Consider anon: if that's low too, this isn't a
+-		 * runaway file reclaim problem, but rather just
+-		 * extreme pressure. Reclaim as per usual then.
+-		 */
+-		anon = node_page_state(pgdat, NR_INACTIVE_ANON);
+-
+-		sc->file_is_tiny =
+-			file + free <= total_high_wmark &&
+-			!(sc->may_deactivate & DEACTIVATE_ANON) &&
+-			anon >> sc->priority;
+-	}
++	prepare_scan_count(pgdat, sc);
+ 
+ 	shrink_node_memcgs(pgdat, sc);
+ 

+ 82 - 0
target/linux/generic/backport-6.1/020-v6.1-04-Revert-include-linux-mm_inline.h-fold-__update_lru_s.patch

@@ -0,0 +1,82 @@
+From 03705be42114db7cc5bd6eb7bf7e8703c94d4880 Mon Sep 17 00:00:00 2001
+From: Yu Zhao <[email protected]>
+Date: Sun, 18 Sep 2022 02:00:01 -0600
+Subject: [PATCH 04/29] Revert "include/linux/mm_inline.h: fold
+ __update_lru_size() into its sole caller"
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+This patch undoes the following refactor: commit 289ccba18af4
+("include/linux/mm_inline.h: fold __update_lru_size() into its sole
+caller")
+
+The upcoming changes to include/linux/mm_inline.h will reuse
+__update_lru_size().
+
+Link: https://lkml.kernel.org/r/[email protected]
+Signed-off-by: Yu Zhao <[email protected]>
+Reviewed-by: Miaohe Lin <[email protected]>
+Acked-by: Brian Geffon <[email protected]>
+Acked-by: Jan Alexander Steffens (heftig) <[email protected]>
+Acked-by: Oleksandr Natalenko <[email protected]>
+Acked-by: Steven Barrett <[email protected]>
+Acked-by: Suleiman Souhlal <[email protected]>
+Tested-by: Daniel Byrne <[email protected]>
+Tested-by: Donald Carr <[email protected]>
+Tested-by: Holger Hoffstätte <[email protected]>
+Tested-by: Konstantin Kharlamov <[email protected]>
+Tested-by: Shuang Zhai <[email protected]>
+Tested-by: Sofia Trinh <[email protected]>
+Tested-by: Vaibhav Jain <[email protected]>
+Cc: Andi Kleen <[email protected]>
+Cc: Aneesh Kumar K.V <[email protected]>
+Cc: Barry Song <[email protected]>
+Cc: Catalin Marinas <[email protected]>
+Cc: Dave Hansen <[email protected]>
+Cc: Hillf Danton <[email protected]>
+Cc: Jens Axboe <[email protected]>
+Cc: Johannes Weiner <[email protected]>
+Cc: Jonathan Corbet <[email protected]>
+Cc: Linus Torvalds <[email protected]>
+Cc: Matthew Wilcox <[email protected]>
+Cc: Mel Gorman <[email protected]>
+Cc: Michael Larabel <[email protected]>
+Cc: Michal Hocko <[email protected]>
+Cc: Mike Rapoport <[email protected]>
+Cc: Mike Rapoport <[email protected]>
+Cc: Peter Zijlstra <[email protected]>
+Cc: Qi Zheng <[email protected]>
+Cc: Tejun Heo <[email protected]>
+Cc: Vlastimil Babka <[email protected]>
+Cc: Will Deacon <[email protected]>
+Signed-off-by: Andrew Morton <[email protected]>
+---
+ include/linux/mm_inline.h | 9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+--- a/include/linux/mm_inline.h
++++ b/include/linux/mm_inline.h
+@@ -24,7 +24,7 @@ static inline int page_is_file_lru(struc
+ 	return !PageSwapBacked(page);
+ }
+ 
+-static __always_inline void update_lru_size(struct lruvec *lruvec,
++static __always_inline void __update_lru_size(struct lruvec *lruvec,
+ 				enum lru_list lru, enum zone_type zid,
+ 				int nr_pages)
+ {
+@@ -33,6 +33,13 @@ static __always_inline void update_lru_s
+ 	__mod_lruvec_state(lruvec, NR_LRU_BASE + lru, nr_pages);
+ 	__mod_zone_page_state(&pgdat->node_zones[zid],
+ 				NR_ZONE_LRU_BASE + lru, nr_pages);
++}
++
++static __always_inline void update_lru_size(struct lruvec *lruvec,
++				enum lru_list lru, enum zone_type zid,
++				long nr_pages)
++{
++	__update_lru_size(lruvec, lru, zid, nr_pages);
+ #ifdef CONFIG_MEMCG
+ 	mem_cgroup_update_lru_size(lruvec, lru, zid, nr_pages);
+ #endif

+ 807 - 0
target/linux/generic/backport-6.1/020-v6.1-05-mm-multi-gen-LRU-groundwork.patch

@@ -0,0 +1,807 @@
+From a9b328add8422921a0dbbef162730800e16e8cfd Mon Sep 17 00:00:00 2001
+From: Yu Zhao <[email protected]>
+Date: Sun, 18 Sep 2022 02:00:02 -0600
+Subject: [PATCH 05/29] mm: multi-gen LRU: groundwork
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Evictable pages are divided into multiple generations for each lruvec.
+The youngest generation number is stored in lrugen->max_seq for both
+anon and file types as they are aged on an equal footing. The oldest
+generation numbers are stored in lrugen->min_seq[] separately for anon
+and file types as clean file pages can be evicted regardless of swap
+constraints. These three variables are monotonically increasing.
+
+Generation numbers are truncated into order_base_2(MAX_NR_GENS+1) bits
+in order to fit into the gen counter in page->flags. Each truncated
+generation number is an index to lrugen->lists[]. The sliding window
+technique is used to track at least MIN_NR_GENS and at most
+MAX_NR_GENS generations. The gen counter stores a value within [1,
+MAX_NR_GENS] while a page is on one of lrugen->lists[]. Otherwise it
+stores 0.
+
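The seq-to-gen arithmetic above is small enough to model in standalone C. A minimal sketch, assuming an illustrative MAX_NR_GENS of 4 and a placeholder bit offset (the kernel derives the real offset from its page-flags layout; the toy names below are not the patch's code):

    #include <assert.h>
    #include <stdio.h>

    #define MAX_NR_GENS   4UL                 /* as in the patch */
    #define LRU_GEN_WIDTH 3                   /* order_base_2(MAX_NR_GENS + 1) */
    #define LRU_GEN_PGOFF 8UL                 /* placeholder offset, not the kernel's */
    #define LRU_GEN_MASK  (((1UL << LRU_GEN_WIDTH) - 1) << LRU_GEN_PGOFF)

    /* truncate a monotonically increasing seq into an index into lists[] */
    static unsigned long lru_gen_from_seq(unsigned long seq)
    {
        return seq % MAX_NR_GENS;
    }

    int main(void)
    {
        unsigned long flags = 0, min_seq = 4, max_seq = 7;
        unsigned long gen = lru_gen_from_seq(max_seq);

        /* the gen counter stores gen+1 so that 0 can mean "not on any list" */
        flags |= (gen + 1UL) << LRU_GEN_PGOFF;
        assert(((flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1 == gen);

        printf("window [%lu..%lu]: %lu generations, max_seq maps to gen %lu\n",
               min_seq, max_seq, max_seq - min_seq + 1, gen);
        return 0;
    }
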
+There are two conceptually independent procedures: "the aging", which
+produces young generations, and "the eviction", which consumes old
+generations.  They form a closed-loop system, i.e., "the page reclaim".
+Both procedures can be invoked from userspace for the purposes of working
+set estimation and proactive reclaim.  These techniques are commonly used
+to optimize job scheduling (bin packing) in data centers [1][2].
+
+To avoid confusion, the terms "hot" and "cold" will be applied to the
+multi-gen LRU, as a new convention; the terms "active" and "inactive" will
+be applied to the active/inactive LRU, as usual.
+
+The protection of hot pages and the selection of cold pages are based
+on page access channels and patterns. There are two access channels:
+one through page tables and the other through file descriptors. The
+protection of the former channel is by design stronger because:
+1. The uncertainty in determining the access patterns of the former
+   channel is higher due to the approximation of the accessed bit.
+2. The cost of evicting the former channel is higher due to the TLB
+   flushes required and the likelihood of encountering the dirty bit.
+3. The penalty of underprotecting the former channel is higher because
+   applications usually do not prepare themselves for major page
+   faults like they do for blocked I/O. E.g., GUI applications
+   commonly use dedicated I/O threads to avoid blocking rendering
+   threads.
+
+There are also two access patterns: one with temporal locality and the
+other without.  For the reasons listed above, the former channel is
+assumed to follow the former pattern unless VM_SEQ_READ or VM_RAND_READ is
+present; the latter channel is assumed to follow the latter pattern unless
+outlying refaults have been observed [3][4].
+
+The next patch will address the "outlying refaults".  Three macros, i.e.,
+LRU_REFS_WIDTH, LRU_REFS_PGOFF and LRU_REFS_MASK, used later are added in
+this patch to make the entire patchset less diffy.
+
+A page is added to the youngest generation on faulting.  The aging needs
+to check the accessed bit at least twice before handing this page over to
+the eviction.  The first check takes care of the accessed bit set on the
+initial fault; the second check makes sure this page has not been used
+since then.  This protocol, AKA second chance, requires a minimum of two
+generations, hence MIN_NR_GENS.
+
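The second-chance protocol itself fits in a toy model; a rough sketch (the struct and helper below are invented for illustration, and the real checks act on the hardware accessed bit):

    #include <stdbool.h>
    #include <stdio.h>

    struct toy_page { bool accessed; };

    /* one accessed-bit check, as one aging pass would perform it */
    static bool can_evict(struct toy_page *p)
    {
        if (p->accessed) {
            p->accessed = false;    /* clear it and give the page another chance */
            return false;
        }
        return true;                /* still clear on a later check: unused since */
    }

    int main(void)
    {
        struct toy_page p = { .accessed = true };           /* set by the initial fault */

        printf("check 1: evictable=%d\n", can_evict(&p));   /* 0: bit was set */
        printf("check 2: evictable=%d\n", can_evict(&p));   /* 1: not used since */
        return 0;
    }

A page therefore has to survive at least two passes, which is exactly why two generations are the floor.
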
+[1] https://dl.acm.org/doi/10.1145/3297858.3304053
+[2] https://dl.acm.org/doi/10.1145/3503222.3507731
+[3] https://lwn.net/Articles/495543/
+[4] https://lwn.net/Articles/815342/
+
+Link: https://lkml.kernel.org/r/[email protected]
+Signed-off-by: Yu Zhao <[email protected]>
+Acked-by: Brian Geffon <[email protected]>
+Acked-by: Jan Alexander Steffens (heftig) <[email protected]>
+Acked-by: Oleksandr Natalenko <[email protected]>
+Acked-by: Steven Barrett <[email protected]>
+Acked-by: Suleiman Souhlal <[email protected]>
+Tested-by: Daniel Byrne <[email protected]>
+Tested-by: Donald Carr <[email protected]>
+Tested-by: Holger Hoffstätte <[email protected]>
+Tested-by: Konstantin Kharlamov <[email protected]>
+Tested-by: Shuang Zhai <[email protected]>
+Tested-by: Sofia Trinh <[email protected]>
+Tested-by: Vaibhav Jain <[email protected]>
+Cc: Andi Kleen <[email protected]>
+Cc: Aneesh Kumar K.V <[email protected]>
+Cc: Barry Song <[email protected]>
+Cc: Catalin Marinas <[email protected]>
+Cc: Dave Hansen <[email protected]>
+Cc: Hillf Danton <[email protected]>
+Cc: Jens Axboe <[email protected]>
+Cc: Johannes Weiner <[email protected]>
+Cc: Jonathan Corbet <[email protected]>
+Cc: Linus Torvalds <[email protected]>
+Cc: Matthew Wilcox <[email protected]>
+Cc: Mel Gorman <[email protected]>
+Cc: Miaohe Lin <[email protected]>
+Cc: Michael Larabel <[email protected]>
+Cc: Michal Hocko <[email protected]>
+Cc: Mike Rapoport <[email protected]>
+Cc: Mike Rapoport <[email protected]>
+Cc: Peter Zijlstra <[email protected]>
+Cc: Qi Zheng <[email protected]>
+Cc: Tejun Heo <[email protected]>
+Cc: Vlastimil Babka <[email protected]>
+Cc: Will Deacon <[email protected]>
+Signed-off-by: Andrew Morton <[email protected]>
+---
+ fs/fuse/dev.c                     |   3 +-
+ include/linux/mm.h                |   2 +
+ include/linux/mm_inline.h         | 177 +++++++++++++++++++++++++++++-
+ include/linux/mmzone.h            | 100 +++++++++++++++++
+ include/linux/page-flags-layout.h |  13 ++-
+ include/linux/page-flags.h        |   4 +-
+ include/linux/sched.h             |   4 +
+ kernel/bounds.c                   |   5 +
+ mm/Kconfig                        |   8 ++
+ mm/huge_memory.c                  |   3 +-
+ mm/memcontrol.c                   |   2 +
+ mm/memory.c                       |  25 +++++
+ mm/mm_init.c                      |   6 +-
+ mm/mmzone.c                       |   2 +
+ mm/swap.c                         |  10 +-
+ mm/vmscan.c                       |  75 +++++++++++++
+ 16 files changed, 425 insertions(+), 14 deletions(-)
+
+--- a/fs/fuse/dev.c
++++ b/fs/fuse/dev.c
+@@ -785,7 +785,8 @@ static int fuse_check_page(struct page *
+ 	       1 << PG_active |
+ 	       1 << PG_workingset |
+ 	       1 << PG_reclaim |
+-	       1 << PG_waiters))) {
++	       1 << PG_waiters |
++	       LRU_GEN_MASK | LRU_REFS_MASK))) {
+ 		dump_page(page, "fuse: trying to steal weird page");
+ 		return 1;
+ 	}
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -1093,6 +1093,8 @@ vm_fault_t finish_mkwrite_fault(struct v
+ #define ZONES_PGOFF		(NODES_PGOFF - ZONES_WIDTH)
+ #define LAST_CPUPID_PGOFF	(ZONES_PGOFF - LAST_CPUPID_WIDTH)
+ #define KASAN_TAG_PGOFF		(LAST_CPUPID_PGOFF - KASAN_TAG_WIDTH)
++#define LRU_GEN_PGOFF		(KASAN_TAG_PGOFF - LRU_GEN_WIDTH)
++#define LRU_REFS_PGOFF		(LRU_GEN_PGOFF - LRU_REFS_WIDTH)
+ 
+ /*
+  * Define the bit shifts to access each section.  For non-existent
+--- a/include/linux/mm_inline.h
++++ b/include/linux/mm_inline.h
+@@ -26,10 +26,13 @@ static inline int page_is_file_lru(struc
+ 
+ static __always_inline void __update_lru_size(struct lruvec *lruvec,
+ 				enum lru_list lru, enum zone_type zid,
+-				int nr_pages)
++				long nr_pages)
+ {
+ 	struct pglist_data *pgdat = lruvec_pgdat(lruvec);
+ 
++	lockdep_assert_held(&lruvec->lru_lock);
++	WARN_ON_ONCE(nr_pages != (int)nr_pages);
++
+ 	__mod_lruvec_state(lruvec, NR_LRU_BASE + lru, nr_pages);
+ 	__mod_zone_page_state(&pgdat->node_zones[zid],
+ 				NR_ZONE_LRU_BASE + lru, nr_pages);
+@@ -86,11 +89,177 @@ static __always_inline enum lru_list pag
+ 	return lru;
+ }
+ 
++#ifdef CONFIG_LRU_GEN
++
++static inline bool lru_gen_enabled(void)
++{
++	return true;
++}
++
++static inline bool lru_gen_in_fault(void)
++{
++	return current->in_lru_fault;
++}
++
++static inline int lru_gen_from_seq(unsigned long seq)
++{
++	return seq % MAX_NR_GENS;
++}
++
++static inline int page_lru_gen(struct page *page)
++{
++	unsigned long flags = READ_ONCE(page->flags);
++
++	return ((flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1;
++}
++
++static inline bool lru_gen_is_active(struct lruvec *lruvec, int gen)
++{
++	unsigned long max_seq = lruvec->lrugen.max_seq;
++
++	VM_WARN_ON_ONCE(gen >= MAX_NR_GENS);
++
++	/* see the comment on MIN_NR_GENS */
++	return gen == lru_gen_from_seq(max_seq) || gen == lru_gen_from_seq(max_seq - 1);
++}
++
++static inline void lru_gen_update_size(struct lruvec *lruvec, struct page *page,
++				       int old_gen, int new_gen)
++{
++	int type = page_is_file_lru(page);
++	int zone = page_zonenum(page);
++	int delta = thp_nr_pages(page);
++	enum lru_list lru = type * LRU_INACTIVE_FILE;
++	struct lru_gen_struct *lrugen = &lruvec->lrugen;
++
++	VM_WARN_ON_ONCE(old_gen != -1 && old_gen >= MAX_NR_GENS);
++	VM_WARN_ON_ONCE(new_gen != -1 && new_gen >= MAX_NR_GENS);
++	VM_WARN_ON_ONCE(old_gen == -1 && new_gen == -1);
++
++	if (old_gen >= 0)
++		WRITE_ONCE(lrugen->nr_pages[old_gen][type][zone],
++			   lrugen->nr_pages[old_gen][type][zone] - delta);
++	if (new_gen >= 0)
++		WRITE_ONCE(lrugen->nr_pages[new_gen][type][zone],
++			   lrugen->nr_pages[new_gen][type][zone] + delta);
++
++	/* addition */
++	if (old_gen < 0) {
++		if (lru_gen_is_active(lruvec, new_gen))
++			lru += LRU_ACTIVE;
++		__update_lru_size(lruvec, lru, zone, delta);
++		return;
++	}
++
++	/* deletion */
++	if (new_gen < 0) {
++		if (lru_gen_is_active(lruvec, old_gen))
++			lru += LRU_ACTIVE;
++		__update_lru_size(lruvec, lru, zone, -delta);
++		return;
++	}
++}
++
++static inline bool lru_gen_add_page(struct lruvec *lruvec, struct page *page, bool reclaiming)
++{
++	unsigned long seq;
++	unsigned long flags;
++	int gen = page_lru_gen(page);
++	int type = page_is_file_lru(page);
++	int zone = page_zonenum(page);
++	struct lru_gen_struct *lrugen = &lruvec->lrugen;
++
++	VM_WARN_ON_ONCE_PAGE(gen != -1, page);
++
++	if (PageUnevictable(page))
++		return false;
++	/*
++	 * There are three common cases for this page:
++	 * 1. If it's hot, e.g., freshly faulted in or previously hot and
++	 *    migrated, add it to the youngest generation.
++	 * 2. If it's cold but can't be evicted immediately, i.e., an anon page
++	 *    not in swapcache or a dirty page pending writeback, add it to the
++	 *    second oldest generation.
++	 * 3. Everything else (clean, cold) is added to the oldest generation.
++	 */
++	if (PageActive(page))
++		seq = lrugen->max_seq;
++	else if ((type == LRU_GEN_ANON && !PageSwapCache(page)) ||
++		 (PageReclaim(page) &&
++		  (PageDirty(page) || PageWriteback(page))))
++		seq = lrugen->min_seq[type] + 1;
++	else
++		seq = lrugen->min_seq[type];
++
++	gen = lru_gen_from_seq(seq);
++	flags = (gen + 1UL) << LRU_GEN_PGOFF;
++	/* see the comment on MIN_NR_GENS about PG_active */
++	set_mask_bits(&page->flags, LRU_GEN_MASK | BIT(PG_active), flags);
++
++	lru_gen_update_size(lruvec, page, -1, gen);
++	/* for rotate_reclaimable_page() */
++	if (reclaiming)
++		list_add_tail(&page->lru, &lrugen->lists[gen][type][zone]);
++	else
++		list_add(&page->lru, &lrugen->lists[gen][type][zone]);
++
++	return true;
++}
++
++static inline bool lru_gen_del_page(struct lruvec *lruvec, struct page *page, bool reclaiming)
++{
++	unsigned long flags;
++	int gen = page_lru_gen(page);
++
++	if (gen < 0)
++		return false;
++
++	VM_WARN_ON_ONCE_PAGE(PageActive(page), page);
++	VM_WARN_ON_ONCE_PAGE(PageUnevictable(page), page);
++
++	/* for migrate_page_states() */
++	flags = !reclaiming && lru_gen_is_active(lruvec, gen) ? BIT(PG_active) : 0;
++	flags = set_mask_bits(&page->flags, LRU_GEN_MASK, flags);
++	gen = ((flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1;
++
++	lru_gen_update_size(lruvec, page, gen, -1);
++	list_del(&page->lru);
++
++	return true;
++}
++
++#else /* !CONFIG_LRU_GEN */
++
++static inline bool lru_gen_enabled(void)
++{
++	return false;
++}
++
++static inline bool lru_gen_in_fault(void)
++{
++	return false;
++}
++
++static inline bool lru_gen_add_page(struct lruvec *lruvec, struct page *page, bool reclaiming)
++{
++	return false;
++}
++
++static inline bool lru_gen_del_page(struct lruvec *lruvec, struct page *page, bool reclaiming)
++{
++	return false;
++}
++
++#endif /* CONFIG_LRU_GEN */
++
+ static __always_inline void add_page_to_lru_list(struct page *page,
+ 				struct lruvec *lruvec)
+ {
+ 	enum lru_list lru = page_lru(page);
+ 
++	if (lru_gen_add_page(lruvec, page, false))
++		return;
++
+ 	update_lru_size(lruvec, lru, page_zonenum(page), thp_nr_pages(page));
+ 	list_add(&page->lru, &lruvec->lists[lru]);
+ }
+@@ -100,6 +269,9 @@ static __always_inline void add_page_to_
+ {
+ 	enum lru_list lru = page_lru(page);
+ 
++	if (lru_gen_add_page(lruvec, page, true))
++		return;
++
+ 	update_lru_size(lruvec, lru, page_zonenum(page), thp_nr_pages(page));
+ 	list_add_tail(&page->lru, &lruvec->lists[lru]);
+ }
+@@ -107,6 +279,9 @@ static __always_inline void add_page_to_
+ static __always_inline void del_page_from_lru_list(struct page *page,
+ 				struct lruvec *lruvec)
+ {
++	if (lru_gen_del_page(lruvec, page, false))
++		return;
++
+ 	list_del(&page->lru);
+ 	update_lru_size(lruvec, page_lru(page), page_zonenum(page),
+ 			-thp_nr_pages(page));
+--- a/include/linux/mmzone.h
++++ b/include/linux/mmzone.h
+@@ -294,6 +294,102 @@ enum lruvec_flags {
+ 					 */
+ };
+ 
++#endif /* !__GENERATING_BOUNDS_H */
++
++/*
++ * Evictable pages are divided into multiple generations. The youngest and the
++ * oldest generation numbers, max_seq and min_seq, are monotonically increasing.
++ * They form a sliding window of a variable size [MIN_NR_GENS, MAX_NR_GENS]. An
++ * offset within MAX_NR_GENS, i.e., gen, indexes the LRU list of the
++ * corresponding generation. The gen counter in page->flags stores gen+1 while
++ * a page is on one of lrugen->lists[]. Otherwise it stores 0.
++ *
++ * A page is added to the youngest generation on faulting. The aging needs to
++ * check the accessed bit at least twice before handing this page over to the
++ * eviction. The first check takes care of the accessed bit set on the initial
++ * fault; the second check makes sure this page hasn't been used since then.
++ * This process, AKA second chance, requires a minimum of two generations,
++ * hence MIN_NR_GENS. And to maintain ABI compatibility with the active/inactive
++ * LRU, e.g., /proc/vmstat, these two generations are considered active; the
++ * rest of generations, if they exist, are considered inactive. See
++ * lru_gen_is_active().
++ *
++ * PG_active is always cleared while a page is on one of lrugen->lists[] so that
++ * the aging needs not to worry about it. And it's set again when a page
++ * considered active is isolated for non-reclaiming purposes, e.g., migration.
++ * See lru_gen_add_page() and lru_gen_del_page().
++ *
++ * MAX_NR_GENS is set to 4 so that the multi-gen LRU can support twice the
++ * number of categories of the active/inactive LRU when keeping track of
++ * accesses through page tables. This requires order_base_2(MAX_NR_GENS+1) bits
++ * in page->flags.
++ */
++#define MIN_NR_GENS		2U
++#define MAX_NR_GENS		4U
++
++#ifndef __GENERATING_BOUNDS_H
++
++struct lruvec;
++
++#define LRU_GEN_MASK		((BIT(LRU_GEN_WIDTH) - 1) << LRU_GEN_PGOFF)
++#define LRU_REFS_MASK		((BIT(LRU_REFS_WIDTH) - 1) << LRU_REFS_PGOFF)
++
++#ifdef CONFIG_LRU_GEN
++
++enum {
++	LRU_GEN_ANON,
++	LRU_GEN_FILE,
++};
++
++/*
++ * The youngest generation number is stored in max_seq for both anon and file
++ * types as they are aged on an equal footing. The oldest generation numbers are
++ * stored in min_seq[] separately for anon and file types as clean file pages
++ * can be evicted regardless of swap constraints.
++ *
++ * Normally anon and file min_seq are in sync. But if swapping is constrained,
++ * e.g., out of swap space, file min_seq is allowed to advance and leave anon
++ * min_seq behind.
++ *
++ * The number of pages in each generation is eventually consistent and therefore
++ * can be transiently negative.
++ */
++struct lru_gen_struct {
++	/* the aging increments the youngest generation number */
++	unsigned long max_seq;
++	/* the eviction increments the oldest generation numbers */
++	unsigned long min_seq[ANON_AND_FILE];
++	/* the multi-gen LRU lists, lazily sorted on eviction */
++	struct list_head lists[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES];
++	/* the multi-gen LRU sizes, eventually consistent */
++	long nr_pages[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES];
++};
++
++void lru_gen_init_lruvec(struct lruvec *lruvec);
++
++#ifdef CONFIG_MEMCG
++void lru_gen_init_memcg(struct mem_cgroup *memcg);
++void lru_gen_exit_memcg(struct mem_cgroup *memcg);
++#endif
++
++#else /* !CONFIG_LRU_GEN */
++
++static inline void lru_gen_init_lruvec(struct lruvec *lruvec)
++{
++}
++
++#ifdef CONFIG_MEMCG
++static inline void lru_gen_init_memcg(struct mem_cgroup *memcg)
++{
++}
++
++static inline void lru_gen_exit_memcg(struct mem_cgroup *memcg)
++{
++}
++#endif
++
++#endif /* CONFIG_LRU_GEN */
++
+ struct lruvec {
+ 	struct list_head		lists[NR_LRU_LISTS];
+ 	/* per lruvec lru_lock for memcg */
+@@ -311,6 +407,10 @@ struct lruvec {
+ 	unsigned long			refaults[ANON_AND_FILE];
+ 	/* Various lruvec state flags (enum lruvec_flags) */
+ 	unsigned long			flags;
++#ifdef CONFIG_LRU_GEN
++	/* evictable pages divided into generations */
++	struct lru_gen_struct		lrugen;
++#endif
+ #ifdef CONFIG_MEMCG
+ 	struct pglist_data *pgdat;
+ #endif
+--- a/include/linux/page-flags-layout.h
++++ b/include/linux/page-flags-layout.h
+@@ -55,7 +55,8 @@
+ #define SECTIONS_WIDTH		0
+ #endif
+ 
+-#if ZONES_WIDTH + SECTIONS_WIDTH + NODES_SHIFT <= BITS_PER_LONG - NR_PAGEFLAGS
++#if ZONES_WIDTH + LRU_GEN_WIDTH + SECTIONS_WIDTH + NODES_SHIFT \
++	<= BITS_PER_LONG - NR_PAGEFLAGS
+ #define NODES_WIDTH		NODES_SHIFT
+ #elif defined(CONFIG_SPARSEMEM_VMEMMAP)
+ #error "Vmemmap: No space for nodes field in page flags"
+@@ -89,8 +90,8 @@
+ #define LAST_CPUPID_SHIFT 0
+ #endif
+ 
+-#if ZONES_WIDTH + SECTIONS_WIDTH + NODES_WIDTH + KASAN_TAG_WIDTH + LAST_CPUPID_SHIFT \
+-	<= BITS_PER_LONG - NR_PAGEFLAGS
++#if ZONES_WIDTH + LRU_GEN_WIDTH + SECTIONS_WIDTH + NODES_WIDTH + \
++	KASAN_TAG_WIDTH + LAST_CPUPID_SHIFT <= BITS_PER_LONG - NR_PAGEFLAGS
+ #define LAST_CPUPID_WIDTH LAST_CPUPID_SHIFT
+ #else
+ #define LAST_CPUPID_WIDTH 0
+@@ -100,10 +101,12 @@
+ #define LAST_CPUPID_NOT_IN_PAGE_FLAGS
+ #endif
+ 
+-#if ZONES_WIDTH + SECTIONS_WIDTH + NODES_WIDTH + KASAN_TAG_WIDTH + LAST_CPUPID_WIDTH \
+-	> BITS_PER_LONG - NR_PAGEFLAGS
++#if ZONES_WIDTH + LRU_GEN_WIDTH + SECTIONS_WIDTH + NODES_WIDTH + \
++	KASAN_TAG_WIDTH + LAST_CPUPID_WIDTH > BITS_PER_LONG - NR_PAGEFLAGS
+ #error "Not enough bits in page flags"
+ #endif
+ 
++#define LRU_REFS_WIDTH	0
++
+ #endif
+ #endif /* _LINUX_PAGE_FLAGS_LAYOUT */
+--- a/include/linux/page-flags.h
++++ b/include/linux/page-flags.h
+@@ -845,7 +845,7 @@ static inline void ClearPageSlabPfmemall
+ 	 1UL << PG_private	| 1UL << PG_private_2	|	\
+ 	 1UL << PG_writeback	| 1UL << PG_reserved	|	\
+ 	 1UL << PG_slab		| 1UL << PG_active 	|	\
+-	 1UL << PG_unevictable	| __PG_MLOCKED)
++	 1UL << PG_unevictable	| __PG_MLOCKED | LRU_GEN_MASK)
+ 
+ /*
+  * Flags checked when a page is prepped for return by the page allocator.
+@@ -856,7 +856,7 @@ static inline void ClearPageSlabPfmemall
+  * alloc-free cycle to prevent from reusing the page.
+  */
+ #define PAGE_FLAGS_CHECK_AT_PREP	\
+-	(PAGEFLAGS_MASK & ~__PG_HWPOISON)
++	((PAGEFLAGS_MASK & ~__PG_HWPOISON) | LRU_GEN_MASK | LRU_REFS_MASK)
+ 
+ #define PAGE_FLAGS_PRIVATE				\
+ 	(1UL << PG_private | 1UL << PG_private_2)
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -911,6 +911,10 @@ struct task_struct {
+ #ifdef CONFIG_MEMCG
+ 	unsigned			in_user_fault:1;
+ #endif
++#ifdef CONFIG_LRU_GEN
++	/* whether the LRU algorithm may apply to this access */
++	unsigned			in_lru_fault:1;
++#endif
+ #ifdef CONFIG_COMPAT_BRK
+ 	unsigned			brk_randomized:1;
+ #endif
+--- a/kernel/bounds.c
++++ b/kernel/bounds.c
+@@ -22,6 +22,11 @@ int main(void)
+ 	DEFINE(NR_CPUS_BITS, ilog2(CONFIG_NR_CPUS));
+ #endif
+ 	DEFINE(SPINLOCK_SIZE, sizeof(spinlock_t));
++#ifdef CONFIG_LRU_GEN
++	DEFINE(LRU_GEN_WIDTH, order_base_2(MAX_NR_GENS + 1));
++#else
++	DEFINE(LRU_GEN_WIDTH, 0);
++#endif
+ 	/* End of constants */
+ 
+ 	return 0;
+--- a/mm/Kconfig
++++ b/mm/Kconfig
+@@ -897,6 +897,14 @@ config IO_MAPPING
+ config SECRETMEM
+ 	def_bool ARCH_HAS_SET_DIRECT_MAP && !EMBEDDED
+ 
++config LRU_GEN
++	bool "Multi-Gen LRU"
++	depends on MMU
++	# make sure page->flags has enough spare bits
++	depends on 64BIT || !SPARSEMEM || SPARSEMEM_VMEMMAP
++	help
++	  A high performance LRU implementation to overcommit memory.
++
+ source "mm/damon/Kconfig"
+ 
+ endmenu
+--- a/mm/huge_memory.c
++++ b/mm/huge_memory.c
+@@ -2366,7 +2366,8 @@ static void __split_huge_page_tail(struc
+ #ifdef CONFIG_64BIT
+ 			 (1L << PG_arch_2) |
+ #endif
+-			 (1L << PG_dirty)));
++			 (1L << PG_dirty) |
++			 LRU_GEN_MASK | LRU_REFS_MASK));
+ 
+ 	/* ->mapping in first tail page is compound_mapcount */
+ 	VM_BUG_ON_PAGE(tail > 2 && page_tail->mapping != TAIL_MAPPING,
+--- a/mm/memcontrol.c
++++ b/mm/memcontrol.c
+@@ -5178,6 +5178,7 @@ static void __mem_cgroup_free(struct mem
+ 
+ static void mem_cgroup_free(struct mem_cgroup *memcg)
+ {
++	lru_gen_exit_memcg(memcg);
+ 	memcg_wb_domain_exit(memcg);
+ 	__mem_cgroup_free(memcg);
+ }
+@@ -5241,6 +5242,7 @@ static struct mem_cgroup *mem_cgroup_all
+ 	memcg->deferred_split_queue.split_queue_len = 0;
+ #endif
+ 	idr_replace(&mem_cgroup_idr, memcg, memcg->id.id);
++	lru_gen_init_memcg(memcg);
+ 	return memcg;
+ fail:
+ 	mem_cgroup_id_remove(memcg);
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -4792,6 +4792,27 @@ static inline void mm_account_fault(stru
+ 		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address);
+ }
+ 
++#ifdef CONFIG_LRU_GEN
++static void lru_gen_enter_fault(struct vm_area_struct *vma)
++{
++	/* the LRU algorithm doesn't apply to sequential or random reads */
++	current->in_lru_fault = !(vma->vm_flags & (VM_SEQ_READ | VM_RAND_READ));
++}
++
++static void lru_gen_exit_fault(void)
++{
++	current->in_lru_fault = false;
++}
++#else
++static void lru_gen_enter_fault(struct vm_area_struct *vma)
++{
++}
++
++static void lru_gen_exit_fault(void)
++{
++}
++#endif /* CONFIG_LRU_GEN */
++
+ /*
+  * By the time we get here, we already hold the mm semaphore
+  *
+@@ -4823,11 +4844,15 @@ vm_fault_t handle_mm_fault(struct vm_are
+ 	if (flags & FAULT_FLAG_USER)
+ 		mem_cgroup_enter_user_fault();
+ 
++	lru_gen_enter_fault(vma);
++
+ 	if (unlikely(is_vm_hugetlb_page(vma)))
+ 		ret = hugetlb_fault(vma->vm_mm, vma, address, flags);
+ 	else
+ 		ret = __handle_mm_fault(vma, address, flags);
+ 
++	lru_gen_exit_fault();
++
+ 	if (flags & FAULT_FLAG_USER) {
+ 		mem_cgroup_exit_user_fault();
+ 		/*
+--- a/mm/mm_init.c
++++ b/mm/mm_init.c
+@@ -65,14 +65,16 @@ void __init mminit_verify_pageflags_layo
+ 
+ 	shift = 8 * sizeof(unsigned long);
+ 	width = shift - SECTIONS_WIDTH - NODES_WIDTH - ZONES_WIDTH
+-		- LAST_CPUPID_SHIFT - KASAN_TAG_WIDTH;
++		- LAST_CPUPID_SHIFT - KASAN_TAG_WIDTH - LRU_GEN_WIDTH - LRU_REFS_WIDTH;
+ 	mminit_dprintk(MMINIT_TRACE, "pageflags_layout_widths",
+-		"Section %d Node %d Zone %d Lastcpupid %d Kasantag %d Flags %d\n",
++		"Section %d Node %d Zone %d Lastcpupid %d Kasantag %d Gen %d Tier %d Flags %d\n",
+ 		SECTIONS_WIDTH,
+ 		NODES_WIDTH,
+ 		ZONES_WIDTH,
+ 		LAST_CPUPID_WIDTH,
+ 		KASAN_TAG_WIDTH,
++		LRU_GEN_WIDTH,
++		LRU_REFS_WIDTH,
+ 		NR_PAGEFLAGS);
+ 	mminit_dprintk(MMINIT_TRACE, "pageflags_layout_shifts",
+ 		"Section %d Node %d Zone %d Lastcpupid %d Kasantag %d\n",
+--- a/mm/mmzone.c
++++ b/mm/mmzone.c
+@@ -81,6 +81,8 @@ void lruvec_init(struct lruvec *lruvec)
+ 
+ 	for_each_lru(lru)
+ 		INIT_LIST_HEAD(&lruvec->lists[lru]);
++
++	lru_gen_init_lruvec(lruvec);
+ }
+ 
+ #if defined(CONFIG_NUMA_BALANCING) && !defined(LAST_CPUPID_NOT_IN_PAGE_FLAGS)
+--- a/mm/swap.c
++++ b/mm/swap.c
+@@ -446,6 +446,11 @@ void lru_cache_add(struct page *page)
+ 	VM_BUG_ON_PAGE(PageActive(page) && PageUnevictable(page), page);
+ 	VM_BUG_ON_PAGE(PageLRU(page), page);
+ 
++	/* see the comment in lru_gen_add_page() */
++	if (lru_gen_enabled() && !PageUnevictable(page) &&
++	    lru_gen_in_fault() && !(current->flags & PF_MEMALLOC))
++		SetPageActive(page);
++
+ 	get_page(page);
+ 	local_lock(&lru_pvecs.lock);
+ 	pvec = this_cpu_ptr(&lru_pvecs.lru_add);
+@@ -547,7 +552,7 @@ static void lru_deactivate_file_fn(struc
+ 
+ static void lru_deactivate_fn(struct page *page, struct lruvec *lruvec)
+ {
+-	if (PageActive(page) && !PageUnevictable(page)) {
++	if (!PageUnevictable(page) && (PageActive(page) || lru_gen_enabled())) {
+ 		int nr_pages = thp_nr_pages(page);
+ 
+ 		del_page_from_lru_list(page, lruvec);
+@@ -661,7 +666,8 @@ void deactivate_file_page(struct page *p
+  */
+ void deactivate_page(struct page *page)
+ {
+-	if (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) {
++	if (PageLRU(page) && !PageUnevictable(page) &&
++	    (PageActive(page) || lru_gen_enabled())) {
+ 		struct pagevec *pvec;
+ 
+ 		local_lock(&lru_pvecs.lock);
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -2821,6 +2821,81 @@ static bool can_age_anon_pages(struct pg
+ 	return can_demote(pgdat->node_id, sc);
+ }
+ 
++#ifdef CONFIG_LRU_GEN
++
++/******************************************************************************
++ *                          shorthand helpers
++ ******************************************************************************/
++
++#define for_each_gen_type_zone(gen, type, zone)				\
++	for ((gen) = 0; (gen) < MAX_NR_GENS; (gen)++)			\
++		for ((type) = 0; (type) < ANON_AND_FILE; (type)++)	\
++			for ((zone) = 0; (zone) < MAX_NR_ZONES; (zone)++)
++
++static struct lruvec __maybe_unused *get_lruvec(struct mem_cgroup *memcg, int nid)
++{
++	struct pglist_data *pgdat = NODE_DATA(nid);
++
++#ifdef CONFIG_MEMCG
++	if (memcg) {
++		struct lruvec *lruvec = &memcg->nodeinfo[nid]->lruvec;
++
++		/* for hotadd_new_pgdat() */
++		if (!lruvec->pgdat)
++			lruvec->pgdat = pgdat;
++
++		return lruvec;
++	}
++#endif
++	VM_WARN_ON_ONCE(!mem_cgroup_disabled());
++
++	return pgdat ? &pgdat->__lruvec : NULL;
++}
++
++/******************************************************************************
++ *                          initialization
++ ******************************************************************************/
++
++void lru_gen_init_lruvec(struct lruvec *lruvec)
++{
++	int gen, type, zone;
++	struct lru_gen_struct *lrugen = &lruvec->lrugen;
++
++	lrugen->max_seq = MIN_NR_GENS + 1;
++
++	for_each_gen_type_zone(gen, type, zone)
++		INIT_LIST_HEAD(&lrugen->lists[gen][type][zone]);
++}
++
++#ifdef CONFIG_MEMCG
++void lru_gen_init_memcg(struct mem_cgroup *memcg)
++{
++}
++
++void lru_gen_exit_memcg(struct mem_cgroup *memcg)
++{
++	int nid;
++
++	for_each_node(nid) {
++		struct lruvec *lruvec = get_lruvec(memcg, nid);
++
++		VM_WARN_ON_ONCE(memchr_inv(lruvec->lrugen.nr_pages, 0,
++					   sizeof(lruvec->lrugen.nr_pages)));
++	}
++}
++#endif
++
++static int __init init_lru_gen(void)
++{
++	BUILD_BUG_ON(MIN_NR_GENS + 1 >= MAX_NR_GENS);
++	BUILD_BUG_ON(BIT(LRU_GEN_WIDTH) <= MAX_NR_GENS);
++
++	return 0;
++};
++late_initcall(init_lru_gen);
++
++#endif /* CONFIG_LRU_GEN */
++
+ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
+ {
+ 	unsigned long nr[NR_LRU_LISTS];

1447 additions, 0 deletions
target/linux/generic/backport-6.1/020-v6.1-06-mm-multi-gen-LRU-minimal-implementation.patch

@@ -0,0 +1,1447 @@
+From b564b9471cd60ef1ee3961a224898ce4a9620d84 Mon Sep 17 00:00:00 2001
+From: Yu Zhao <[email protected]>
+Date: Sun, 18 Sep 2022 02:00:03 -0600
+Subject: [PATCH 06/29] mm: multi-gen LRU: minimal implementation
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+To avoid confusion, the terms "promotion" and "demotion" will be applied
+to the multi-gen LRU, as a new convention; the terms "activation" and
+"deactivation" will be applied to the active/inactive LRU, as usual.
+
+The aging produces young generations.  Given an lruvec, it increments
+max_seq when max_seq-min_seq+1 approaches MIN_NR_GENS.  The aging promotes
+hot pages to the youngest generation when it finds them accessed through
+page tables; the demotion of cold pages happens consequently when it
+increments max_seq.  Promotion in the aging path does not involve any LRU
+list operations, only the updates of the gen counter and
+lrugen->nr_pages[]; demotion, unless as the result of the increment of
+max_seq, requires LRU list operations, e.g., lru_deactivate_fn().  The
+aging has the complexity O(nr_hot_pages), since it is only interested in
+hot pages.
+
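Stripped of locking, per-zone counts and memcg handling, the lazy-aging rule reduces to a window check; the sketch below mirrors only the first test in should_run_aging() and the seq bookkeeping of inc_max_seq() from the patch body, using toy names:

    #include <stdbool.h>
    #include <stdio.h>

    #define MIN_NR_GENS 2UL
    #define MAX_NR_GENS 4UL

    struct toy_lruvec { unsigned long max_seq, min_seq; };

    /* age only when the window is about to shrink to MIN_NR_GENS */
    static bool need_aging(const struct toy_lruvec *l)
    {
        return l->min_seq + MIN_NR_GENS > l->max_seq;
    }

    static void inc_max_seq(struct toy_lruvec *l)
    {
        /* keep at most MAX_NR_GENS generations, as inc_min_seq() would */
        if (l->max_seq - l->min_seq + 1 == MAX_NR_GENS)
            l->min_seq++;
        l->max_seq++;               /* everything not promoted just became older */
    }

    int main(void)
    {
        struct toy_lruvec l = { .max_seq = 3, .min_seq = 2 };    /* two generations */

        if (need_aging(&l))
            inc_max_seq(&l);
        printf("now %lu generations (max_seq=%lu, min_seq=%lu)\n",
               l.max_seq - l.min_seq + 1, l.max_seq, l.min_seq);
        return 0;
    }
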
+The eviction consumes old generations.  Given an lruvec, it increments
+min_seq when lrugen->lists[] indexed by min_seq%MAX_NR_GENS becomes empty.
+A feedback loop modeled after the PID controller monitors refaults over
+anon and file types and decides which type to evict when both types are
+available from the same generation.
+
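The type-selection half of that feedback loop can be sketched as a weighted comparison of refault ratios; a simplified cousin of read_ctrl_pos() and positive_ctrl_err() from the patch body, with made-up numbers and without the MIN_LRU_BATCH short-circuit:

    #include <stdio.h>

    struct toy_ctrl { unsigned long refaulted, total, gain; };

    /* true if the PV refaults no more, proportionally, than the setpoint */
    static int positive_ctrl_err(const struct toy_ctrl *sp, const struct toy_ctrl *pv)
    {
        return pv->refaulted * (sp->total + 1) * sp->gain <=
               (sp->refaulted + 1) * pv->total * pv->gain;
    }

    int main(void)
    {
        unsigned long swappiness = 60;
        /* first-tier stats of each type; anon is the setpoint, file the PV */
        struct toy_ctrl anon = { .refaulted = 50,  .total = 1000, .gain = swappiness };
        struct toy_ctrl file = { .refaulted = 400, .total = 1000, .gain = 200 - swappiness };

        /* file refaults heavily here, so it is protected and anon is evicted */
        printf("evict %s first\n", positive_ctrl_err(&anon, &file) ? "file" : "anon");
        return 0;
    }
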
+The protection of pages accessed multiple times through file descriptors
+takes place in the eviction path.  Each generation is divided into
+multiple tiers.  A page accessed N times through file descriptors is in
+tier order_base_2(N).  Tiers do not have dedicated lrugen->lists[], only
+bits in page->flags.  The aforementioned feedback loop also monitors
+refaults over all tiers and decides when to protect pages in which tiers
+(N>1), using the first tier (N=0,1) as a baseline.  The first tier
+contains single-use unmapped clean pages, which are most likely the best
+choices.  In contrast to promotion in the aging path, the protection of a
+page in the eviction path is achieved by moving this page to the next
+generation, i.e., min_seq+1, if the feedback loop decides so.  This
+approach has the following advantages:
+
+1. It removes the cost of activation in the buffered access path by
+   inferring whether pages accessed multiple times through file
+   descriptors are statistically hot and thus worth protecting in the
+   eviction path.
+2. It takes pages accessed through page tables into account and avoids
+   overprotecting pages accessed multiple times through file
+   descriptors. (Pages accessed through page tables are in the first
+   tier, since N=0.)
+3. More tiers provide better protection for pages accessed more than
+   twice through file descriptors, when under heavy buffered I/O
+   workloads.
+
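The tier mapping called out above, tier = order_base_2(N), is easy to check directly; a small sketch computed from the access count N rather than from the refs counter packed into page->flags (order_base_2() is reimplemented here for userspace):

    #include <stdio.h>

    #define MAX_NR_TIERS 4

    /* order_base_2(n): 0 for n <= 1, otherwise ceil(log2(n)) */
    static int order_base_2(unsigned int n)
    {
        int order = 0;

        while ((1U << order) < n)
            order++;
        return order;
    }

    static int tier_from_accesses(unsigned int n)
    {
        int tier = order_base_2(n);     /* N=0,1 -> 0; N=2 -> 1; N=3,4 -> 2; ... */

        /* the refs counter saturates, which caps the tier in practice */
        return tier < MAX_NR_TIERS ? tier : MAX_NR_TIERS - 1;
    }

    int main(void)
    {
        for (unsigned int n = 0; n <= 8; n++)
            printf("N=%u -> tier %d\n", n, tier_from_accesses(n));
        return 0;
    }
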
+Server benchmark results:
+  Single workload:
+    fio (buffered I/O): +[30, 32]%
+                IOPS         BW
+      5.19-rc1: 2673k        10.2GiB/s
+      patch1-6: 3491k        13.3GiB/s
+
+  Single workload:
+    memcached (anon): -[4, 6]%
+                Ops/sec      KB/sec
+      5.19-rc1: 1161501.04   45177.25
+      patch1-6: 1106168.46   43025.04
+
+  Configurations:
+    CPU: two Xeon 6154
+    Mem: total 256G
+
+    Node 1 was only used as a ram disk to reduce the variance in the
+    results.
+
+    patch drivers/block/brd.c <<EOF
+    99,100c99,100
+    < 	gfp_flags = GFP_NOIO | __GFP_ZERO | __GFP_HIGHMEM;
+    < 	page = alloc_page(gfp_flags);
+    ---
+    > 	gfp_flags = GFP_NOIO | __GFP_ZERO | __GFP_HIGHMEM | __GFP_THISNODE;
+    > 	page = alloc_pages_node(1, gfp_flags, 0);
+    EOF
+
+    cat >>/etc/systemd/system.conf <<EOF
+    CPUAffinity=numa
+    NUMAPolicy=bind
+    NUMAMask=0
+    EOF
+
+    cat >>/etc/memcached.conf <<EOF
+    -m 184320
+    -s /var/run/memcached/memcached.sock
+    -a 0766
+    -t 36
+    -B binary
+    EOF
+
+    cat fio.sh
+    modprobe brd rd_nr=1 rd_size=113246208
+    swapoff -a
+    mkfs.ext4 /dev/ram0
+    mount -t ext4 /dev/ram0 /mnt
+
+    mkdir /sys/fs/cgroup/user.slice/test
+    echo 38654705664 >/sys/fs/cgroup/user.slice/test/memory.max
+    echo $$ >/sys/fs/cgroup/user.slice/test/cgroup.procs
+    fio -name=mglru --numjobs=72 --directory=/mnt --size=1408m \
+      --buffered=1 --ioengine=io_uring --iodepth=128 \
+      --iodepth_batch_submit=32 --iodepth_batch_complete=32 \
+      --rw=randread --random_distribution=random --norandommap \
+      --time_based --ramp_time=10m --runtime=5m --group_reporting
+
+    cat memcached.sh
+    modprobe brd rd_nr=1 rd_size=113246208
+    swapoff -a
+    mkswap /dev/ram0
+    swapon /dev/ram0
+
+    memtier_benchmark -S /var/run/memcached/memcached.sock \
+      -P memcache_binary -n allkeys --key-minimum=1 \
+      --key-maximum=65000000 --key-pattern=P:P -c 1 -t 36 \
+      --ratio 1:0 --pipeline 8 -d 2000
+
+    memtier_benchmark -S /var/run/memcached/memcached.sock \
+      -P memcache_binary -n allkeys --key-minimum=1 \
+      --key-maximum=65000000 --key-pattern=R:R -c 1 -t 36 \
+      --ratio 0:1 --pipeline 8 --randomize --distinct-client-seed
+
+Client benchmark results:
+  kswapd profiles:
+    5.19-rc1
+      40.33%  page_vma_mapped_walk (overhead)
+      21.80%  lzo1x_1_do_compress (real work)
+       7.53%  do_raw_spin_lock
+       3.95%  _raw_spin_unlock_irq
+       2.52%  vma_interval_tree_iter_next
+       2.37%  page_referenced_one
+       2.28%  vma_interval_tree_subtree_search
+       1.97%  anon_vma_interval_tree_iter_first
+       1.60%  ptep_clear_flush
+       1.06%  __zram_bvec_write
+
+    patch1-6
+      39.03%  lzo1x_1_do_compress (real work)
+      18.47%  page_vma_mapped_walk (overhead)
+       6.74%  _raw_spin_unlock_irq
+       3.97%  do_raw_spin_lock
+       2.49%  ptep_clear_flush
+       2.48%  anon_vma_interval_tree_iter_first
+       1.92%  page_referenced_one
+       1.88%  __zram_bvec_write
+       1.48%  memmove
+       1.31%  vma_interval_tree_iter_next
+
+  Configurations:
+    CPU: single Snapdragon 7c
+    Mem: total 4G
+
+    ChromeOS MemoryPressure [1]
+
+[1] https://chromium.googlesource.com/chromiumos/platform/tast-tests/
+
+Link: https://lkml.kernel.org/r/[email protected]
+Signed-off-by: Yu Zhao <[email protected]>
+Acked-by: Brian Geffon <[email protected]>
+Acked-by: Jan Alexander Steffens (heftig) <[email protected]>
+Acked-by: Oleksandr Natalenko <[email protected]>
+Acked-by: Steven Barrett <[email protected]>
+Acked-by: Suleiman Souhlal <[email protected]>
+Tested-by: Daniel Byrne <[email protected]>
+Tested-by: Donald Carr <[email protected]>
+Tested-by: Holger Hoffstätte <[email protected]>
+Tested-by: Konstantin Kharlamov <[email protected]>
+Tested-by: Shuang Zhai <[email protected]>
+Tested-by: Sofia Trinh <[email protected]>
+Tested-by: Vaibhav Jain <[email protected]>
+Cc: Andi Kleen <[email protected]>
+Cc: Aneesh Kumar K.V <[email protected]>
+Cc: Barry Song <[email protected]>
+Cc: Catalin Marinas <[email protected]>
+Cc: Dave Hansen <[email protected]>
+Cc: Hillf Danton <[email protected]>
+Cc: Jens Axboe <[email protected]>
+Cc: Johannes Weiner <[email protected]>
+Cc: Jonathan Corbet <[email protected]>
+Cc: Linus Torvalds <[email protected]>
+Cc: Matthew Wilcox <[email protected]>
+Cc: Mel Gorman <[email protected]>
+Cc: Miaohe Lin <[email protected]>
+Cc: Michael Larabel <[email protected]>
+Cc: Michal Hocko <[email protected]>
+Cc: Mike Rapoport <[email protected]>
+Cc: Mike Rapoport <[email protected]>
+Cc: Peter Zijlstra <[email protected]>
+Cc: Qi Zheng <[email protected]>
+Cc: Tejun Heo <[email protected]>
+Cc: Vlastimil Babka <[email protected]>
+Cc: Will Deacon <[email protected]>
+Signed-off-by: Andrew Morton <[email protected]>
+---
+ include/linux/mm_inline.h         |  36 ++
+ include/linux/mmzone.h            |  41 ++
+ include/linux/page-flags-layout.h |   5 +-
+ kernel/bounds.c                   |   2 +
+ mm/Kconfig                        |  11 +
+ mm/swap.c                         |  39 ++
+ mm/vmscan.c                       | 792 +++++++++++++++++++++++++++++-
+ mm/workingset.c                   | 110 ++++-
+ 8 files changed, 1025 insertions(+), 11 deletions(-)
+
+--- a/include/linux/mm_inline.h
++++ b/include/linux/mm_inline.h
+@@ -106,6 +106,33 @@ static inline int lru_gen_from_seq(unsig
+ 	return seq % MAX_NR_GENS;
+ }
+ 
++static inline int lru_hist_from_seq(unsigned long seq)
++{
++	return seq % NR_HIST_GENS;
++}
++
++static inline int lru_tier_from_refs(int refs)
++{
++	VM_WARN_ON_ONCE(refs > BIT(LRU_REFS_WIDTH));
++
++	/* see the comment in page_lru_refs() */
++	return order_base_2(refs + 1);
++}
++
++static inline int page_lru_refs(struct page *page)
++{
++	unsigned long flags = READ_ONCE(page->flags);
++	bool workingset = flags & BIT(PG_workingset);
++
++	/*
++	 * Return the number of accesses beyond PG_referenced, i.e., N-1 if the
++	 * total number of accesses is N>1, since N=0,1 both map to the first
++	 * tier. lru_tier_from_refs() will account for this off-by-one. Also see
++	 * the comment on MAX_NR_TIERS.
++	 */
++	return ((flags & LRU_REFS_MASK) >> LRU_REFS_PGOFF) + workingset;
++}
++
+ static inline int page_lru_gen(struct page *page)
+ {
+ 	unsigned long flags = READ_ONCE(page->flags);
+@@ -158,6 +185,15 @@ static inline void lru_gen_update_size(s
+ 		__update_lru_size(lruvec, lru, zone, -delta);
+ 		return;
+ 	}
++
++	/* promotion */
++	if (!lru_gen_is_active(lruvec, old_gen) && lru_gen_is_active(lruvec, new_gen)) {
++		__update_lru_size(lruvec, lru, zone, -delta);
++		__update_lru_size(lruvec, lru + LRU_ACTIVE, zone, delta);
++	}
++
++	/* demotion requires isolation, e.g., lru_deactivate_fn() */
++	VM_WARN_ON_ONCE(lru_gen_is_active(lruvec, old_gen) && !lru_gen_is_active(lruvec, new_gen));
+ }
+ 
+ static inline bool lru_gen_add_page(struct lruvec *lruvec, struct page *page, bool reclaiming)
+--- a/include/linux/mmzone.h
++++ b/include/linux/mmzone.h
+@@ -327,6 +327,28 @@ enum lruvec_flags {
+ #define MIN_NR_GENS		2U
+ #define MAX_NR_GENS		4U
+ 
++/*
++ * Each generation is divided into multiple tiers. A page accessed N times
++ * through file descriptors is in tier order_base_2(N). A page in the first tier
++ * (N=0,1) is marked by PG_referenced unless it was faulted in through page
++ * tables or read ahead. A page in any other tier (N>1) is marked by
++ * PG_referenced and PG_workingset. This implies a minimum of two tiers is
++ * supported without using additional bits in page->flags.
++ *
++ * In contrast to moving across generations which requires the LRU lock, moving
++ * across tiers only involves atomic operations on page->flags and therefore
++ * has a negligible cost in the buffered access path. In the eviction path,
++ * comparisons of refaulted/(evicted+protected) from the first tier and the
++ * rest infer whether pages accessed multiple times through file descriptors
++ * are statistically hot and thus worth protecting.
++ *
++ * MAX_NR_TIERS is set to 4 so that the multi-gen LRU can support twice the
++ * number of categories of the active/inactive LRU when keeping track of
++ * accesses through file descriptors. This uses MAX_NR_TIERS-2 spare bits in
++ * page->flags.
++ */
++#define MAX_NR_TIERS		4U
++
+ #ifndef __GENERATING_BOUNDS_H
+ 
+ struct lruvec;
+@@ -341,6 +363,16 @@ enum {
+ 	LRU_GEN_FILE,
+ };
+ 
++#define MIN_LRU_BATCH		BITS_PER_LONG
++#define MAX_LRU_BATCH		(MIN_LRU_BATCH * 64)
++
++/* whether to keep historical stats from evicted generations */
++#ifdef CONFIG_LRU_GEN_STATS
++#define NR_HIST_GENS		MAX_NR_GENS
++#else
++#define NR_HIST_GENS		1U
++#endif
++
+ /*
+  * The youngest generation number is stored in max_seq for both anon and file
+  * types as they are aged on an equal footing. The oldest generation numbers are
+@@ -363,6 +395,15 @@ struct lru_gen_struct {
+ 	struct list_head lists[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES];
+ 	/* the multi-gen LRU sizes, eventually consistent */
+ 	long nr_pages[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES];
++	/* the exponential moving average of refaulted */
++	unsigned long avg_refaulted[ANON_AND_FILE][MAX_NR_TIERS];
++	/* the exponential moving average of evicted+protected */
++	unsigned long avg_total[ANON_AND_FILE][MAX_NR_TIERS];
++	/* the first tier doesn't need protection, hence the minus one */
++	unsigned long protected[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS - 1];
++	/* can be modified without holding the LRU lock */
++	atomic_long_t evicted[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS];
++	atomic_long_t refaulted[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS];
+ };
+ 
+ void lru_gen_init_lruvec(struct lruvec *lruvec);
+--- a/include/linux/page-flags-layout.h
++++ b/include/linux/page-flags-layout.h
+@@ -106,7 +106,10 @@
+ #error "Not enough bits in page flags"
+ #endif
+ 
+-#define LRU_REFS_WIDTH	0
++/* see the comment on MAX_NR_TIERS */
++#define LRU_REFS_WIDTH	min(__LRU_REFS_WIDTH, BITS_PER_LONG - NR_PAGEFLAGS - \
++			    ZONES_WIDTH - LRU_GEN_WIDTH - SECTIONS_WIDTH - \
++			    NODES_WIDTH - KASAN_TAG_WIDTH - LAST_CPUPID_WIDTH)
+ 
+ #endif
+ #endif /* _LINUX_PAGE_FLAGS_LAYOUT */
+--- a/kernel/bounds.c
++++ b/kernel/bounds.c
+@@ -24,8 +24,10 @@ int main(void)
+ 	DEFINE(SPINLOCK_SIZE, sizeof(spinlock_t));
+ #ifdef CONFIG_LRU_GEN
+ 	DEFINE(LRU_GEN_WIDTH, order_base_2(MAX_NR_GENS + 1));
++	DEFINE(__LRU_REFS_WIDTH, MAX_NR_TIERS - 2);
+ #else
+ 	DEFINE(LRU_GEN_WIDTH, 0);
++	DEFINE(__LRU_REFS_WIDTH, 0);
+ #endif
+ 	/* End of constants */
+ 
+--- a/mm/Kconfig
++++ b/mm/Kconfig
+@@ -897,6 +897,7 @@ config IO_MAPPING
+ config SECRETMEM
+ 	def_bool ARCH_HAS_SET_DIRECT_MAP && !EMBEDDED
+ 
++# multi-gen LRU {
+ config LRU_GEN
+ 	bool "Multi-Gen LRU"
+ 	depends on MMU
+@@ -905,6 +906,16 @@ config LRU_GEN
+ 	help
+ 	  A high performance LRU implementation to overcommit memory.
+ 
++config LRU_GEN_STATS
++	bool "Full stats for debugging"
++	depends on LRU_GEN
++	help
++	  Do not enable this option unless you plan to look at historical stats
++	  from evicted generations for debugging purpose.
++
++	  This option has a per-memcg and per-node memory overhead.
++# }
++
+ source "mm/damon/Kconfig"
+ 
+ endmenu
+--- a/mm/swap.c
++++ b/mm/swap.c
+@@ -389,6 +389,40 @@ static void __lru_cache_activate_page(st
+ 	local_unlock(&lru_pvecs.lock);
+ }
+ 
++#ifdef CONFIG_LRU_GEN
++static void page_inc_refs(struct page *page)
++{
++	unsigned long new_flags, old_flags = READ_ONCE(page->flags);
++
++	if (PageUnevictable(page))
++		return;
++
++	if (!PageReferenced(page)) {
++		SetPageReferenced(page);
++		return;
++	}
++
++	if (!PageWorkingset(page)) {
++		SetPageWorkingset(page);
++		return;
++	}
++
++	/* see the comment on MAX_NR_TIERS */
++	do {
++		new_flags = old_flags & LRU_REFS_MASK;
++		if (new_flags == LRU_REFS_MASK)
++			break;
++
++		new_flags += BIT(LRU_REFS_PGOFF);
++		new_flags |= old_flags & ~LRU_REFS_MASK;
++	} while (!try_cmpxchg(&page->flags, &old_flags, new_flags));
++}
++#else
++static void page_inc_refs(struct page *page)
++{
++}
++#endif /* CONFIG_LRU_GEN */
++
+ /*
+  * Mark a page as having seen activity.
+  *
+@@ -403,6 +437,11 @@ void mark_page_accessed(struct page *pag
+ {
+ 	page = compound_head(page);
+ 
++	if (lru_gen_enabled()) {
++		page_inc_refs(page);
++		return;
++	}
++
+ 	if (!PageReferenced(page)) {
+ 		SetPageReferenced(page);
+ 	} else if (PageUnevictable(page)) {
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -1142,9 +1142,11 @@ static int __remove_mapping(struct addre
+ 
+ 	if (PageSwapCache(page)) {
+ 		swp_entry_t swap = { .val = page_private(page) };
+-		mem_cgroup_swapout(page, swap);
++
++		/* get a shadow entry before mem_cgroup_swapout() clears page_memcg() */
+ 		if (reclaimed && !mapping_exiting(mapping))
+ 			shadow = workingset_eviction(page, target_memcg);
++		mem_cgroup_swapout(page, swap);
+ 		__delete_from_swap_cache(page, swap, shadow);
+ 		xa_unlock_irq(&mapping->i_pages);
+ 		put_swap_page(page, swap);
+@@ -2502,6 +2504,9 @@ static void prepare_scan_count(pg_data_t
+ 	unsigned long file;
+ 	struct lruvec *target_lruvec;
+ 
++	if (lru_gen_enabled())
++		return;
++
+ 	target_lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat);
+ 
+ 	/*
+@@ -2827,6 +2832,17 @@ static bool can_age_anon_pages(struct pg
+  *                          shorthand helpers
+  ******************************************************************************/
+ 
++#define LRU_REFS_FLAGS	(BIT(PG_referenced) | BIT(PG_workingset))
++
++#define DEFINE_MAX_SEQ(lruvec)						\
++	unsigned long max_seq = READ_ONCE((lruvec)->lrugen.max_seq)
++
++#define DEFINE_MIN_SEQ(lruvec)						\
++	unsigned long min_seq[ANON_AND_FILE] = {			\
++		READ_ONCE((lruvec)->lrugen.min_seq[LRU_GEN_ANON]),	\
++		READ_ONCE((lruvec)->lrugen.min_seq[LRU_GEN_FILE]),	\
++	}
++
+ #define for_each_gen_type_zone(gen, type, zone)				\
+ 	for ((gen) = 0; (gen) < MAX_NR_GENS; (gen)++)			\
+ 		for ((type) = 0; (type) < ANON_AND_FILE; (type)++)	\
+@@ -2852,6 +2868,745 @@ static struct lruvec __maybe_unused *get
+ 	return pgdat ? &pgdat->__lruvec : NULL;
+ }
+ 
++static int get_swappiness(struct lruvec *lruvec, struct scan_control *sc)
++{
++	struct mem_cgroup *memcg = lruvec_memcg(lruvec);
++	struct pglist_data *pgdat = lruvec_pgdat(lruvec);
++
++	if (!can_demote(pgdat->node_id, sc) &&
++	    mem_cgroup_get_nr_swap_pages(memcg) < MIN_LRU_BATCH)
++		return 0;
++
++	return mem_cgroup_swappiness(memcg);
++}
++
++static int get_nr_gens(struct lruvec *lruvec, int type)
++{
++	return lruvec->lrugen.max_seq - lruvec->lrugen.min_seq[type] + 1;
++}
++
++static bool __maybe_unused seq_is_valid(struct lruvec *lruvec)
++{
++	/* see the comment on lru_gen_struct */
++	return get_nr_gens(lruvec, LRU_GEN_FILE) >= MIN_NR_GENS &&
++	       get_nr_gens(lruvec, LRU_GEN_FILE) <= get_nr_gens(lruvec, LRU_GEN_ANON) &&
++	       get_nr_gens(lruvec, LRU_GEN_ANON) <= MAX_NR_GENS;
++}
++
++/******************************************************************************
++ *                          refault feedback loop
++ ******************************************************************************/
++
++/*
++ * A feedback loop based on Proportional-Integral-Derivative (PID) controller.
++ *
++ * The P term is refaulted/(evicted+protected) from a tier in the generation
++ * currently being evicted; the I term is the exponential moving average of the
++ * P term over the generations previously evicted, using the smoothing factor
++ * 1/2; the D term isn't supported.
++ *
++ * The setpoint (SP) is always the first tier of one type; the process variable
++ * (PV) is either any tier of the other type or any other tier of the same
++ * type.
++ *
++ * The error is the difference between the SP and the PV; the correction is to
++ * turn off protection when SP>PV or turn on protection when SP<PV.
++ *
++ * For future optimizations:
++ * 1. The D term may discount the other two terms over time so that long-lived
++ *    generations can resist stale information.
++ */
++struct ctrl_pos {
++	unsigned long refaulted;
++	unsigned long total;
++	int gain;
++};
++
++static void read_ctrl_pos(struct lruvec *lruvec, int type, int tier, int gain,
++			  struct ctrl_pos *pos)
++{
++	struct lru_gen_struct *lrugen = &lruvec->lrugen;
++	int hist = lru_hist_from_seq(lrugen->min_seq[type]);
++
++	pos->refaulted = lrugen->avg_refaulted[type][tier] +
++			 atomic_long_read(&lrugen->refaulted[hist][type][tier]);
++	pos->total = lrugen->avg_total[type][tier] +
++		     atomic_long_read(&lrugen->evicted[hist][type][tier]);
++	if (tier)
++		pos->total += lrugen->protected[hist][type][tier - 1];
++	pos->gain = gain;
++}
++
++static void reset_ctrl_pos(struct lruvec *lruvec, int type, bool carryover)
++{
++	int hist, tier;
++	struct lru_gen_struct *lrugen = &lruvec->lrugen;
++	bool clear = carryover ? NR_HIST_GENS == 1 : NR_HIST_GENS > 1;
++	unsigned long seq = carryover ? lrugen->min_seq[type] : lrugen->max_seq + 1;
++
++	lockdep_assert_held(&lruvec->lru_lock);
++
++	if (!carryover && !clear)
++		return;
++
++	hist = lru_hist_from_seq(seq);
++
++	for (tier = 0; tier < MAX_NR_TIERS; tier++) {
++		if (carryover) {
++			unsigned long sum;
++
++			sum = lrugen->avg_refaulted[type][tier] +
++			      atomic_long_read(&lrugen->refaulted[hist][type][tier]);
++			WRITE_ONCE(lrugen->avg_refaulted[type][tier], sum / 2);
++
++			sum = lrugen->avg_total[type][tier] +
++			      atomic_long_read(&lrugen->evicted[hist][type][tier]);
++			if (tier)
++				sum += lrugen->protected[hist][type][tier - 1];
++			WRITE_ONCE(lrugen->avg_total[type][tier], sum / 2);
++		}
++
++		if (clear) {
++			atomic_long_set(&lrugen->refaulted[hist][type][tier], 0);
++			atomic_long_set(&lrugen->evicted[hist][type][tier], 0);
++			if (tier)
++				WRITE_ONCE(lrugen->protected[hist][type][tier - 1], 0);
++		}
++	}
++}
++
++static bool positive_ctrl_err(struct ctrl_pos *sp, struct ctrl_pos *pv)
++{
++	/*
++	 * Return true if the PV has a limited number of refaults or a lower
++	 * refaulted/total than the SP.
++	 */
++	return pv->refaulted < MIN_LRU_BATCH ||
++	       pv->refaulted * (sp->total + MIN_LRU_BATCH) * sp->gain <=
++	       (sp->refaulted + 1) * pv->total * pv->gain;
++}
++
++/******************************************************************************
++ *                          the aging
++ ******************************************************************************/
++
++/* protect pages accessed multiple times through file descriptors */
++static int page_inc_gen(struct lruvec *lruvec, struct page *page, bool reclaiming)
++{
++	int type = page_is_file_lru(page);
++	struct lru_gen_struct *lrugen = &lruvec->lrugen;
++	int new_gen, old_gen = lru_gen_from_seq(lrugen->min_seq[type]);
++	unsigned long new_flags, old_flags = READ_ONCE(page->flags);
++
++	VM_WARN_ON_ONCE_PAGE(!(old_flags & LRU_GEN_MASK), page);
++
++	do {
++		new_gen = (old_gen + 1) % MAX_NR_GENS;
++
++		new_flags = old_flags & ~(LRU_GEN_MASK | LRU_REFS_MASK | LRU_REFS_FLAGS);
++		new_flags |= (new_gen + 1UL) << LRU_GEN_PGOFF;
++		/* for end_page_writeback() */
++		if (reclaiming)
++			new_flags |= BIT(PG_reclaim);
++	} while (!try_cmpxchg(&page->flags, &old_flags, new_flags));
++
++	lru_gen_update_size(lruvec, page, old_gen, new_gen);
++
++	return new_gen;
++}
++
++static void inc_min_seq(struct lruvec *lruvec, int type)
++{
++	struct lru_gen_struct *lrugen = &lruvec->lrugen;
++
++	reset_ctrl_pos(lruvec, type, true);
++	WRITE_ONCE(lrugen->min_seq[type], lrugen->min_seq[type] + 1);
++}
++
++static bool try_to_inc_min_seq(struct lruvec *lruvec, bool can_swap)
++{
++	int gen, type, zone;
++	bool success = false;
++	struct lru_gen_struct *lrugen = &lruvec->lrugen;
++	DEFINE_MIN_SEQ(lruvec);
++
++	VM_WARN_ON_ONCE(!seq_is_valid(lruvec));
++
++	/* find the oldest populated generation */
++	for (type = !can_swap; type < ANON_AND_FILE; type++) {
++		while (min_seq[type] + MIN_NR_GENS <= lrugen->max_seq) {
++			gen = lru_gen_from_seq(min_seq[type]);
++
++			for (zone = 0; zone < MAX_NR_ZONES; zone++) {
++				if (!list_empty(&lrugen->lists[gen][type][zone]))
++					goto next;
++			}
++
++			min_seq[type]++;
++		}
++next:
++		;
++	}
++
++	/* see the comment on lru_gen_struct */
++	if (can_swap) {
++		min_seq[LRU_GEN_ANON] = min(min_seq[LRU_GEN_ANON], min_seq[LRU_GEN_FILE]);
++		min_seq[LRU_GEN_FILE] = max(min_seq[LRU_GEN_ANON], lrugen->min_seq[LRU_GEN_FILE]);
++	}
++
++	for (type = !can_swap; type < ANON_AND_FILE; type++) {
++		if (min_seq[type] == lrugen->min_seq[type])
++			continue;
++
++		reset_ctrl_pos(lruvec, type, true);
++		WRITE_ONCE(lrugen->min_seq[type], min_seq[type]);
++		success = true;
++	}
++
++	return success;
++}
++
++static void inc_max_seq(struct lruvec *lruvec, unsigned long max_seq, bool can_swap)
++{
++	int prev, next;
++	int type, zone;
++	struct lru_gen_struct *lrugen = &lruvec->lrugen;
++
++	spin_lock_irq(&lruvec->lru_lock);
++
++	VM_WARN_ON_ONCE(!seq_is_valid(lruvec));
++
++	if (max_seq != lrugen->max_seq)
++		goto unlock;
++
++	for (type = ANON_AND_FILE - 1; type >= 0; type--) {
++		if (get_nr_gens(lruvec, type) != MAX_NR_GENS)
++			continue;
++
++		VM_WARN_ON_ONCE(type == LRU_GEN_FILE || can_swap);
++
++		inc_min_seq(lruvec, type);
++	}
++
++	/*
++	 * Update the active/inactive LRU sizes for compatibility. Both sides of
++	 * the current max_seq need to be covered, since max_seq+1 can overlap
++	 * with min_seq[LRU_GEN_ANON] if swapping is constrained. And if they do
++	 * overlap, cold/hot inversion happens.
++	 */
++	prev = lru_gen_from_seq(lrugen->max_seq - 1);
++	next = lru_gen_from_seq(lrugen->max_seq + 1);
++
++	for (type = 0; type < ANON_AND_FILE; type++) {
++		for (zone = 0; zone < MAX_NR_ZONES; zone++) {
++			enum lru_list lru = type * LRU_INACTIVE_FILE;
++			long delta = lrugen->nr_pages[prev][type][zone] -
++				     lrugen->nr_pages[next][type][zone];
++
++			if (!delta)
++				continue;
++
++			__update_lru_size(lruvec, lru, zone, delta);
++			__update_lru_size(lruvec, lru + LRU_ACTIVE, zone, -delta);
++		}
++	}
++
++	for (type = 0; type < ANON_AND_FILE; type++)
++		reset_ctrl_pos(lruvec, type, false);
++
++	/* make sure preceding modifications appear */
++	smp_store_release(&lrugen->max_seq, lrugen->max_seq + 1);
++unlock:
++	spin_unlock_irq(&lruvec->lru_lock);
++}
++
++static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq, unsigned long *min_seq,
++			     struct scan_control *sc, bool can_swap, unsigned long *nr_to_scan)
++{
++	int gen, type, zone;
++	unsigned long old = 0;
++	unsigned long young = 0;
++	unsigned long total = 0;
++	struct lru_gen_struct *lrugen = &lruvec->lrugen;
++	struct mem_cgroup *memcg = lruvec_memcg(lruvec);
++
++	for (type = !can_swap; type < ANON_AND_FILE; type++) {
++		unsigned long seq;
++
++		for (seq = min_seq[type]; seq <= max_seq; seq++) {
++			unsigned long size = 0;
++
++			gen = lru_gen_from_seq(seq);
++
++			for (zone = 0; zone < MAX_NR_ZONES; zone++)
++				size += max(READ_ONCE(lrugen->nr_pages[gen][type][zone]), 0L);
++
++			total += size;
++			if (seq == max_seq)
++				young += size;
++			else if (seq + MIN_NR_GENS == max_seq)
++				old += size;
++		}
++	}
++
++	/* try to scrape all its memory if this memcg was deleted */
++	*nr_to_scan = mem_cgroup_online(memcg) ? (total >> sc->priority) : total;
++
++	/*
++	 * The aging tries to be lazy to reduce the overhead, while the eviction
++	 * stalls when the number of generations reaches MIN_NR_GENS. Hence, the
++	 * ideal number of generations is MIN_NR_GENS+1.
++	 */
++	if (min_seq[!can_swap] + MIN_NR_GENS > max_seq)
++		return true;
++	if (min_seq[!can_swap] + MIN_NR_GENS < max_seq)
++		return false;
++
++	/*
++	 * It's also ideal to spread pages out evenly, i.e., 1/(MIN_NR_GENS+1)
++	 * of the total number of pages for each generation. A reasonable range
++	 * for this average portion is [1/MIN_NR_GENS, 1/(MIN_NR_GENS+2)]. The
++	 * aging cares about the upper bound of hot pages, while the eviction
++	 * cares about the lower bound of cold pages.
++	 */
++	if (young * MIN_NR_GENS > total)
++		return true;
++	if (old * (MIN_NR_GENS + 2) < total)
++		return true;
++
++	return false;
++}
++
++static void age_lruvec(struct lruvec *lruvec, struct scan_control *sc)
++{
++	bool need_aging;
++	unsigned long nr_to_scan;
++	int swappiness = get_swappiness(lruvec, sc);
++	struct mem_cgroup *memcg = lruvec_memcg(lruvec);
++	DEFINE_MAX_SEQ(lruvec);
++	DEFINE_MIN_SEQ(lruvec);
++
++	VM_WARN_ON_ONCE(sc->memcg_low_reclaim);
++
++	mem_cgroup_calculate_protection(NULL, memcg);
++
++	if (mem_cgroup_below_min(memcg))
++		return;
++
++	need_aging = should_run_aging(lruvec, max_seq, min_seq, sc, swappiness, &nr_to_scan);
++	if (need_aging)
++		inc_max_seq(lruvec, max_seq, swappiness);
++}
++
++static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc)
++{
++	struct mem_cgroup *memcg;
++
++	VM_WARN_ON_ONCE(!current_is_kswapd());
++
++	memcg = mem_cgroup_iter(NULL, NULL, NULL);
++	do {
++		struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat);
++
++		age_lruvec(lruvec, sc);
++
++		cond_resched();
++	} while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)));
++}
++
++/******************************************************************************
++ *                          the eviction
++ ******************************************************************************/
++
++static bool sort_page(struct lruvec *lruvec, struct page *page, int tier_idx)
++{
++	bool success;
++	int gen = page_lru_gen(page);
++	int type = page_is_file_lru(page);
++	int zone = page_zonenum(page);
++	int delta = thp_nr_pages(page);
++	int refs = page_lru_refs(page);
++	int tier = lru_tier_from_refs(refs);
++	struct lru_gen_struct *lrugen = &lruvec->lrugen;
++
++	VM_WARN_ON_ONCE_PAGE(gen >= MAX_NR_GENS, page);
++
++	/* unevictable */
++	if (!page_evictable(page)) {
++		success = lru_gen_del_page(lruvec, page, true);
++		VM_WARN_ON_ONCE_PAGE(!success, page);
++		SetPageUnevictable(page);
++		add_page_to_lru_list(page, lruvec);
++		__count_vm_events(UNEVICTABLE_PGCULLED, delta);
++		return true;
++	}
++
++	/* dirty lazyfree */
++	if (type == LRU_GEN_FILE && PageAnon(page) && PageDirty(page)) {
++		success = lru_gen_del_page(lruvec, page, true);
++		VM_WARN_ON_ONCE_PAGE(!success, page);
++		SetPageSwapBacked(page);
++		add_page_to_lru_list_tail(page, lruvec);
++		return true;
++	}
++
++	/* protected */
++	if (tier > tier_idx) {
++		int hist = lru_hist_from_seq(lrugen->min_seq[type]);
++
++		gen = page_inc_gen(lruvec, page, false);
++		list_move_tail(&page->lru, &lrugen->lists[gen][type][zone]);
++
++		WRITE_ONCE(lrugen->protected[hist][type][tier - 1],
++			   lrugen->protected[hist][type][tier - 1] + delta);
++		__mod_lruvec_state(lruvec, WORKINGSET_ACTIVATE_BASE + type, delta);
++		return true;
++	}
++
++	/* waiting for writeback */
++	if (PageLocked(page) || PageWriteback(page) ||
++	    (type == LRU_GEN_FILE && PageDirty(page))) {
++		gen = page_inc_gen(lruvec, page, true);
++		list_move(&page->lru, &lrugen->lists[gen][type][zone]);
++		return true;
++	}
++
++	return false;
++}
++
++static bool isolate_page(struct lruvec *lruvec, struct page *page, struct scan_control *sc)
++{
++	bool success;
++
++	/* unmapping inhibited */
++	if (!sc->may_unmap && page_mapped(page))
++		return false;
++
++	/* swapping inhibited */
++	if (!(sc->may_writepage && (sc->gfp_mask & __GFP_IO)) &&
++	    (PageDirty(page) ||
++	     (PageAnon(page) && !PageSwapCache(page))))
++		return false;
++
++	/* raced with release_pages() */
++	if (!get_page_unless_zero(page))
++		return false;
++
++	/* raced with another isolation */
++	if (!TestClearPageLRU(page)) {
++		put_page(page);
++		return false;
++	}
++
++	/* see the comment on MAX_NR_TIERS */
++	if (!PageReferenced(page))
++		set_mask_bits(&page->flags, LRU_REFS_MASK | LRU_REFS_FLAGS, 0);
++
++	/* for shrink_page_list() */
++	ClearPageReclaim(page);
++	ClearPageReferenced(page);
++
++	success = lru_gen_del_page(lruvec, page, true);
++	VM_WARN_ON_ONCE_PAGE(!success, page);
++
++	return true;
++}
++
++static int scan_pages(struct lruvec *lruvec, struct scan_control *sc,
++		      int type, int tier, struct list_head *list)
++{
++	int gen, zone;
++	enum vm_event_item item;
++	int sorted = 0;
++	int scanned = 0;
++	int isolated = 0;
++	int remaining = MAX_LRU_BATCH;
++	struct lru_gen_struct *lrugen = &lruvec->lrugen;
++	struct mem_cgroup *memcg = lruvec_memcg(lruvec);
++
++	VM_WARN_ON_ONCE(!list_empty(list));
++
++	if (get_nr_gens(lruvec, type) == MIN_NR_GENS)
++		return 0;
++
++	gen = lru_gen_from_seq(lrugen->min_seq[type]);
++
++	for (zone = sc->reclaim_idx; zone >= 0; zone--) {
++		LIST_HEAD(moved);
++		int skipped = 0;
++		struct list_head *head = &lrugen->lists[gen][type][zone];
++
++		while (!list_empty(head)) {
++			struct page *page = lru_to_page(head);
++			int delta = thp_nr_pages(page);
++
++			VM_WARN_ON_ONCE_PAGE(PageUnevictable(page), page);
++			VM_WARN_ON_ONCE_PAGE(PageActive(page), page);
++			VM_WARN_ON_ONCE_PAGE(page_is_file_lru(page) != type, page);
++			VM_WARN_ON_ONCE_PAGE(page_zonenum(page) != zone, page);
++
++			scanned += delta;
++
++			if (sort_page(lruvec, page, tier))
++				sorted += delta;
++			else if (isolate_page(lruvec, page, sc)) {
++				list_add(&page->lru, list);
++				isolated += delta;
++			} else {
++				list_move(&page->lru, &moved);
++				skipped += delta;
++			}
++
++			if (!--remaining || max(isolated, skipped) >= MIN_LRU_BATCH)
++				break;
++		}
++
++		if (skipped) {
++			list_splice(&moved, head);
++			__count_zid_vm_events(PGSCAN_SKIP, zone, skipped);
++		}
++
++		if (!remaining || isolated >= MIN_LRU_BATCH)
++			break;
++	}
++
++	item = current_is_kswapd() ? PGSCAN_KSWAPD : PGSCAN_DIRECT;
++	if (!cgroup_reclaim(sc)) {
++		__count_vm_events(item, isolated);
++		__count_vm_events(PGREFILL, sorted);
++	}
++	__count_memcg_events(memcg, item, isolated);
++	__count_memcg_events(memcg, PGREFILL, sorted);
++	__count_vm_events(PGSCAN_ANON + type, isolated);
++
++	/*
++	 * There might not be eligible pages due to reclaim_idx, may_unmap and
++	 * may_writepage. Check the remaining to prevent livelock if it's not
++	 * making progress.
++	 */
++	return isolated || !remaining ? scanned : 0;
++}
++
++static int get_tier_idx(struct lruvec *lruvec, int type)
++{
++	int tier;
++	struct ctrl_pos sp, pv;
++
++	/*
++	 * To leave a margin for fluctuations, use a larger gain factor (1:2).
++	 * This value is chosen because any other tier would have at least twice
++	 * as many refaults as the first tier.
++	 */
++	read_ctrl_pos(lruvec, type, 0, 1, &sp);
++	for (tier = 1; tier < MAX_NR_TIERS; tier++) {
++		read_ctrl_pos(lruvec, type, tier, 2, &pv);
++		if (!positive_ctrl_err(&sp, &pv))
++			break;
++	}
++
++	return tier - 1;
++}
++
++static int get_type_to_scan(struct lruvec *lruvec, int swappiness, int *tier_idx)
++{
++	int type, tier;
++	struct ctrl_pos sp, pv;
++	int gain[ANON_AND_FILE] = { swappiness, 200 - swappiness };
++
++	/*
++	 * Compare the first tier of anon with that of file to determine which
++	 * type to scan. Also need to compare other tiers of the selected type
++	 * with the first tier of the other type to determine the last tier (of
++	 * the selected type) to evict.
++	 */
++	read_ctrl_pos(lruvec, LRU_GEN_ANON, 0, gain[LRU_GEN_ANON], &sp);
++	read_ctrl_pos(lruvec, LRU_GEN_FILE, 0, gain[LRU_GEN_FILE], &pv);
++	type = positive_ctrl_err(&sp, &pv);
++
++	read_ctrl_pos(lruvec, !type, 0, gain[!type], &sp);
++	for (tier = 1; tier < MAX_NR_TIERS; tier++) {
++		read_ctrl_pos(lruvec, type, tier, gain[type], &pv);
++		if (!positive_ctrl_err(&sp, &pv))
++			break;
++	}
++
++	*tier_idx = tier - 1;
++
++	return type;
++}
++
++static int isolate_pages(struct lruvec *lruvec, struct scan_control *sc, int swappiness,
++			 int *type_scanned, struct list_head *list)
++{
++	int i;
++	int type;
++	int scanned;
++	int tier = -1;
++	DEFINE_MIN_SEQ(lruvec);
++
++	/*
++	 * Try to make the obvious choice first. When anon and file are both
++	 * available from the same generation, interpret swappiness 1 as file
++	 * first and 200 as anon first.
++	 */
++	if (!swappiness)
++		type = LRU_GEN_FILE;
++	else if (min_seq[LRU_GEN_ANON] < min_seq[LRU_GEN_FILE])
++		type = LRU_GEN_ANON;
++	else if (swappiness == 1)
++		type = LRU_GEN_FILE;
++	else if (swappiness == 200)
++		type = LRU_GEN_ANON;
++	else
++		type = get_type_to_scan(lruvec, swappiness, &tier);
++
++	for (i = !swappiness; i < ANON_AND_FILE; i++) {
++		if (tier < 0)
++			tier = get_tier_idx(lruvec, type);
++
++		scanned = scan_pages(lruvec, sc, type, tier, list);
++		if (scanned)
++			break;
++
++		type = !type;
++		tier = -1;
++	}
++
++	*type_scanned = type;
++
++	return scanned;
++}
++
++static int evict_pages(struct lruvec *lruvec, struct scan_control *sc, int swappiness)
++{
++	int type;
++	int scanned;
++	int reclaimed;
++	LIST_HEAD(list);
++	struct page *page;
++	enum vm_event_item item;
++	struct reclaim_stat stat;
++	struct mem_cgroup *memcg = lruvec_memcg(lruvec);
++	struct pglist_data *pgdat = lruvec_pgdat(lruvec);
++
++	spin_lock_irq(&lruvec->lru_lock);
++
++	scanned = isolate_pages(lruvec, sc, swappiness, &type, &list);
++
++	scanned += try_to_inc_min_seq(lruvec, swappiness);
++
++	if (get_nr_gens(lruvec, !swappiness) == MIN_NR_GENS)
++		scanned = 0;
++
++	spin_unlock_irq(&lruvec->lru_lock);
++
++	if (list_empty(&list))
++		return scanned;
++
++	reclaimed = shrink_page_list(&list, pgdat, sc, &stat, false);
++
++	list_for_each_entry(page, &list, lru) {
++		/* restore LRU_REFS_FLAGS cleared by isolate_page() */
++		if (PageWorkingset(page))
++			SetPageReferenced(page);
++
++		/* don't add rejected pages to the oldest generation */
++		if (PageReclaim(page) &&
++		    (PageDirty(page) || PageWriteback(page)))
++			ClearPageActive(page);
++		else
++			SetPageActive(page);
++	}
++
++	spin_lock_irq(&lruvec->lru_lock);
++
++	move_pages_to_lru(lruvec, &list);
++
++	item = current_is_kswapd() ? PGSTEAL_KSWAPD : PGSTEAL_DIRECT;
++	if (!cgroup_reclaim(sc))
++		__count_vm_events(item, reclaimed);
++	__count_memcg_events(memcg, item, reclaimed);
++	__count_vm_events(PGSTEAL_ANON + type, reclaimed);
++
++	spin_unlock_irq(&lruvec->lru_lock);
++
++	mem_cgroup_uncharge_list(&list);
++	free_unref_page_list(&list);
++
++	sc->nr_reclaimed += reclaimed;
++
++	return scanned;
++}
++
++static unsigned long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc,
++				    bool can_swap)
++{
++	bool need_aging;
++	unsigned long nr_to_scan;
++	struct mem_cgroup *memcg = lruvec_memcg(lruvec);
++	DEFINE_MAX_SEQ(lruvec);
++	DEFINE_MIN_SEQ(lruvec);
++
++	if (mem_cgroup_below_min(memcg) ||
++	    (mem_cgroup_below_low(memcg) && !sc->memcg_low_reclaim))
++		return 0;
++
++	need_aging = should_run_aging(lruvec, max_seq, min_seq, sc, can_swap, &nr_to_scan);
++	if (!need_aging)
++		return nr_to_scan;
++
++	/* skip the aging path at the default priority */
++	if (sc->priority == DEF_PRIORITY)
++		goto done;
++
++	/* leave the work to lru_gen_age_node() */
++	if (current_is_kswapd())
++		return 0;
++
++	inc_max_seq(lruvec, max_seq, can_swap);
++done:
++	return min_seq[!can_swap] + MIN_NR_GENS <= max_seq ? nr_to_scan : 0;
++}
++
++static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
++{
++	struct blk_plug plug;
++	unsigned long scanned = 0;
++
++	lru_add_drain();
++
++	blk_start_plug(&plug);
++
++	while (true) {
++		int delta;
++		int swappiness;
++		unsigned long nr_to_scan;
++
++		if (sc->may_swap)
++			swappiness = get_swappiness(lruvec, sc);
++		else if (!cgroup_reclaim(sc) && get_swappiness(lruvec, sc))
++			swappiness = 1;
++		else
++			swappiness = 0;
++
++		nr_to_scan = get_nr_to_scan(lruvec, sc, swappiness);
++		if (!nr_to_scan)
++			break;
++
++		delta = evict_pages(lruvec, sc, swappiness);
++		if (!delta)
++			break;
++
++		scanned += delta;
++		if (scanned >= nr_to_scan)
++			break;
++
++		cond_resched();
++	}
++
++	blk_finish_plug(&plug);
++}
++
+ /******************************************************************************
+  *                          initialization
+  ******************************************************************************/
+@@ -2894,6 +3649,16 @@ static int __init init_lru_gen(void)
+ };
+ late_initcall(init_lru_gen);
+ 
++#else /* !CONFIG_LRU_GEN */
++
++static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc)
++{
++}
++
++static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
++{
++}
++
+ #endif /* CONFIG_LRU_GEN */
+ 
+ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
+@@ -2907,6 +3672,11 @@ static void shrink_lruvec(struct lruvec
+ 	bool proportional_reclaim;
+ 	struct blk_plug plug;
+ 
++	if (lru_gen_enabled()) {
++		lru_gen_shrink_lruvec(lruvec, sc);
++		return;
++	}
++
+ 	get_scan_count(lruvec, sc, nr);
+ 
+ 	/* Record the original scan target for proportional adjustments later */
+@@ -3372,6 +4142,9 @@ static void snapshot_refaults(struct mem
+ 	struct lruvec *target_lruvec;
+ 	unsigned long refaults;
+ 
++	if (lru_gen_enabled())
++		return;
++
+ 	target_lruvec = mem_cgroup_lruvec(target_memcg, pgdat);
+ 	refaults = lruvec_page_state(target_lruvec, WORKINGSET_ACTIVATE_ANON);
+ 	target_lruvec->refaults[0] = refaults;
+@@ -3736,12 +4509,16 @@ unsigned long try_to_free_mem_cgroup_pag
+ }
+ #endif
+ 
+-static void age_active_anon(struct pglist_data *pgdat,
+-				struct scan_control *sc)
++static void kswapd_age_node(struct pglist_data *pgdat, struct scan_control *sc)
+ {
+ 	struct mem_cgroup *memcg;
+ 	struct lruvec *lruvec;
+ 
++	if (lru_gen_enabled()) {
++		lru_gen_age_node(pgdat, sc);
++		return;
++	}
++
+ 	if (!can_age_anon_pages(pgdat, sc))
+ 		return;
+ 
+@@ -4058,12 +4835,11 @@ restart:
+ 		sc.may_swap = !nr_boost_reclaim;
+ 
+ 		/*
+-		 * Do some background aging of the anon list, to give
+-		 * pages a chance to be referenced before reclaiming. All
+-		 * pages are rotated regardless of classzone as this is
+-		 * about consistent aging.
++		 * Do some background aging, to give pages a chance to be
++		 * referenced before reclaiming. All pages are rotated
++		 * regardless of classzone as this is about consistent aging.
+ 		 */
+-		age_active_anon(pgdat, &sc);
++		kswapd_age_node(pgdat, &sc);
+ 
+ 		/*
+ 		 * If we're getting trouble reclaiming, start doing writepage
+--- a/mm/workingset.c
++++ b/mm/workingset.c
+@@ -187,7 +187,6 @@ static unsigned int bucket_order __read_
+ static void *pack_shadow(int memcgid, pg_data_t *pgdat, unsigned long eviction,
+ 			 bool workingset)
+ {
+-	eviction >>= bucket_order;
+ 	eviction &= EVICTION_MASK;
+ 	eviction = (eviction << MEM_CGROUP_ID_SHIFT) | memcgid;
+ 	eviction = (eviction << NODES_SHIFT) | pgdat->node_id;
+@@ -212,10 +211,107 @@ static void unpack_shadow(void *shadow,
+ 
+ 	*memcgidp = memcgid;
+ 	*pgdat = NODE_DATA(nid);
+-	*evictionp = entry << bucket_order;
++	*evictionp = entry;
+ 	*workingsetp = workingset;
+ }
+ 
++#ifdef CONFIG_LRU_GEN
++
++static void *lru_gen_eviction(struct page *page)
++{
++	int hist;
++	unsigned long token;
++	unsigned long min_seq;
++	struct lruvec *lruvec;
++	struct lru_gen_struct *lrugen;
++	int type = page_is_file_lru(page);
++	int delta = thp_nr_pages(page);
++	int refs = page_lru_refs(page);
++	int tier = lru_tier_from_refs(refs);
++	struct mem_cgroup *memcg = page_memcg(page);
++	struct pglist_data *pgdat = page_pgdat(page);
++
++	BUILD_BUG_ON(LRU_GEN_WIDTH + LRU_REFS_WIDTH > BITS_PER_LONG - EVICTION_SHIFT);
++
++	lruvec = mem_cgroup_lruvec(memcg, pgdat);
++	lrugen = &lruvec->lrugen;
++	min_seq = READ_ONCE(lrugen->min_seq[type]);
++	token = (min_seq << LRU_REFS_WIDTH) | max(refs - 1, 0);
++
++	hist = lru_hist_from_seq(min_seq);
++	atomic_long_add(delta, &lrugen->evicted[hist][type][tier]);
++
++	return pack_shadow(mem_cgroup_id(memcg), pgdat, token, refs);
++}
++
++static void lru_gen_refault(struct page *page, void *shadow)
++{
++	int hist, tier, refs;
++	int memcg_id;
++	bool workingset;
++	unsigned long token;
++	unsigned long min_seq;
++	struct lruvec *lruvec;
++	struct lru_gen_struct *lrugen;
++	struct mem_cgroup *memcg;
++	struct pglist_data *pgdat;
++	int type = page_is_file_lru(page);
++	int delta = thp_nr_pages(page);
++
++	unpack_shadow(shadow, &memcg_id, &pgdat, &token, &workingset);
++
++	if (pgdat != page_pgdat(page))
++		return;
++
++	rcu_read_lock();
++
++	memcg = page_memcg_rcu(page);
++	if (memcg_id != mem_cgroup_id(memcg))
++		goto unlock;
++
++	lruvec = mem_cgroup_lruvec(memcg, pgdat);
++	lrugen = &lruvec->lrugen;
++
++	min_seq = READ_ONCE(lrugen->min_seq[type]);
++	if ((token >> LRU_REFS_WIDTH) != (min_seq & (EVICTION_MASK >> LRU_REFS_WIDTH)))
++		goto unlock;
++
++	hist = lru_hist_from_seq(min_seq);
++	/* see the comment in page_lru_refs() */
++	refs = (token & (BIT(LRU_REFS_WIDTH) - 1)) + workingset;
++	tier = lru_tier_from_refs(refs);
++
++	atomic_long_add(delta, &lrugen->refaulted[hist][type][tier]);
++	mod_lruvec_state(lruvec, WORKINGSET_REFAULT_BASE + type, delta);
++
++	/*
++	 * Count the following two cases as stalls:
++	 * 1. For pages accessed through page tables, hotter pages pushed out
++	 *    hot pages which refaulted immediately.
++	 * 2. For pages accessed multiple times through file descriptors,
++	 *    the number of accesses might have exceeded the trackable range.
++	 */
++	if (lru_gen_in_fault() || refs == BIT(LRU_REFS_WIDTH)) {
++		SetPageWorkingset(page);
++		mod_lruvec_state(lruvec, WORKINGSET_RESTORE_BASE + type, delta);
++	}
++unlock:
++	rcu_read_unlock();
++}
++
++#else /* !CONFIG_LRU_GEN */
++
++static void *lru_gen_eviction(struct page *page)
++{
++	return NULL;
++}
++
++static void lru_gen_refault(struct page *page, void *shadow)
++{
++}
++
++#endif /* CONFIG_LRU_GEN */
++
+ /**
+  * workingset_age_nonresident - age non-resident entries as LRU ages
+  * @lruvec: the lruvec that was aged
+@@ -264,10 +360,14 @@ void *workingset_eviction(struct page *p
+ 	VM_BUG_ON_PAGE(page_count(page), page);
+ 	VM_BUG_ON_PAGE(!PageLocked(page), page);
+ 
++	if (lru_gen_enabled())
++		return lru_gen_eviction(page);
++
+ 	lruvec = mem_cgroup_lruvec(target_memcg, pgdat);
+ 	/* XXX: target_memcg can be NULL, go through lruvec */
+ 	memcgid = mem_cgroup_id(lruvec_memcg(lruvec));
+ 	eviction = atomic_long_read(&lruvec->nonresident_age);
++	eviction >>= bucket_order;
+ 	workingset_age_nonresident(lruvec, thp_nr_pages(page));
+ 	return pack_shadow(memcgid, pgdat, eviction, PageWorkingset(page));
+ }
+@@ -296,7 +396,13 @@ void workingset_refault(struct page *pag
+ 	bool workingset;
+ 	int memcgid;
+ 
++	if (lru_gen_enabled()) {
++		lru_gen_refault(page, shadow);
++		return;
++	}
++
+ 	unpack_shadow(shadow, &memcgid, &pgdat, &eviction, &workingset);
++	eviction <<= bucket_order;
+ 
+ 	rcu_read_lock();
+ 	/*
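The workingset hunks above pack the MGLRU eviction state (the oldest sequence number and the reference count) into the shadow entry instead of the classic eviction counter. A stand-alone sketch of that token arithmetic, with made-up field widths standing in for LRU_GEN_WIDTH/LRU_REFS_WIDTH and the real EVICTION_MASK:

#include <stdio.h>

#define LRU_REFS_WIDTH	2UL			/* illustrative width */
#define EVICTION_BITS	24UL			/* illustrative shadow width */
#define EVICTION_MASK	((1UL << EVICTION_BITS) - 1)

/* mirrors lru_gen_eviction(): token = (min_seq << LRU_REFS_WIDTH) | max(refs - 1, 0) */
static unsigned long pack_token(unsigned long min_seq, unsigned long refs)
{
	return (min_seq << LRU_REFS_WIDTH) | (refs ? refs - 1 : 0);
}

/* mirrors the check in lru_gen_refault(): a stale min_seq invalidates the token */
static int token_matches(unsigned long token, unsigned long min_seq)
{
	return (token >> LRU_REFS_WIDTH) ==
	       (min_seq & (EVICTION_MASK >> LRU_REFS_WIDTH));
}

int main(void)
{
	unsigned long token = pack_token(4, 2);

	printf("token=%#lx valid_at_seq_4=%d valid_at_seq_7=%d\n",
	       token, token_matches(token, 4), token_matches(token, 7));
	return 0;
}

A refault whose recorded min_seq has since moved on simply fails the comparison, so stale shadow entries are discarded without further decoding.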

+ 491 - 0
target/linux/generic/backport-6.1/020-v6.1-07-mm-multi-gen-LRU-exploit-locality-in-rmap.patch

@@ -0,0 +1,491 @@
+From e4277535f6d6708bb19b88c4bad155832671d69b Mon Sep 17 00:00:00 2001
+From: Yu Zhao <[email protected]>
+Date: Sun, 18 Sep 2022 02:00:04 -0600
+Subject: [PATCH 07/29] mm: multi-gen LRU: exploit locality in rmap
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Searching the rmap for PTEs mapping each page on an LRU list (to test and
+clear the accessed bit) can be expensive because pages from different VMAs
+(PA space) are not cache friendly to the rmap (VA space).  For workloads
+mostly using mapped pages, searching the rmap can incur the highest CPU
+cost in the reclaim path.
+
+This patch exploits spatial locality to reduce the trips into the rmap.
+When shrink_page_list() walks the rmap and finds a young PTE, a new
+function lru_gen_look_around() scans at most BITS_PER_LONG-1 adjacent
+PTEs.  On finding another young PTE, it clears the accessed bit and
+updates the gen counter of the page mapped by this PTE to
+(max_seq%MAX_NR_GENS)+1.
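In page->flags terms the stored value is the generation number plus one, so zero can mean "not on a multi-gen LRU list" (see page_update_gen() in the hunk below). A minimal stand-alone model of that arithmetic, with the bit offset and width invented for the example:

#include <stdio.h>

/*
 * Simplified model of the generation counter kept in page->flags. The bit
 * offset and width are invented for the example; the stored value is
 * gen + 1 so that 0 means "not on a multi-gen LRU list".
 */
#define LRU_GEN_PGOFF	8
#define LRU_GEN_WIDTH	3
#define LRU_GEN_MASK	(((1UL << LRU_GEN_WIDTH) - 1) << LRU_GEN_PGOFF)
#define MAX_NR_GENS	4

static int flags_to_gen(unsigned long flags)
{
	return (int)((flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1;
}

/* what look-around does to a page with a young PTE: move it to the youngest gen */
static unsigned long promote(unsigned long flags, unsigned long max_seq)
{
	unsigned long gen = max_seq % MAX_NR_GENS;

	return (flags & ~LRU_GEN_MASK) | ((gen + 1) << LRU_GEN_PGOFF);
}

int main(void)
{
	unsigned long flags = promote(0, 9);	/* max_seq = 9 maps to gen 1 */

	printf("gen=%d\n", flags_to_gen(flags));
	return 0;
}

Built on its own, the example prints gen=1 for max_seq 9 and MAX_NR_GENS 4.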
+
+Server benchmark results:
+  Single workload:
+    fio (buffered I/O): no change
+
+  Single workload:
+    memcached (anon): +[3, 5]%
+                Ops/sec      KB/sec
+      patch1-6: 1106168.46   43025.04
+      patch1-7: 1147696.57   44640.29
+
+  Configurations:
+    no change
+
+Client benchmark results:
+  kswapd profiles:
+    patch1-6
+      39.03%  lzo1x_1_do_compress (real work)
+      18.47%  page_vma_mapped_walk (overhead)
+       6.74%  _raw_spin_unlock_irq
+       3.97%  do_raw_spin_lock
+       2.49%  ptep_clear_flush
+       2.48%  anon_vma_interval_tree_iter_first
+       1.92%  page_referenced_one
+       1.88%  __zram_bvec_write
+       1.48%  memmove
+       1.31%  vma_interval_tree_iter_next
+
+    patch1-7
+      48.16%  lzo1x_1_do_compress (real work)
+       8.20%  page_vma_mapped_walk (overhead)
+       7.06%  _raw_spin_unlock_irq
+       2.92%  ptep_clear_flush
+       2.53%  __zram_bvec_write
+       2.11%  do_raw_spin_lock
+       2.02%  memmove
+       1.93%  lru_gen_look_around
+       1.56%  free_unref_page_list
+       1.40%  memset
+
+  Configurations:
+    no change
+
+Link: https://lkml.kernel.org/r/[email protected]
+Signed-off-by: Yu Zhao <[email protected]>
+Acked-by: Barry Song <[email protected]>
+Acked-by: Brian Geffon <[email protected]>
+Acked-by: Jan Alexander Steffens (heftig) <[email protected]>
+Acked-by: Oleksandr Natalenko <[email protected]>
+Acked-by: Steven Barrett <[email protected]>
+Acked-by: Suleiman Souhlal <[email protected]>
+Tested-by: Daniel Byrne <[email protected]>
+Tested-by: Donald Carr <[email protected]>
+Tested-by: Holger Hoffstätte <[email protected]>
+Tested-by: Konstantin Kharlamov <[email protected]>
+Tested-by: Shuang Zhai <[email protected]>
+Tested-by: Sofia Trinh <[email protected]>
+Tested-by: Vaibhav Jain <[email protected]>
+Cc: Andi Kleen <[email protected]>
+Cc: Aneesh Kumar K.V <[email protected]>
+Cc: Catalin Marinas <[email protected]>
+Cc: Dave Hansen <[email protected]>
+Cc: Hillf Danton <[email protected]>
+Cc: Jens Axboe <[email protected]>
+Cc: Johannes Weiner <[email protected]>
+Cc: Jonathan Corbet <[email protected]>
+Cc: Linus Torvalds <[email protected]>
+Cc: Matthew Wilcox <[email protected]>
+Cc: Mel Gorman <[email protected]>
+Cc: Miaohe Lin <[email protected]>
+Cc: Michael Larabel <[email protected]>
+Cc: Michal Hocko <[email protected]>
+Cc: Mike Rapoport <[email protected]>
+Cc: Mike Rapoport <[email protected]>
+Cc: Peter Zijlstra <[email protected]>
+Cc: Qi Zheng <[email protected]>
+Cc: Tejun Heo <[email protected]>
+Cc: Vlastimil Babka <[email protected]>
+Cc: Will Deacon <[email protected]>
+Signed-off-by: Andrew Morton <[email protected]>
+---
+ include/linux/memcontrol.h |  31 +++++++
+ include/linux/mmzone.h     |   6 ++
+ mm/internal.h              |   1 +
+ mm/memcontrol.c            |   1 +
+ mm/rmap.c                  |   7 ++
+ mm/swap.c                  |   4 +-
+ mm/vmscan.c                | 184 +++++++++++++++++++++++++++++++++++++
+ 7 files changed, 232 insertions(+), 2 deletions(-)
+
+--- a/include/linux/memcontrol.h
++++ b/include/linux/memcontrol.h
+@@ -442,6 +442,7 @@ static inline struct obj_cgroup *__page_
+  * - LRU isolation
+  * - lock_page_memcg()
+  * - exclusive reference
++ * - mem_cgroup_trylock_pages()
+  *
+  * For a kmem page a caller should hold an rcu read lock to protect memcg
+  * associated with a kmem page from being released.
+@@ -497,6 +498,7 @@ static inline struct mem_cgroup *page_me
+  * - LRU isolation
+  * - lock_page_memcg()
+  * - exclusive reference
++ * - mem_cgroup_trylock_pages()
+  *
+  * For a kmem page a caller should hold an rcu read lock to protect memcg
+  * associated with a kmem page from being released.
+@@ -953,6 +955,23 @@ void unlock_page_memcg(struct page *page
+ 
+ void __mod_memcg_state(struct mem_cgroup *memcg, int idx, int val);
+ 
++/* try to stabilize page_memcg() for all the pages in a memcg */
++static inline bool mem_cgroup_trylock_pages(struct mem_cgroup *memcg)
++{
++	rcu_read_lock();
++
++	if (mem_cgroup_disabled() || !atomic_read(&memcg->moving_account))
++		return true;
++
++	rcu_read_unlock();
++	return false;
++}
++
++static inline void mem_cgroup_unlock_pages(void)
++{
++	rcu_read_unlock();
++}
++
+ /* idx can be of type enum memcg_stat_item or node_stat_item */
+ static inline void mod_memcg_state(struct mem_cgroup *memcg,
+ 				   int idx, int val)
+@@ -1369,6 +1388,18 @@ static inline void unlock_page_memcg(str
+ {
+ }
+ 
++static inline bool mem_cgroup_trylock_pages(struct mem_cgroup *memcg)
++{
++	/* to match page_memcg_rcu() */
++	rcu_read_lock();
++	return true;
++}
++
++static inline void mem_cgroup_unlock_pages(void)
++{
++	rcu_read_unlock();
++}
++
+ static inline void mem_cgroup_handle_over_high(void)
+ {
+ }
+--- a/include/linux/mmzone.h
++++ b/include/linux/mmzone.h
+@@ -352,6 +352,7 @@ enum lruvec_flags {
+ #ifndef __GENERATING_BOUNDS_H
+ 
+ struct lruvec;
++struct page_vma_mapped_walk;
+ 
+ #define LRU_GEN_MASK		((BIT(LRU_GEN_WIDTH) - 1) << LRU_GEN_PGOFF)
+ #define LRU_REFS_MASK		((BIT(LRU_REFS_WIDTH) - 1) << LRU_REFS_PGOFF)
+@@ -407,6 +408,7 @@ struct lru_gen_struct {
+ };
+ 
+ void lru_gen_init_lruvec(struct lruvec *lruvec);
++void lru_gen_look_around(struct page_vma_mapped_walk *pvmw);
+ 
+ #ifdef CONFIG_MEMCG
+ void lru_gen_init_memcg(struct mem_cgroup *memcg);
+@@ -419,6 +421,10 @@ static inline void lru_gen_init_lruvec(s
+ {
+ }
+ 
++static inline void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
++{
++}
++
+ #ifdef CONFIG_MEMCG
+ static inline void lru_gen_init_memcg(struct mem_cgroup *memcg)
+ {
+--- a/mm/internal.h
++++ b/mm/internal.h
+@@ -35,6 +35,7 @@
+ void page_writeback_init(void);
+ 
+ vm_fault_t do_swap_page(struct vm_fault *vmf);
++void activate_page(struct page *page);
+ 
+ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
+ 		unsigned long floor, unsigned long ceiling);
+--- a/mm/memcontrol.c
++++ b/mm/memcontrol.c
+@@ -2798,6 +2798,7 @@ static void commit_charge(struct page *p
+ 	 * - LRU isolation
+ 	 * - lock_page_memcg()
+ 	 * - exclusive reference
++	 * - mem_cgroup_trylock_pages()
+ 	 */
+ 	page->memcg_data = (unsigned long)memcg;
+ }
+--- a/mm/rmap.c
++++ b/mm/rmap.c
+@@ -73,6 +73,7 @@
+ #include <linux/page_idle.h>
+ #include <linux/memremap.h>
+ #include <linux/userfaultfd_k.h>
++#include <linux/mm_inline.h>
+ 
+ #include <asm/tlbflush.h>
+ 
+@@ -793,6 +794,12 @@ static bool page_referenced_one(struct p
+ 		}
+ 
+ 		if (pvmw.pte) {
++			if (lru_gen_enabled() && pte_young(*pvmw.pte) &&
++			    !(vma->vm_flags & (VM_SEQ_READ | VM_RAND_READ))) {
++				lru_gen_look_around(&pvmw);
++				referenced++;
++			}
++
+ 			if (ptep_clear_flush_young_notify(vma, address,
+ 						pvmw.pte)) {
+ 				/*
+--- a/mm/swap.c
++++ b/mm/swap.c
+@@ -325,7 +325,7 @@ static bool need_activate_page_drain(int
+ 	return pagevec_count(&per_cpu(lru_pvecs.activate_page, cpu)) != 0;
+ }
+ 
+-static void activate_page(struct page *page)
++void activate_page(struct page *page)
+ {
+ 	page = compound_head(page);
+ 	if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
+@@ -345,7 +345,7 @@ static inline void activate_page_drain(i
+ {
+ }
+ 
+-static void activate_page(struct page *page)
++void activate_page(struct page *page)
+ {
+ 	struct lruvec *lruvec;
+ 
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -1409,6 +1409,11 @@ retry:
+ 		if (!sc->may_unmap && page_mapped(page))
+ 			goto keep_locked;
+ 
++		/* page_update_gen() tried to promote this page? */
++		if (lru_gen_enabled() && !ignore_references &&
++		    page_mapped(page) && PageReferenced(page))
++			goto keep_locked;
++
+ 		may_enter_fs = (sc->gfp_mask & __GFP_FS) ||
+ 			(PageSwapCache(page) && (sc->gfp_mask & __GFP_IO));
+ 
+@@ -2990,6 +2995,29 @@ static bool positive_ctrl_err(struct ctr
+  *                          the aging
+  ******************************************************************************/
+ 
++/* promote pages accessed through page tables */
++static int page_update_gen(struct page *page, int gen)
++{
++	unsigned long new_flags, old_flags = READ_ONCE(page->flags);
++
++	VM_WARN_ON_ONCE(gen >= MAX_NR_GENS);
++	VM_WARN_ON_ONCE(!rcu_read_lock_held());
++
++	do {
++		/* lru_gen_del_page() has isolated this page? */
++		if (!(old_flags & LRU_GEN_MASK)) {
++			/* for shrink_page_list() */
++			new_flags = old_flags | BIT(PG_referenced);
++			continue;
++		}
++
++		new_flags = old_flags & ~(LRU_GEN_MASK | LRU_REFS_MASK | LRU_REFS_FLAGS);
++		new_flags |= (gen + 1UL) << LRU_GEN_PGOFF;
++	} while (!try_cmpxchg(&page->flags, &old_flags, new_flags));
++
++	return ((old_flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1;
++}
++
+ /* protect pages accessed multiple times through file descriptors */
+ static int page_inc_gen(struct lruvec *lruvec, struct page *page, bool reclaiming)
+ {
+@@ -3001,6 +3029,11 @@ static int page_inc_gen(struct lruvec *l
+ 	VM_WARN_ON_ONCE_PAGE(!(old_flags & LRU_GEN_MASK), page);
+ 
+ 	do {
++		new_gen = ((old_flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1;
++		/* page_update_gen() has promoted this page? */
++		if (new_gen >= 0 && new_gen != old_gen)
++			return new_gen;
++
+ 		new_gen = (old_gen + 1) % MAX_NR_GENS;
+ 
+ 		new_flags = old_flags & ~(LRU_GEN_MASK | LRU_REFS_MASK | LRU_REFS_FLAGS);
+@@ -3015,6 +3048,43 @@ static int page_inc_gen(struct lruvec *l
+ 	return new_gen;
+ }
+ 
++static unsigned long get_pte_pfn(pte_t pte, struct vm_area_struct *vma, unsigned long addr)
++{
++	unsigned long pfn = pte_pfn(pte);
++
++	VM_WARN_ON_ONCE(addr < vma->vm_start || addr >= vma->vm_end);
++
++	if (!pte_present(pte) || is_zero_pfn(pfn))
++		return -1;
++
++	if (WARN_ON_ONCE(pte_devmap(pte) || pte_special(pte)))
++		return -1;
++
++	if (WARN_ON_ONCE(!pfn_valid(pfn)))
++		return -1;
++
++	return pfn;
++}
++
++static struct page *get_pfn_page(unsigned long pfn, struct mem_cgroup *memcg,
++				 struct pglist_data *pgdat)
++{
++	struct page *page;
++
++	/* try to avoid unnecessary memory loads */
++	if (pfn < pgdat->node_start_pfn || pfn >= pgdat_end_pfn(pgdat))
++		return NULL;
++
++	page = compound_head(pfn_to_page(pfn));
++	if (page_to_nid(page) != pgdat->node_id)
++		return NULL;
++
++	if (page_memcg_rcu(page) != memcg)
++		return NULL;
++
++	return page;
++}
++
+ static void inc_min_seq(struct lruvec *lruvec, int type)
+ {
+ 	struct lru_gen_struct *lrugen = &lruvec->lrugen;
+@@ -3214,6 +3284,114 @@ static void lru_gen_age_node(struct pgli
+ 	} while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)));
+ }
+ 
++/*
++ * This function exploits spatial locality when shrink_page_list() walks the
++ * rmap. It scans the adjacent PTEs of a young PTE and promotes hot pages.
++ */
++void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
++{
++	int i;
++	pte_t *pte;
++	unsigned long start;
++	unsigned long end;
++	unsigned long addr;
++	unsigned long bitmap[BITS_TO_LONGS(MIN_LRU_BATCH)] = {};
++	struct page *page = pvmw->page;
++	struct mem_cgroup *memcg = page_memcg(page);
++	struct pglist_data *pgdat = page_pgdat(page);
++	struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat);
++	DEFINE_MAX_SEQ(lruvec);
++	int old_gen, new_gen = lru_gen_from_seq(max_seq);
++
++	lockdep_assert_held(pvmw->ptl);
++	VM_WARN_ON_ONCE_PAGE(PageLRU(page), page);
++
++	if (spin_is_contended(pvmw->ptl))
++		return;
++
++	start = max(pvmw->address & PMD_MASK, pvmw->vma->vm_start);
++	end = min(pvmw->address | ~PMD_MASK, pvmw->vma->vm_end - 1) + 1;
++
++	if (end - start > MIN_LRU_BATCH * PAGE_SIZE) {
++		if (pvmw->address - start < MIN_LRU_BATCH * PAGE_SIZE / 2)
++			end = start + MIN_LRU_BATCH * PAGE_SIZE;
++		else if (end - pvmw->address < MIN_LRU_BATCH * PAGE_SIZE / 2)
++			start = end - MIN_LRU_BATCH * PAGE_SIZE;
++		else {
++			start = pvmw->address - MIN_LRU_BATCH * PAGE_SIZE / 2;
++			end = pvmw->address + MIN_LRU_BATCH * PAGE_SIZE / 2;
++		}
++	}
++
++	pte = pvmw->pte - (pvmw->address - start) / PAGE_SIZE;
++
++	rcu_read_lock();
++	arch_enter_lazy_mmu_mode();
++
++	for (i = 0, addr = start; addr != end; i++, addr += PAGE_SIZE) {
++		unsigned long pfn;
++
++		pfn = get_pte_pfn(pte[i], pvmw->vma, addr);
++		if (pfn == -1)
++			continue;
++
++		if (!pte_young(pte[i]))
++			continue;
++
++		page = get_pfn_page(pfn, memcg, pgdat);
++		if (!page)
++			continue;
++
++		if (!ptep_test_and_clear_young(pvmw->vma, addr, pte + i))
++			VM_WARN_ON_ONCE(true);
++
++		if (pte_dirty(pte[i]) && !PageDirty(page) &&
++		    !(PageAnon(page) && PageSwapBacked(page) &&
++		      !PageSwapCache(page)))
++			set_page_dirty(page);
++
++		old_gen = page_lru_gen(page);
++		if (old_gen < 0)
++			SetPageReferenced(page);
++		else if (old_gen != new_gen)
++			__set_bit(i, bitmap);
++	}
++
++	arch_leave_lazy_mmu_mode();
++	rcu_read_unlock();
++
++	if (bitmap_weight(bitmap, MIN_LRU_BATCH) < PAGEVEC_SIZE) {
++		for_each_set_bit(i, bitmap, MIN_LRU_BATCH) {
++			page = pte_page(pte[i]);
++			activate_page(page);
++		}
++		return;
++	}
++
++	/* page_update_gen() requires stable page_memcg() */
++	if (!mem_cgroup_trylock_pages(memcg))
++		return;
++
++	spin_lock_irq(&lruvec->lru_lock);
++	new_gen = lru_gen_from_seq(lruvec->lrugen.max_seq);
++
++	for_each_set_bit(i, bitmap, MIN_LRU_BATCH) {
++		page = compound_head(pte_page(pte[i]));
++		if (page_memcg_rcu(page) != memcg)
++			continue;
++
++		old_gen = page_update_gen(page, new_gen);
++		if (old_gen < 0 || old_gen == new_gen)
++			continue;
++
++		lru_gen_update_size(lruvec, page, old_gen, new_gen);
++	}
++
++	spin_unlock_irq(&lruvec->lru_lock);
++
++	mem_cgroup_unlock_pages();
++}
++
+ /******************************************************************************
+  *                          the eviction
+  ******************************************************************************/
+@@ -3250,6 +3428,12 @@ static bool sort_page(struct lruvec *lru
+ 		return true;
+ 	}
+ 
++	/* promoted */
++	if (gen != lru_gen_from_seq(lrugen->min_seq[type])) {
++		list_move(&page->lru, &lrugen->lists[gen][type][zone]);
++		return true;
++	}
++
+ 	/* protected */
+ 	if (tier > tier_idx) {
+ 		int hist = lru_hist_from_seq(lrugen->min_seq[type]);
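The window that lru_gen_look_around() scans in the hunk above can be modelled outside the kernel as follows; page and PMD sizes are assumed to be the common x86-64 values and MIN_LRU_BATCH is assumed to equal BITS_PER_LONG, so none of the constants below are authoritative:

#include <stdio.h>

#define PAGE_SIZE	4096UL
#define PMD_SIZE	(512 * PAGE_SIZE)	/* assumed: x86-64 with 4 KiB pages */
#define PMD_MASK	(~(PMD_SIZE - 1))
#define MIN_LRU_BATCH	64UL			/* assumed: BITS_PER_LONG */

/*
 * Clamp the look-around window to the PMD table and the VMA, then trim it to
 * MIN_LRU_BATCH pages around the faulting address, following the window
 * computation in lru_gen_look_around() above.
 */
static void look_around_window(unsigned long addr, unsigned long vm_start,
			       unsigned long vm_end, unsigned long *start,
			       unsigned long *end)
{
	*start = addr & PMD_MASK;
	if (*start < vm_start)
		*start = vm_start;

	*end = (addr | ~PMD_MASK) + 1;
	if (*end > vm_end)
		*end = vm_end;

	if (*end - *start > MIN_LRU_BATCH * PAGE_SIZE) {
		if (addr - *start < MIN_LRU_BATCH * PAGE_SIZE / 2)
			*end = *start + MIN_LRU_BATCH * PAGE_SIZE;
		else if (*end - addr < MIN_LRU_BATCH * PAGE_SIZE / 2)
			*start = *end - MIN_LRU_BATCH * PAGE_SIZE;
		else {
			*start = addr - MIN_LRU_BATCH * PAGE_SIZE / 2;
			*end = addr + MIN_LRU_BATCH * PAGE_SIZE / 2;
		}
	}
}

int main(void)
{
	unsigned long start, end;

	/* a fault at 64 KiB into a 4 MiB VMA */
	look_around_window(0x40010000UL, 0x40000000UL, 0x40400000UL, &start, &end);
	printf("scan %lu PTEs around the faulting address\n",
	       (end - start) / PAGE_SIZE);
	return 0;
}

Under these assumptions the window never exceeds 64 PTEs, which lines up with the single-long on-stack bitmap the function uses to batch promotions.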

+ 1687 - 0
target/linux/generic/backport-6.1/020-v6.1-08-mm-multi-gen-LRU-support-page-table-walks.patch

@@ -0,0 +1,1687 @@
+From 05223c4e80b34e29f2255c04ffebc2c4475e7593 Mon Sep 17 00:00:00 2001
+From: Yu Zhao <[email protected]>
+Date: Sun, 18 Sep 2022 02:00:05 -0600
+Subject: [PATCH 08/29] mm: multi-gen LRU: support page table walks
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+To further exploit spatial locality, the aging prefers to walk page tables
+to search for young PTEs and promote hot pages.  A kill switch will be
+added in the next patch to disable this behavior.  When disabled, the
+aging relies on the rmap only.
+
+NB: this behavior bears no resemblance to the page table scanning in the
+2.4 kernel [1], which searches page tables for old PTEs, adds cold pages
+to swapcache and unmaps them.
+
+To avoid confusion, the term "iteration" specifically means the traversal
+of an entire mm_struct list; the term "walk" will be applied to page
+tables and the rmap, as usual.
+
+An mm_struct list is maintained for each memcg, and an mm_struct follows
+its owner task to the new memcg when this task is migrated.  Given an
+lruvec, the aging iterates lruvec_memcg()->mm_list and calls
+walk_page_range() with each mm_struct on this list to promote hot pages
+before it increments max_seq.
+
+When multiple page table walkers iterate the same list, each of them gets
+a unique mm_struct; therefore they can run concurrently.  Page table
+walkers ignore any misplaced pages, e.g., if an mm_struct was migrated,
+pages it left in the previous memcg will not be promoted when its current
+memcg is under reclaim.  Similarly, page table walkers will not promote
+pages from nodes other than the one under reclaim.
+
+This patch uses the following optimizations when walking page tables:
+1. It tracks the usage of mm_structs between context switches so that
+   page table walkers can skip processes that have been sleeping since
+   the last iteration.
+2. It uses generational Bloom filters to record populated branches so
+   that page table walkers can reduce their search space based on the
+   query results, e.g., to skip page tables containing mostly holes or
+   misplaced pages.
+3. It takes advantage of the accessed bit in non-leaf PMD entries when
+   CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG=y.
+4. It does not zigzag between a PGD table and the same PMD table
+   spanning multiple VMAs. IOW, it finishes all the VMAs within the
+   range of the same PMD table before it returns to a PGD table. This
+   improves the cache performance for workloads that have large
+   numbers of tiny VMAs [2], especially when CONFIG_PGTABLE_LEVELS=5.
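A user-space sketch of the two-key Bloom filter behind optimization 2 above; the hash below is an arbitrary mixer rather than the kernel's hash_ptr(), so only the two-bit set/test shape matches the implementation added later in this patch:

#include <stdio.h>
#include <stdint.h>

#define BLOOM_SHIFT	15
#define BLOOM_BITS	(1U << BLOOM_SHIFT)

static unsigned char bits[BLOOM_BITS / 8];

static void item_keys(const void *item, unsigned *k0, unsigned *k1)
{
	uint64_t h = (uint64_t)(uintptr_t)item * 0x9e3779b97f4a7c15ULL;

	*k0 = (h >> 16) & (BLOOM_BITS - 1);			/* first key */
	*k1 = (h >> (16 + BLOOM_SHIFT)) & (BLOOM_BITS - 1);	/* second key */
}

static void bloom_add(const void *item)
{
	unsigned k0, k1;

	item_keys(item, &k0, &k1);
	bits[k0 / 8] |= 1 << (k0 % 8);
	bits[k1 / 8] |= 1 << (k1 % 8);
}

/* may return 1 for items never added (false positive), never 0 for added ones */
static int bloom_maybe(const void *item)
{
	unsigned k0, k1;

	item_keys(item, &k0, &k1);
	return (bits[k0 / 8] >> (k0 % 8) & 1) && (bits[k1 / 8] >> (k1 % 8) & 1);
}

int main(void)
{
	int a, b;

	bloom_add(&a);
	printf("a: %d  b: %d\n", bloom_maybe(&a), bloom_maybe(&b));
	return 0;
}

A false positive only costs an unnecessary walk of that page-table branch, never a correctness problem, so a small fixed-size bitmap is an acceptable trade-off.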
+
+Server benchmark results:
+  Single workload:
+    fio (buffered I/O): no change
+
+  Single workload:
+    memcached (anon): +[8, 10]%
+                Ops/sec      KB/sec
+      patch1-7: 1147696.57   44640.29
+      patch1-8: 1245274.91   48435.66
+
+  Configurations:
+    no change
+
+Client benchmark results:
+  kswapd profiles:
+    patch1-7
+      48.16%  lzo1x_1_do_compress (real work)
+       8.20%  page_vma_mapped_walk (overhead)
+       7.06%  _raw_spin_unlock_irq
+       2.92%  ptep_clear_flush
+       2.53%  __zram_bvec_write
+       2.11%  do_raw_spin_lock
+       2.02%  memmove
+       1.93%  lru_gen_look_around
+       1.56%  free_unref_page_list
+       1.40%  memset
+
+    patch1-8
+      49.44%  lzo1x_1_do_compress (real work)
+       6.19%  page_vma_mapped_walk (overhead)
+       5.97%  _raw_spin_unlock_irq
+       3.13%  get_pfn_page
+       2.85%  ptep_clear_flush
+       2.42%  __zram_bvec_write
+       2.08%  do_raw_spin_lock
+       1.92%  memmove
+       1.44%  alloc_zspage
+       1.36%  memset
+
+  Configurations:
+    no change
+
+Thanks to the following developers for their efforts [3].
+  kernel test robot <[email protected]>
+
+[1] https://lwn.net/Articles/23732/
+[2] https://llvm.org/docs/ScudoHardenedAllocator.html
+[3] https://lore.kernel.org/r/[email protected]/
+
+Link: https://lkml.kernel.org/r/[email protected]
+Signed-off-by: Yu Zhao <[email protected]>
+Acked-by: Brian Geffon <[email protected]>
+Acked-by: Jan Alexander Steffens (heftig) <[email protected]>
+Acked-by: Oleksandr Natalenko <[email protected]>
+Acked-by: Steven Barrett <[email protected]>
+Acked-by: Suleiman Souhlal <[email protected]>
+Tested-by: Daniel Byrne <[email protected]>
+Tested-by: Donald Carr <[email protected]>
+Tested-by: Holger Hoffstätte <[email protected]>
+Tested-by: Konstantin Kharlamov <[email protected]>
+Tested-by: Shuang Zhai <[email protected]>
+Tested-by: Sofia Trinh <[email protected]>
+Tested-by: Vaibhav Jain <[email protected]>
+Cc: Andi Kleen <[email protected]>
+Cc: Aneesh Kumar K.V <[email protected]>
+Cc: Barry Song <[email protected]>
+Cc: Catalin Marinas <[email protected]>
+Cc: Dave Hansen <[email protected]>
+Cc: Hillf Danton <[email protected]>
+Cc: Jens Axboe <[email protected]>
+Cc: Johannes Weiner <[email protected]>
+Cc: Jonathan Corbet <[email protected]>
+Cc: Linus Torvalds <[email protected]>
+Cc: Matthew Wilcox <[email protected]>
+Cc: Mel Gorman <[email protected]>
+Cc: Miaohe Lin <[email protected]>
+Cc: Michael Larabel <[email protected]>
+Cc: Michal Hocko <[email protected]>
+Cc: Mike Rapoport <[email protected]>
+Cc: Mike Rapoport <[email protected]>
+Cc: Peter Zijlstra <[email protected]>
+Cc: Qi Zheng <[email protected]>
+Cc: Tejun Heo <[email protected]>
+Cc: Vlastimil Babka <[email protected]>
+Cc: Will Deacon <[email protected]>
+Signed-off-by: Andrew Morton <[email protected]>
+---
+ fs/exec.c                  |    2 +
+ include/linux/memcontrol.h |    5 +
+ include/linux/mm_types.h   |   76 +++
+ include/linux/mmzone.h     |   56 +-
+ include/linux/swap.h       |    4 +
+ kernel/exit.c              |    1 +
+ kernel/fork.c              |    9 +
+ kernel/sched/core.c        |    1 +
+ mm/memcontrol.c            |   25 +
+ mm/vmscan.c                | 1010 +++++++++++++++++++++++++++++++++++-
+ 10 files changed, 1172 insertions(+), 17 deletions(-)
+
+--- a/fs/exec.c
++++ b/fs/exec.c
+@@ -1013,6 +1013,7 @@ static int exec_mmap(struct mm_struct *m
+ 	active_mm = tsk->active_mm;
+ 	tsk->active_mm = mm;
+ 	tsk->mm = mm;
++	lru_gen_add_mm(mm);
+ 	/*
+ 	 * This prevents preemption while active_mm is being loaded and
+ 	 * it and mm are being updated, which could cause problems for
+@@ -1028,6 +1029,7 @@ static int exec_mmap(struct mm_struct *m
+ 	tsk->mm->vmacache_seqnum = 0;
+ 	vmacache_flush(tsk);
+ 	task_unlock(tsk);
++	lru_gen_use_mm(mm);
+ 	if (old_mm) {
+ 		mmap_read_unlock(old_mm);
+ 		BUG_ON(active_mm != old_mm);
+--- a/include/linux/memcontrol.h
++++ b/include/linux/memcontrol.h
+@@ -348,6 +348,11 @@ struct mem_cgroup {
+ 	struct deferred_split deferred_split_queue;
+ #endif
+ 
++#ifdef CONFIG_LRU_GEN
++	/* per-memcg mm_struct list */
++	struct lru_gen_mm_list mm_list;
++#endif
++
+ 	struct mem_cgroup_per_node *nodeinfo[];
+ };
+ 
+--- a/include/linux/mm_types.h
++++ b/include/linux/mm_types.h
+@@ -580,6 +580,22 @@ struct mm_struct {
+ #ifdef CONFIG_IOMMU_SUPPORT
+ 		u32 pasid;
+ #endif
++#ifdef CONFIG_LRU_GEN
++		struct {
++			/* this mm_struct is on lru_gen_mm_list */
++			struct list_head list;
++			/*
++			 * Set when switching to this mm_struct, as a hint of
++			 * whether it has been used since the last time per-node
++			 * page table walkers cleared the corresponding bits.
++			 */
++			unsigned long bitmap;
++#ifdef CONFIG_MEMCG
++			/* points to the memcg of "owner" above */
++			struct mem_cgroup *memcg;
++#endif
++		} lru_gen;
++#endif /* CONFIG_LRU_GEN */
+ 	} __randomize_layout;
+ 
+ 	/*
+@@ -606,6 +622,66 @@ static inline cpumask_t *mm_cpumask(stru
+ 	return (struct cpumask *)&mm->cpu_bitmap;
+ }
+ 
++#ifdef CONFIG_LRU_GEN
++
++struct lru_gen_mm_list {
++	/* mm_struct list for page table walkers */
++	struct list_head fifo;
++	/* protects the list above */
++	spinlock_t lock;
++};
++
++void lru_gen_add_mm(struct mm_struct *mm);
++void lru_gen_del_mm(struct mm_struct *mm);
++#ifdef CONFIG_MEMCG
++void lru_gen_migrate_mm(struct mm_struct *mm);
++#endif
++
++static inline void lru_gen_init_mm(struct mm_struct *mm)
++{
++	INIT_LIST_HEAD(&mm->lru_gen.list);
++	mm->lru_gen.bitmap = 0;
++#ifdef CONFIG_MEMCG
++	mm->lru_gen.memcg = NULL;
++#endif
++}
++
++static inline void lru_gen_use_mm(struct mm_struct *mm)
++{
++	/*
++	 * When the bitmap is set, page reclaim knows this mm_struct has been
++	 * used since the last time it cleared the bitmap. So it might be worth
++	 * walking the page tables of this mm_struct to clear the accessed bit.
++	 */
++	WRITE_ONCE(mm->lru_gen.bitmap, -1);
++}
++
++#else /* !CONFIG_LRU_GEN */
++
++static inline void lru_gen_add_mm(struct mm_struct *mm)
++{
++}
++
++static inline void lru_gen_del_mm(struct mm_struct *mm)
++{
++}
++
++#ifdef CONFIG_MEMCG
++static inline void lru_gen_migrate_mm(struct mm_struct *mm)
++{
++}
++#endif
++
++static inline void lru_gen_init_mm(struct mm_struct *mm)
++{
++}
++
++static inline void lru_gen_use_mm(struct mm_struct *mm)
++{
++}
++
++#endif /* CONFIG_LRU_GEN */
++
+ struct mmu_gather;
+ extern void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm);
+ extern void tlb_gather_mmu_fullmm(struct mmu_gather *tlb, struct mm_struct *mm);
+--- a/include/linux/mmzone.h
++++ b/include/linux/mmzone.h
+@@ -385,7 +385,7 @@ enum {
+  * min_seq behind.
+  *
+  * The number of pages in each generation is eventually consistent and therefore
+- * can be transiently negative.
++ * can be transiently negative when reset_batch_size() is pending.
+  */
+ struct lru_gen_struct {
+ 	/* the aging increments the youngest generation number */
+@@ -407,6 +407,53 @@ struct lru_gen_struct {
+ 	atomic_long_t refaulted[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS];
+ };
+ 
++enum {
++	MM_LEAF_TOTAL,		/* total leaf entries */
++	MM_LEAF_OLD,		/* old leaf entries */
++	MM_LEAF_YOUNG,		/* young leaf entries */
++	MM_NONLEAF_TOTAL,	/* total non-leaf entries */
++	MM_NONLEAF_FOUND,	/* non-leaf entries found in Bloom filters */
++	MM_NONLEAF_ADDED,	/* non-leaf entries added to Bloom filters */
++	NR_MM_STATS
++};
++
++/* double-buffering Bloom filters */
++#define NR_BLOOM_FILTERS	2
++
++struct lru_gen_mm_state {
++	/* set to max_seq after each iteration */
++	unsigned long seq;
++	/* where the current iteration continues (inclusive) */
++	struct list_head *head;
++	/* where the last iteration ended (exclusive) */
++	struct list_head *tail;
++	/* to wait for the last page table walker to finish */
++	struct wait_queue_head wait;
++	/* Bloom filters flip after each iteration */
++	unsigned long *filters[NR_BLOOM_FILTERS];
++	/* the mm stats for debugging */
++	unsigned long stats[NR_HIST_GENS][NR_MM_STATS];
++	/* the number of concurrent page table walkers */
++	int nr_walkers;
++};
++
++struct lru_gen_mm_walk {
++	/* the lruvec under reclaim */
++	struct lruvec *lruvec;
++	/* unstable max_seq from lru_gen_struct */
++	unsigned long max_seq;
++	/* the next address within an mm to scan */
++	unsigned long next_addr;
++	/* to batch promoted pages */
++	int nr_pages[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES];
++	/* to batch the mm stats */
++	int mm_stats[NR_MM_STATS];
++	/* total batched items */
++	int batched;
++	bool can_swap;
++	bool force_scan;
++};
++
+ void lru_gen_init_lruvec(struct lruvec *lruvec);
+ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw);
+ 
+@@ -457,6 +504,8 @@ struct lruvec {
+ #ifdef CONFIG_LRU_GEN
+ 	/* evictable pages divided into generations */
+ 	struct lru_gen_struct		lrugen;
++	/* to concurrently iterate lru_gen_mm_list */
++	struct lru_gen_mm_state		mm_state;
+ #endif
+ #ifdef CONFIG_MEMCG
+ 	struct pglist_data *pgdat;
+@@ -1042,6 +1091,11 @@ typedef struct pglist_data {
+ 
+ 	unsigned long		flags;
+ 
++#ifdef CONFIG_LRU_GEN
++	/* kswap mm walk data */
++	struct lru_gen_mm_walk	mm_walk;
++#endif
++
+ 	ZONE_PADDING(_pad2_)
+ 
+ 	/* Per-node vmstats */
+--- a/include/linux/swap.h
++++ b/include/linux/swap.h
+@@ -137,6 +137,10 @@ union swap_header {
+  */
+ struct reclaim_state {
+ 	unsigned long reclaimed_slab;
++#ifdef CONFIG_LRU_GEN
++	/* per-thread mm walk data */
++	struct lru_gen_mm_walk *mm_walk;
++#endif
+ };
+ 
+ #ifdef __KERNEL__
+--- a/kernel/exit.c
++++ b/kernel/exit.c
+@@ -469,6 +469,7 @@ assign_new_owner:
+ 		goto retry;
+ 	}
+ 	WRITE_ONCE(mm->owner, c);
++	lru_gen_migrate_mm(mm);
+ 	task_unlock(c);
+ 	put_task_struct(c);
+ }
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -1083,6 +1083,7 @@ static struct mm_struct *mm_init(struct
+ 		goto fail_nocontext;
+ 
+ 	mm->user_ns = get_user_ns(user_ns);
++	lru_gen_init_mm(mm);
+ 	return mm;
+ 
+ fail_nocontext:
+@@ -1125,6 +1126,7 @@ static inline void __mmput(struct mm_str
+ 	}
+ 	if (mm->binfmt)
+ 		module_put(mm->binfmt->module);
++	lru_gen_del_mm(mm);
+ 	mmdrop(mm);
+ }
+ 
+@@ -2622,6 +2624,13 @@ pid_t kernel_clone(struct kernel_clone_a
+ 		get_task_struct(p);
+ 	}
+ 
++	if (IS_ENABLED(CONFIG_LRU_GEN) && !(clone_flags & CLONE_VM)) {
++		/* lock the task to synchronize with memcg migration */
++		task_lock(p);
++		lru_gen_add_mm(p->mm);
++		task_unlock(p);
++	}
++
+ 	wake_up_new_task(p);
+ 
+ 	/* forking complete and child started to run, tell ptracer */
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -5010,6 +5010,7 @@ context_switch(struct rq *rq, struct tas
+ 		 * finish_task_switch()'s mmdrop().
+ 		 */
+ 		switch_mm_irqs_off(prev->active_mm, next->mm, next);
++		lru_gen_use_mm(next->mm);
+ 
+ 		if (!prev->mm) {                        // from kernel
+ 			/* will mmdrop() in finish_task_switch(). */
+--- a/mm/memcontrol.c
++++ b/mm/memcontrol.c
+@@ -6212,6 +6212,30 @@ static void mem_cgroup_move_task(void)
+ }
+ #endif
+ 
++#ifdef CONFIG_LRU_GEN
++static void mem_cgroup_attach(struct cgroup_taskset *tset)
++{
++	struct task_struct *task;
++	struct cgroup_subsys_state *css;
++
++	/* find the first leader if there is any */
++	cgroup_taskset_for_each_leader(task, css, tset)
++		break;
++
++	if (!task)
++		return;
++
++	task_lock(task);
++	if (task->mm && READ_ONCE(task->mm->owner) == task)
++		lru_gen_migrate_mm(task->mm);
++	task_unlock(task);
++}
++#else
++static void mem_cgroup_attach(struct cgroup_taskset *tset)
++{
++}
++#endif /* CONFIG_LRU_GEN */
++
+ static int seq_puts_memcg_tunable(struct seq_file *m, unsigned long value)
+ {
+ 	if (value == PAGE_COUNTER_MAX)
+@@ -6555,6 +6579,7 @@ struct cgroup_subsys memory_cgrp_subsys
+ 	.css_reset = mem_cgroup_css_reset,
+ 	.css_rstat_flush = mem_cgroup_css_rstat_flush,
+ 	.can_attach = mem_cgroup_can_attach,
++	.attach = mem_cgroup_attach,
+ 	.cancel_attach = mem_cgroup_cancel_attach,
+ 	.post_attach = mem_cgroup_move_task,
+ 	.dfl_cftypes = memory_files,
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -50,6 +50,8 @@
+ #include <linux/printk.h>
+ #include <linux/dax.h>
+ #include <linux/psi.h>
++#include <linux/pagewalk.h>
++#include <linux/shmem_fs.h>
+ 
+ #include <asm/tlbflush.h>
+ #include <asm/div64.h>
+@@ -2853,7 +2855,7 @@ static bool can_age_anon_pages(struct pg
+ 		for ((type) = 0; (type) < ANON_AND_FILE; (type)++)	\
+ 			for ((zone) = 0; (zone) < MAX_NR_ZONES; (zone)++)
+ 
+-static struct lruvec __maybe_unused *get_lruvec(struct mem_cgroup *memcg, int nid)
++static struct lruvec *get_lruvec(struct mem_cgroup *memcg, int nid)
+ {
+ 	struct pglist_data *pgdat = NODE_DATA(nid);
+ 
+@@ -2899,6 +2901,371 @@ static bool __maybe_unused seq_is_valid(
+ }
+ 
+ /******************************************************************************
++ *                          mm_struct list
++ ******************************************************************************/
++
++static struct lru_gen_mm_list *get_mm_list(struct mem_cgroup *memcg)
++{
++	static struct lru_gen_mm_list mm_list = {
++		.fifo = LIST_HEAD_INIT(mm_list.fifo),
++		.lock = __SPIN_LOCK_UNLOCKED(mm_list.lock),
++	};
++
++#ifdef CONFIG_MEMCG
++	if (memcg)
++		return &memcg->mm_list;
++#endif
++	VM_WARN_ON_ONCE(!mem_cgroup_disabled());
++
++	return &mm_list;
++}
++
++void lru_gen_add_mm(struct mm_struct *mm)
++{
++	int nid;
++	struct mem_cgroup *memcg = get_mem_cgroup_from_mm(mm);
++	struct lru_gen_mm_list *mm_list = get_mm_list(memcg);
++
++	VM_WARN_ON_ONCE(!list_empty(&mm->lru_gen.list));
++#ifdef CONFIG_MEMCG
++	VM_WARN_ON_ONCE(mm->lru_gen.memcg);
++	mm->lru_gen.memcg = memcg;
++#endif
++	spin_lock(&mm_list->lock);
++
++	for_each_node_state(nid, N_MEMORY) {
++		struct lruvec *lruvec = get_lruvec(memcg, nid);
++
++		if (!lruvec)
++			continue;
++
++		/* the first addition since the last iteration */
++		if (lruvec->mm_state.tail == &mm_list->fifo)
++			lruvec->mm_state.tail = &mm->lru_gen.list;
++	}
++
++	list_add_tail(&mm->lru_gen.list, &mm_list->fifo);
++
++	spin_unlock(&mm_list->lock);
++}
++
++void lru_gen_del_mm(struct mm_struct *mm)
++{
++	int nid;
++	struct lru_gen_mm_list *mm_list;
++	struct mem_cgroup *memcg = NULL;
++
++	if (list_empty(&mm->lru_gen.list))
++		return;
++
++#ifdef CONFIG_MEMCG
++	memcg = mm->lru_gen.memcg;
++#endif
++	mm_list = get_mm_list(memcg);
++
++	spin_lock(&mm_list->lock);
++
++	for_each_node(nid) {
++		struct lruvec *lruvec = get_lruvec(memcg, nid);
++
++		if (!lruvec)
++			continue;
++
++		/* where the last iteration ended (exclusive) */
++		if (lruvec->mm_state.tail == &mm->lru_gen.list)
++			lruvec->mm_state.tail = lruvec->mm_state.tail->next;
++
++		/* where the current iteration continues (inclusive) */
++		if (lruvec->mm_state.head != &mm->lru_gen.list)
++			continue;
++
++		lruvec->mm_state.head = lruvec->mm_state.head->next;
++		/* the deletion ends the current iteration */
++		if (lruvec->mm_state.head == &mm_list->fifo)
++			WRITE_ONCE(lruvec->mm_state.seq, lruvec->mm_state.seq + 1);
++	}
++
++	list_del_init(&mm->lru_gen.list);
++
++	spin_unlock(&mm_list->lock);
++
++#ifdef CONFIG_MEMCG
++	mem_cgroup_put(mm->lru_gen.memcg);
++	mm->lru_gen.memcg = NULL;
++#endif
++}
++
++#ifdef CONFIG_MEMCG
++void lru_gen_migrate_mm(struct mm_struct *mm)
++{
++	struct mem_cgroup *memcg;
++	struct task_struct *task = rcu_dereference_protected(mm->owner, true);
++
++	VM_WARN_ON_ONCE(task->mm != mm);
++	lockdep_assert_held(&task->alloc_lock);
++
++	/* for mm_update_next_owner() */
++	if (mem_cgroup_disabled())
++		return;
++
++	rcu_read_lock();
++	memcg = mem_cgroup_from_task(task);
++	rcu_read_unlock();
++	if (memcg == mm->lru_gen.memcg)
++		return;
++
++	VM_WARN_ON_ONCE(!mm->lru_gen.memcg);
++	VM_WARN_ON_ONCE(list_empty(&mm->lru_gen.list));
++
++	lru_gen_del_mm(mm);
++	lru_gen_add_mm(mm);
++}
++#endif
++
++/*
++ * Bloom filters with m=1<<15, k=2 and false positive rates of ~1/5 when
++ * n=10,000 and ~1/2 when n=20,000, where, conventionally, m is the number of
++ * bits in a bitmap, k is the number of hash functions and n is the number of
++ * inserted items.
++ *
++ * Page table walkers use one of the two filters to reduce their search space.
++ * To get rid of non-leaf entries that no longer have enough leaf entries, the
++ * aging uses the double-buffering technique to flip to the other filter each
++ * time it produces a new generation. For non-leaf entries that have enough
++ * leaf entries, the aging carries them over to the next generation in
++ * walk_pmd_range(); the eviction also reports them when walking the rmap
++ * in lru_gen_look_around().
++ *
++ * For future optimizations:
++ * 1. It's not necessary to keep both filters all the time. The spare one can be
++ *    freed after the RCU grace period and reallocated if needed again.
++ * 2. When reallocating, it's worth scaling its size according to the number
++ *    of inserted entries in the other filter, to reduce the memory overhead on
++ *    small systems and false positives on large systems.
++ * 3. Jenkins' hash function is an alternative to Knuth's.
++ */
++#define BLOOM_FILTER_SHIFT	15
++
++static inline int filter_gen_from_seq(unsigned long seq)
++{
++	return seq % NR_BLOOM_FILTERS;
++}
++
++static void get_item_key(void *item, int *key)
++{
++	u32 hash = hash_ptr(item, BLOOM_FILTER_SHIFT * 2);
++
++	BUILD_BUG_ON(BLOOM_FILTER_SHIFT * 2 > BITS_PER_TYPE(u32));
++
++	key[0] = hash & (BIT(BLOOM_FILTER_SHIFT) - 1);
++	key[1] = hash >> BLOOM_FILTER_SHIFT;
++}
++
++static void reset_bloom_filter(struct lruvec *lruvec, unsigned long seq)
++{
++	unsigned long *filter;
++	int gen = filter_gen_from_seq(seq);
++
++	filter = lruvec->mm_state.filters[gen];
++	if (filter) {
++		bitmap_clear(filter, 0, BIT(BLOOM_FILTER_SHIFT));
++		return;
++	}
++
++	filter = bitmap_zalloc(BIT(BLOOM_FILTER_SHIFT),
++			       __GFP_HIGH | __GFP_NOMEMALLOC | __GFP_NOWARN);
++	WRITE_ONCE(lruvec->mm_state.filters[gen], filter);
++}
++
++static void update_bloom_filter(struct lruvec *lruvec, unsigned long seq, void *item)
++{
++	int key[2];
++	unsigned long *filter;
++	int gen = filter_gen_from_seq(seq);
++
++	filter = READ_ONCE(lruvec->mm_state.filters[gen]);
++	if (!filter)
++		return;
++
++	get_item_key(item, key);
++
++	if (!test_bit(key[0], filter))
++		set_bit(key[0], filter);
++	if (!test_bit(key[1], filter))
++		set_bit(key[1], filter);
++}
++
++static bool test_bloom_filter(struct lruvec *lruvec, unsigned long seq, void *item)
++{
++	int key[2];
++	unsigned long *filter;
++	int gen = filter_gen_from_seq(seq);
++
++	filter = READ_ONCE(lruvec->mm_state.filters[gen]);
++	if (!filter)
++		return true;
++
++	get_item_key(item, key);
++
++	return test_bit(key[0], filter) && test_bit(key[1], filter);
++}
++
++static void reset_mm_stats(struct lruvec *lruvec, struct lru_gen_mm_walk *walk, bool last)
++{
++	int i;
++	int hist;
++
++	lockdep_assert_held(&get_mm_list(lruvec_memcg(lruvec))->lock);
++
++	if (walk) {
++		hist = lru_hist_from_seq(walk->max_seq);
++
++		for (i = 0; i < NR_MM_STATS; i++) {
++			WRITE_ONCE(lruvec->mm_state.stats[hist][i],
++				   lruvec->mm_state.stats[hist][i] + walk->mm_stats[i]);
++			walk->mm_stats[i] = 0;
++		}
++	}
++
++	if (NR_HIST_GENS > 1 && last) {
++		hist = lru_hist_from_seq(lruvec->mm_state.seq + 1);
++
++		for (i = 0; i < NR_MM_STATS; i++)
++			WRITE_ONCE(lruvec->mm_state.stats[hist][i], 0);
++	}
++}
++
++static bool should_skip_mm(struct mm_struct *mm, struct lru_gen_mm_walk *walk)
++{
++	int type;
++	unsigned long size = 0;
++	struct pglist_data *pgdat = lruvec_pgdat(walk->lruvec);
++	int key = pgdat->node_id % BITS_PER_TYPE(mm->lru_gen.bitmap);
++
++	if (!walk->force_scan && !test_bit(key, &mm->lru_gen.bitmap))
++		return true;
++
++	clear_bit(key, &mm->lru_gen.bitmap);
++
++	for (type = !walk->can_swap; type < ANON_AND_FILE; type++) {
++		size += type ? get_mm_counter(mm, MM_FILEPAGES) :
++			       get_mm_counter(mm, MM_ANONPAGES) +
++			       get_mm_counter(mm, MM_SHMEMPAGES);
++	}
++
++	if (size < MIN_LRU_BATCH)
++		return true;
++
++	return !mmget_not_zero(mm);
++}
++
++static bool iterate_mm_list(struct lruvec *lruvec, struct lru_gen_mm_walk *walk,
++			    struct mm_struct **iter)
++{
++	bool first = false;
++	bool last = true;
++	struct mm_struct *mm = NULL;
++	struct mem_cgroup *memcg = lruvec_memcg(lruvec);
++	struct lru_gen_mm_list *mm_list = get_mm_list(memcg);
++	struct lru_gen_mm_state *mm_state = &lruvec->mm_state;
++
++	/*
++	 * There are four interesting cases for this page table walker:
++	 * 1. It tries to start a new iteration of mm_list with a stale max_seq;
++	 *    there is nothing left to do.
++	 * 2. It's the first of the current generation, and it needs to reset
++	 *    the Bloom filter for the next generation.
++	 * 3. It reaches the end of mm_list, and it needs to increment
++	 *    mm_state->seq; the iteration is done.
++	 * 4. It's the last of the current generation, and it needs to reset the
++	 *    mm stats counters for the next generation.
++	 */
++	spin_lock(&mm_list->lock);
++
++	VM_WARN_ON_ONCE(mm_state->seq + 1 < walk->max_seq);
++	VM_WARN_ON_ONCE(*iter && mm_state->seq > walk->max_seq);
++	VM_WARN_ON_ONCE(*iter && !mm_state->nr_walkers);
++
++	if (walk->max_seq <= mm_state->seq) {
++		if (!*iter)
++			last = false;
++		goto done;
++	}
++
++	if (!mm_state->nr_walkers) {
++		VM_WARN_ON_ONCE(mm_state->head && mm_state->head != &mm_list->fifo);
++
++		mm_state->head = mm_list->fifo.next;
++		first = true;
++	}
++
++	while (!mm && mm_state->head != &mm_list->fifo) {
++		mm = list_entry(mm_state->head, struct mm_struct, lru_gen.list);
++
++		mm_state->head = mm_state->head->next;
++
++		/* force scan for those added after the last iteration */
++		if (!mm_state->tail || mm_state->tail == &mm->lru_gen.list) {
++			mm_state->tail = mm_state->head;
++			walk->force_scan = true;
++		}
++
++		if (should_skip_mm(mm, walk))
++			mm = NULL;
++	}
++
++	if (mm_state->head == &mm_list->fifo)
++		WRITE_ONCE(mm_state->seq, mm_state->seq + 1);
++done:
++	if (*iter && !mm)
++		mm_state->nr_walkers--;
++	if (!*iter && mm)
++		mm_state->nr_walkers++;
++
++	if (mm_state->nr_walkers)
++		last = false;
++
++	if (*iter || last)
++		reset_mm_stats(lruvec, walk, last);
++
++	spin_unlock(&mm_list->lock);
++
++	if (mm && first)
++		reset_bloom_filter(lruvec, walk->max_seq + 1);
++
++	if (*iter)
++		mmput_async(*iter);
++
++	*iter = mm;
++
++	return last;
++}
++
++static bool iterate_mm_list_nowalk(struct lruvec *lruvec, unsigned long max_seq)
++{
++	bool success = false;
++	struct mem_cgroup *memcg = lruvec_memcg(lruvec);
++	struct lru_gen_mm_list *mm_list = get_mm_list(memcg);
++	struct lru_gen_mm_state *mm_state = &lruvec->mm_state;
++
++	spin_lock(&mm_list->lock);
++
++	VM_WARN_ON_ONCE(mm_state->seq + 1 < max_seq);
++
++	if (max_seq > mm_state->seq && !mm_state->nr_walkers) {
++		VM_WARN_ON_ONCE(mm_state->head && mm_state->head != &mm_list->fifo);
++
++		WRITE_ONCE(mm_state->seq, mm_state->seq + 1);
++		reset_mm_stats(lruvec, NULL, true);
++		success = true;
++	}
++
++	spin_unlock(&mm_list->lock);
++
++	return success;
++}
++
++/******************************************************************************
+  *                          refault feedback loop
+  ******************************************************************************/
+ 
+@@ -3048,6 +3415,118 @@ static int page_inc_gen(struct lruvec *l
+ 	return new_gen;
+ }
+ 
++static void update_batch_size(struct lru_gen_mm_walk *walk, struct page *page,
++			      int old_gen, int new_gen)
++{
++	int type = page_is_file_lru(page);
++	int zone = page_zonenum(page);
++	int delta = thp_nr_pages(page);
++
++	VM_WARN_ON_ONCE(old_gen >= MAX_NR_GENS);
++	VM_WARN_ON_ONCE(new_gen >= MAX_NR_GENS);
++
++	walk->batched++;
++
++	walk->nr_pages[old_gen][type][zone] -= delta;
++	walk->nr_pages[new_gen][type][zone] += delta;
++}
++
++static void reset_batch_size(struct lruvec *lruvec, struct lru_gen_mm_walk *walk)
++{
++	int gen, type, zone;
++	struct lru_gen_struct *lrugen = &lruvec->lrugen;
++
++	walk->batched = 0;
++
++	for_each_gen_type_zone(gen, type, zone) {
++		enum lru_list lru = type * LRU_INACTIVE_FILE;
++		int delta = walk->nr_pages[gen][type][zone];
++
++		if (!delta)
++			continue;
++
++		walk->nr_pages[gen][type][zone] = 0;
++		WRITE_ONCE(lrugen->nr_pages[gen][type][zone],
++			   lrugen->nr_pages[gen][type][zone] + delta);
++
++		if (lru_gen_is_active(lruvec, gen))
++			lru += LRU_ACTIVE;
++		__update_lru_size(lruvec, lru, zone, delta);
++	}
++}
++
++static int should_skip_vma(unsigned long start, unsigned long end, struct mm_walk *args)
++{
++	struct address_space *mapping;
++	struct vm_area_struct *vma = args->vma;
++	struct lru_gen_mm_walk *walk = args->private;
++
++	if (!vma_is_accessible(vma))
++		return true;
++
++	if (is_vm_hugetlb_page(vma))
++		return true;
++
++	if (vma->vm_flags & (VM_LOCKED | VM_SPECIAL | VM_SEQ_READ | VM_RAND_READ))
++		return true;
++
++	if (vma == get_gate_vma(vma->vm_mm))
++		return true;
++
++	if (vma_is_anonymous(vma))
++		return !walk->can_swap;
++
++	if (WARN_ON_ONCE(!vma->vm_file || !vma->vm_file->f_mapping))
++		return true;
++
++	mapping = vma->vm_file->f_mapping;
++	if (mapping_unevictable(mapping))
++		return true;
++
++	if (shmem_mapping(mapping))
++		return !walk->can_swap;
++
++	/* to exclude special mappings like dax, etc. */
++	return !mapping->a_ops->readpage;
++}
++
++/*
++ * Some userspace memory allocators map many single-page VMAs. Instead of
++ * returning back to the PGD table for each of such VMAs, finish an entire PMD
++ * table to reduce zigzags and improve cache performance.
++ */
++static bool get_next_vma(unsigned long mask, unsigned long size, struct mm_walk *args,
++			 unsigned long *vm_start, unsigned long *vm_end)
++{
++	unsigned long start = round_up(*vm_end, size);
++	unsigned long end = (start | ~mask) + 1;
++
++	VM_WARN_ON_ONCE(mask & size);
++	VM_WARN_ON_ONCE((start & mask) != (*vm_start & mask));
++
++	while (args->vma) {
++		if (start >= args->vma->vm_end) {
++			args->vma = args->vma->vm_next;
++			continue;
++		}
++
++		if (end && end <= args->vma->vm_start)
++			return false;
++
++		if (should_skip_vma(args->vma->vm_start, args->vma->vm_end, args)) {
++			args->vma = args->vma->vm_next;
++			continue;
++		}
++
++		*vm_start = max(start, args->vma->vm_start);
++		*vm_end = min(end - 1, args->vma->vm_end - 1) + 1;
++
++		return true;
++	}
++
++	return false;
++}
++
+ static unsigned long get_pte_pfn(pte_t pte, struct vm_area_struct *vma, unsigned long addr)
+ {
+ 	unsigned long pfn = pte_pfn(pte);
+@@ -3066,8 +3545,28 @@ static unsigned long get_pte_pfn(pte_t p
+ 	return pfn;
+ }
+ 
++#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG)
++static unsigned long get_pmd_pfn(pmd_t pmd, struct vm_area_struct *vma, unsigned long addr)
++{
++	unsigned long pfn = pmd_pfn(pmd);
++
++	VM_WARN_ON_ONCE(addr < vma->vm_start || addr >= vma->vm_end);
++
++	if (!pmd_present(pmd) || is_huge_zero_pmd(pmd))
++		return -1;
++
++	if (WARN_ON_ONCE(pmd_devmap(pmd)))
++		return -1;
++
++	if (WARN_ON_ONCE(!pfn_valid(pfn)))
++		return -1;
++
++	return pfn;
++}
++#endif
++
+ static struct page *get_pfn_page(unsigned long pfn, struct mem_cgroup *memcg,
+-				 struct pglist_data *pgdat)
++				 struct pglist_data *pgdat, bool can_swap)
+ {
+ 	struct page *page;
+ 
+@@ -3082,9 +3581,375 @@ static struct page *get_pfn_page(unsigne
+ 	if (page_memcg_rcu(page) != memcg)
+ 		return NULL;
+ 
++	/* file VMAs can contain anon pages from COW */
++	if (!page_is_file_lru(page) && !can_swap)
++		return NULL;
++
+ 	return page;
+ }
+ 
++static bool suitable_to_scan(int total, int young)
++{
++	int n = clamp_t(int, cache_line_size() / sizeof(pte_t), 2, 8);
++
++	/* suitable if the average number of young PTEs per cacheline is >=1 */
++	return young * n >= total;
++}
++
++static bool walk_pte_range(pmd_t *pmd, unsigned long start, unsigned long end,
++			   struct mm_walk *args)
++{
++	int i;
++	pte_t *pte;
++	spinlock_t *ptl;
++	unsigned long addr;
++	int total = 0;
++	int young = 0;
++	struct lru_gen_mm_walk *walk = args->private;
++	struct mem_cgroup *memcg = lruvec_memcg(walk->lruvec);
++	struct pglist_data *pgdat = lruvec_pgdat(walk->lruvec);
++	int old_gen, new_gen = lru_gen_from_seq(walk->max_seq);
++
++	VM_WARN_ON_ONCE(pmd_leaf(*pmd));
++
++	ptl = pte_lockptr(args->mm, pmd);
++	if (!spin_trylock(ptl))
++		return false;
++
++	arch_enter_lazy_mmu_mode();
++
++	pte = pte_offset_map(pmd, start & PMD_MASK);
++restart:
++	for (i = pte_index(start), addr = start; addr != end; i++, addr += PAGE_SIZE) {
++		unsigned long pfn;
++		struct page *page;
++
++		total++;
++		walk->mm_stats[MM_LEAF_TOTAL]++;
++
++		pfn = get_pte_pfn(pte[i], args->vma, addr);
++		if (pfn == -1)
++			continue;
++
++		if (!pte_young(pte[i])) {
++			walk->mm_stats[MM_LEAF_OLD]++;
++			continue;
++		}
++
++		page = get_pfn_page(pfn, memcg, pgdat, walk->can_swap);
++		if (!page)
++			continue;
++
++		if (!ptep_test_and_clear_young(args->vma, addr, pte + i))
++			VM_WARN_ON_ONCE(true);
++
++		young++;
++		walk->mm_stats[MM_LEAF_YOUNG]++;
++
++		if (pte_dirty(pte[i]) && !PageDirty(page) &&
++		    !(PageAnon(page) && PageSwapBacked(page) &&
++		      !PageSwapCache(page)))
++			set_page_dirty(page);
++
++		old_gen = page_update_gen(page, new_gen);
++		if (old_gen >= 0 && old_gen != new_gen)
++			update_batch_size(walk, page, old_gen, new_gen);
++	}
++
++	if (i < PTRS_PER_PTE && get_next_vma(PMD_MASK, PAGE_SIZE, args, &start, &end))
++		goto restart;
++
++	pte_unmap(pte);
++
++	arch_leave_lazy_mmu_mode();
++	spin_unlock(ptl);
++
++	return suitable_to_scan(total, young);
++}
++
++#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG)
++static void walk_pmd_range_locked(pud_t *pud, unsigned long next, struct vm_area_struct *vma,
++				  struct mm_walk *args, unsigned long *bitmap, unsigned long *start)
++{
++	int i;
++	pmd_t *pmd;
++	spinlock_t *ptl;
++	struct lru_gen_mm_walk *walk = args->private;
++	struct mem_cgroup *memcg = lruvec_memcg(walk->lruvec);
++	struct pglist_data *pgdat = lruvec_pgdat(walk->lruvec);
++	int old_gen, new_gen = lru_gen_from_seq(walk->max_seq);
++
++	VM_WARN_ON_ONCE(pud_leaf(*pud));
++
++	/* try to batch at most 1+MIN_LRU_BATCH+1 entries */
++	if (*start == -1) {
++		*start = next;
++		return;
++	}
++
++	i = next == -1 ? 0 : pmd_index(next) - pmd_index(*start);
++	if (i && i <= MIN_LRU_BATCH) {
++		__set_bit(i - 1, bitmap);
++		return;
++	}
++
++	pmd = pmd_offset(pud, *start);
++
++	ptl = pmd_lockptr(args->mm, pmd);
++	if (!spin_trylock(ptl))
++		goto done;
++
++	arch_enter_lazy_mmu_mode();
++
++	do {
++		unsigned long pfn;
++		struct page *page;
++		unsigned long addr = i ? (*start & PMD_MASK) + i * PMD_SIZE : *start;
++
++		pfn = get_pmd_pfn(pmd[i], vma, addr);
++		if (pfn == -1)
++			goto next;
++
++		if (!pmd_trans_huge(pmd[i])) {
++			if (IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG))
++				pmdp_test_and_clear_young(vma, addr, pmd + i);
++			goto next;
++		}
++
++		page = get_pfn_page(pfn, memcg, pgdat, walk->can_swap);
++		if (!page)
++			goto next;
++
++		if (!pmdp_test_and_clear_young(vma, addr, pmd + i))
++			goto next;
++
++		walk->mm_stats[MM_LEAF_YOUNG]++;
++
++		if (pmd_dirty(pmd[i]) && !PageDirty(page) &&
++		    !(PageAnon(page) && PageSwapBacked(page) &&
++		      !PageSwapCache(page)))
++			set_page_dirty(page);
++
++		old_gen = page_update_gen(page, new_gen);
++		if (old_gen >= 0 && old_gen != new_gen)
++			update_batch_size(walk, page, old_gen, new_gen);
++next:
++		i = i > MIN_LRU_BATCH ? 0 : find_next_bit(bitmap, MIN_LRU_BATCH, i) + 1;
++	} while (i <= MIN_LRU_BATCH);
++
++	arch_leave_lazy_mmu_mode();
++	spin_unlock(ptl);
++done:
++	*start = -1;
++	bitmap_zero(bitmap, MIN_LRU_BATCH);
++}
++#else
++static void walk_pmd_range_locked(pud_t *pud, unsigned long next, struct vm_area_struct *vma,
++				  struct mm_walk *args, unsigned long *bitmap, unsigned long *start)
++{
++}
++#endif
++
++static void walk_pmd_range(pud_t *pud, unsigned long start, unsigned long end,
++			   struct mm_walk *args)
++{
++	int i;
++	pmd_t *pmd;
++	unsigned long next;
++	unsigned long addr;
++	struct vm_area_struct *vma;
++	unsigned long pos = -1;
++	struct lru_gen_mm_walk *walk = args->private;
++	unsigned long bitmap[BITS_TO_LONGS(MIN_LRU_BATCH)] = {};
++
++	VM_WARN_ON_ONCE(pud_leaf(*pud));
++
++	/*
++	 * Finish an entire PMD in two passes: the first only reaches to PTE
++	 * tables to avoid taking the PMD lock; the second, if necessary, takes
++	 * the PMD lock to clear the accessed bit in PMD entries.
++	 */
++	pmd = pmd_offset(pud, start & PUD_MASK);
++restart:
++	/* walk_pte_range() may call get_next_vma() */
++	vma = args->vma;
++	for (i = pmd_index(start), addr = start; addr != end; i++, addr = next) {
++		pmd_t val = pmd_read_atomic(pmd + i);
++
++		/* for pmd_read_atomic() */
++		barrier();
++
++		next = pmd_addr_end(addr, end);
++
++		if (!pmd_present(val) || is_huge_zero_pmd(val)) {
++			walk->mm_stats[MM_LEAF_TOTAL]++;
++			continue;
++		}
++
++#ifdef CONFIG_TRANSPARENT_HUGEPAGE
++		if (pmd_trans_huge(val)) {
++			unsigned long pfn = pmd_pfn(val);
++			struct pglist_data *pgdat = lruvec_pgdat(walk->lruvec);
++
++			walk->mm_stats[MM_LEAF_TOTAL]++;
++
++			if (!pmd_young(val)) {
++				walk->mm_stats[MM_LEAF_OLD]++;
++				continue;
++			}
++
++			/* try to avoid unnecessary memory loads */
++			if (pfn < pgdat->node_start_pfn || pfn >= pgdat_end_pfn(pgdat))
++				continue;
++
++			walk_pmd_range_locked(pud, addr, vma, args, bitmap, &pos);
++			continue;
++		}
++#endif
++		walk->mm_stats[MM_NONLEAF_TOTAL]++;
++
++#ifdef CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG
++		if (!pmd_young(val))
++			continue;
++
++		walk_pmd_range_locked(pud, addr, vma, args, bitmap, &pos);
++#endif
++		if (!walk->force_scan && !test_bloom_filter(walk->lruvec, walk->max_seq, pmd + i))
++			continue;
++
++		walk->mm_stats[MM_NONLEAF_FOUND]++;
++
++		if (!walk_pte_range(&val, addr, next, args))
++			continue;
++
++		walk->mm_stats[MM_NONLEAF_ADDED]++;
++
++		/* carry over to the next generation */
++		update_bloom_filter(walk->lruvec, walk->max_seq + 1, pmd + i);
++	}
++
++	walk_pmd_range_locked(pud, -1, vma, args, bitmap, &pos);
++
++	if (i < PTRS_PER_PMD && get_next_vma(PUD_MASK, PMD_SIZE, args, &start, &end))
++		goto restart;
++}
++
++static int walk_pud_range(p4d_t *p4d, unsigned long start, unsigned long end,
++			  struct mm_walk *args)
++{
++	int i;
++	pud_t *pud;
++	unsigned long addr;
++	unsigned long next;
++	struct lru_gen_mm_walk *walk = args->private;
++
++	VM_WARN_ON_ONCE(p4d_leaf(*p4d));
++
++	pud = pud_offset(p4d, start & P4D_MASK);
++restart:
++	for (i = pud_index(start), addr = start; addr != end; i++, addr = next) {
++		pud_t val = READ_ONCE(pud[i]);
++
++		next = pud_addr_end(addr, end);
++
++		if (!pud_present(val) || WARN_ON_ONCE(pud_leaf(val)))
++			continue;
++
++		walk_pmd_range(&val, addr, next, args);
++
++		/* a racy check to curtail the waiting time */
++		if (wq_has_sleeper(&walk->lruvec->mm_state.wait))
++			return 1;
++
++		if (need_resched() || walk->batched >= MAX_LRU_BATCH) {
++			end = (addr | ~PUD_MASK) + 1;
++			goto done;
++		}
++	}
++
++	if (i < PTRS_PER_PUD && get_next_vma(P4D_MASK, PUD_SIZE, args, &start, &end))
++		goto restart;
++
++	end = round_up(end, P4D_SIZE);
++done:
++	if (!end || !args->vma)
++		return 1;
++
++	walk->next_addr = max(end, args->vma->vm_start);
++
++	return -EAGAIN;
++}
++
++static void walk_mm(struct lruvec *lruvec, struct mm_struct *mm, struct lru_gen_mm_walk *walk)
++{
++	static const struct mm_walk_ops mm_walk_ops = {
++		.test_walk = should_skip_vma,
++		.p4d_entry = walk_pud_range,
++	};
++
++	int err;
++	struct mem_cgroup *memcg = lruvec_memcg(lruvec);
++
++	walk->next_addr = FIRST_USER_ADDRESS;
++
++	do {
++		err = -EBUSY;
++
++		/* page_update_gen() requires stable page_memcg() */
++		if (!mem_cgroup_trylock_pages(memcg))
++			break;
++
++		/* the caller might be holding the lock for write */
++		if (mmap_read_trylock(mm)) {
++			err = walk_page_range(mm, walk->next_addr, ULONG_MAX, &mm_walk_ops, walk);
++
++			mmap_read_unlock(mm);
++		}
++
++		mem_cgroup_unlock_pages();
++
++		if (walk->batched) {
++			spin_lock_irq(&lruvec->lru_lock);
++			reset_batch_size(lruvec, walk);
++			spin_unlock_irq(&lruvec->lru_lock);
++		}
++
++		cond_resched();
++	} while (err == -EAGAIN);
++}
++
++static struct lru_gen_mm_walk *set_mm_walk(struct pglist_data *pgdat)
++{
++	struct lru_gen_mm_walk *walk = current->reclaim_state->mm_walk;
++
++	if (pgdat && current_is_kswapd()) {
++		VM_WARN_ON_ONCE(walk);
++
++		walk = &pgdat->mm_walk;
++	} else if (!pgdat && !walk) {
++		VM_WARN_ON_ONCE(current_is_kswapd());
++
++		walk = kzalloc(sizeof(*walk), __GFP_HIGH | __GFP_NOMEMALLOC | __GFP_NOWARN);
++	}
++
++	current->reclaim_state->mm_walk = walk;
++
++	return walk;
++}
++
++static void clear_mm_walk(void)
++{
++	struct lru_gen_mm_walk *walk = current->reclaim_state->mm_walk;
++
++	VM_WARN_ON_ONCE(walk && memchr_inv(walk->nr_pages, 0, sizeof(walk->nr_pages)));
++	VM_WARN_ON_ONCE(walk && memchr_inv(walk->mm_stats, 0, sizeof(walk->mm_stats)));
++
++	current->reclaim_state->mm_walk = NULL;
++
++	if (!current_is_kswapd())
++		kfree(walk);
++}
++
+ static void inc_min_seq(struct lruvec *lruvec, int type)
+ {
+ 	struct lru_gen_struct *lrugen = &lruvec->lrugen;
+@@ -3136,7 +4001,7 @@ next:
+ 	return success;
+ }
+ 
+-static void inc_max_seq(struct lruvec *lruvec, unsigned long max_seq, bool can_swap)
++static void inc_max_seq(struct lruvec *lruvec, bool can_swap)
+ {
+ 	int prev, next;
+ 	int type, zone;
+@@ -3146,9 +4011,6 @@ static void inc_max_seq(struct lruvec *l
+ 
+ 	VM_WARN_ON_ONCE(!seq_is_valid(lruvec));
+ 
+-	if (max_seq != lrugen->max_seq)
+-		goto unlock;
+-
+ 	for (type = ANON_AND_FILE - 1; type >= 0; type--) {
+ 		if (get_nr_gens(lruvec, type) != MAX_NR_GENS)
+ 			continue;
+@@ -3186,10 +4048,76 @@ static void inc_max_seq(struct lruvec *l
+ 
+ 	/* make sure preceding modifications appear */
+ 	smp_store_release(&lrugen->max_seq, lrugen->max_seq + 1);
+-unlock:
++
+ 	spin_unlock_irq(&lruvec->lru_lock);
+ }
+ 
++static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq,
++			       struct scan_control *sc, bool can_swap)
++{
++	bool success;
++	struct lru_gen_mm_walk *walk;
++	struct mm_struct *mm = NULL;
++	struct lru_gen_struct *lrugen = &lruvec->lrugen;
++
++	VM_WARN_ON_ONCE(max_seq > READ_ONCE(lrugen->max_seq));
++
++	/* see the comment in iterate_mm_list() */
++	if (max_seq <= READ_ONCE(lruvec->mm_state.seq)) {
++		success = false;
++		goto done;
++	}
++
++	/*
++	 * If the hardware doesn't automatically set the accessed bit, fallback
++	 * to lru_gen_look_around(), which only clears the accessed bit in a
++	 * handful of PTEs. Spreading the work out over a period of time usually
++	 * is less efficient, but it avoids bursty page faults.
++	 */
++	if (!arch_has_hw_pte_young()) {
++		success = iterate_mm_list_nowalk(lruvec, max_seq);
++		goto done;
++	}
++
++	walk = set_mm_walk(NULL);
++	if (!walk) {
++		success = iterate_mm_list_nowalk(lruvec, max_seq);
++		goto done;
++	}
++
++	walk->lruvec = lruvec;
++	walk->max_seq = max_seq;
++	walk->can_swap = can_swap;
++	walk->force_scan = false;
++
++	do {
++		success = iterate_mm_list(lruvec, walk, &mm);
++		if (mm)
++			walk_mm(lruvec, mm, walk);
++
++		cond_resched();
++	} while (mm);
++done:
++	if (!success) {
++		if (sc->priority <= DEF_PRIORITY - 2)
++			wait_event_killable(lruvec->mm_state.wait,
++					    max_seq < READ_ONCE(lrugen->max_seq));
++
++		return max_seq < READ_ONCE(lrugen->max_seq);
++	}
++
++	VM_WARN_ON_ONCE(max_seq != READ_ONCE(lrugen->max_seq));
++
++	inc_max_seq(lruvec, can_swap);
++	/* either this sees any waiters or they will see updated max_seq */
++	if (wq_has_sleeper(&lruvec->mm_state.wait))
++		wake_up_all(&lruvec->mm_state.wait);
++
++	wakeup_flusher_threads(WB_REASON_VMSCAN);
++
++	return true;
++}
++
+ static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq, unsigned long *min_seq,
+ 			     struct scan_control *sc, bool can_swap, unsigned long *nr_to_scan)
+ {
+@@ -3265,7 +4193,7 @@ static void age_lruvec(struct lruvec *lr
+ 
+ 	need_aging = should_run_aging(lruvec, max_seq, min_seq, sc, swappiness, &nr_to_scan);
+ 	if (need_aging)
+-		inc_max_seq(lruvec, max_seq, swappiness);
++		try_to_inc_max_seq(lruvec, max_seq, sc, swappiness);
+ }
+ 
+ static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc)
+@@ -3274,6 +4202,8 @@ static void lru_gen_age_node(struct pgli
+ 
+ 	VM_WARN_ON_ONCE(!current_is_kswapd());
+ 
++	set_mm_walk(pgdat);
++
+ 	memcg = mem_cgroup_iter(NULL, NULL, NULL);
+ 	do {
+ 		struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat);
+@@ -3282,11 +4212,16 @@ static void lru_gen_age_node(struct pgli
+ 
+ 		cond_resched();
+ 	} while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)));
++
++	clear_mm_walk();
+ }
+ 
+ /*
+  * This function exploits spatial locality when shrink_page_list() walks the
+- * rmap. It scans the adjacent PTEs of a young PTE and promotes hot pages.
++ * rmap. It scans the adjacent PTEs of a young PTE and promotes hot pages. If
++ * the scan was done cacheline efficiently, it adds the PMD entry pointing to
++ * the PTE table to the Bloom filter. This forms a feedback loop between the
++ * eviction and the aging.
+  */
+ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
+ {
+@@ -3295,6 +4230,8 @@ void lru_gen_look_around(struct page_vma
+ 	unsigned long start;
+ 	unsigned long end;
+ 	unsigned long addr;
++	struct lru_gen_mm_walk *walk;
++	int young = 0;
+ 	unsigned long bitmap[BITS_TO_LONGS(MIN_LRU_BATCH)] = {};
+ 	struct page *page = pvmw->page;
+ 	struct mem_cgroup *memcg = page_memcg(page);
+@@ -3309,6 +4246,9 @@ void lru_gen_look_around(struct page_vma
+ 	if (spin_is_contended(pvmw->ptl))
+ 		return;
+ 
++	/* avoid taking the LRU lock under the PTL when possible */
++	walk = current->reclaim_state ? current->reclaim_state->mm_walk : NULL;
++
+ 	start = max(pvmw->address & PMD_MASK, pvmw->vma->vm_start);
+ 	end = min(pvmw->address | ~PMD_MASK, pvmw->vma->vm_end - 1) + 1;
+ 
+@@ -3338,13 +4278,15 @@ void lru_gen_look_around(struct page_vma
+ 		if (!pte_young(pte[i]))
+ 			continue;
+ 
+-		page = get_pfn_page(pfn, memcg, pgdat);
++		page = get_pfn_page(pfn, memcg, pgdat, !walk || walk->can_swap);
+ 		if (!page)
+ 			continue;
+ 
+ 		if (!ptep_test_and_clear_young(pvmw->vma, addr, pte + i))
+ 			VM_WARN_ON_ONCE(true);
+ 
++		young++;
++
+ 		if (pte_dirty(pte[i]) && !PageDirty(page) &&
+ 		    !(PageAnon(page) && PageSwapBacked(page) &&
+ 		      !PageSwapCache(page)))
+@@ -3360,7 +4302,11 @@ void lru_gen_look_around(struct page_vma
+ 	arch_leave_lazy_mmu_mode();
+ 	rcu_read_unlock();
+ 
+-	if (bitmap_weight(bitmap, MIN_LRU_BATCH) < PAGEVEC_SIZE) {
++	/* feedback from rmap walkers to page table walkers */
++	if (suitable_to_scan(i, young))
++		update_bloom_filter(lruvec, max_seq, pvmw->pmd);
++
++	if (!walk && bitmap_weight(bitmap, MIN_LRU_BATCH) < PAGEVEC_SIZE) {
+ 		for_each_set_bit(i, bitmap, MIN_LRU_BATCH) {
+ 			page = pte_page(pte[i]);
+ 			activate_page(page);
+@@ -3372,8 +4318,10 @@ void lru_gen_look_around(struct page_vma
+ 	if (!mem_cgroup_trylock_pages(memcg))
+ 		return;
+ 
+-	spin_lock_irq(&lruvec->lru_lock);
+-	new_gen = lru_gen_from_seq(lruvec->lrugen.max_seq);
++	if (!walk) {
++		spin_lock_irq(&lruvec->lru_lock);
++		new_gen = lru_gen_from_seq(lruvec->lrugen.max_seq);
++	}
+ 
+ 	for_each_set_bit(i, bitmap, MIN_LRU_BATCH) {
+ 		page = compound_head(pte_page(pte[i]));
+@@ -3384,10 +4332,14 @@ void lru_gen_look_around(struct page_vma
+ 		if (old_gen < 0 || old_gen == new_gen)
+ 			continue;
+ 
+-		lru_gen_update_size(lruvec, page, old_gen, new_gen);
++		if (walk)
++			update_batch_size(walk, page, old_gen, new_gen);
++		else
++			lru_gen_update_size(lruvec, page, old_gen, new_gen);
+ 	}
+ 
+-	spin_unlock_irq(&lruvec->lru_lock);
++	if (!walk)
++		spin_unlock_irq(&lruvec->lru_lock);
+ 
+ 	mem_cgroup_unlock_pages();
+ }
+@@ -3670,6 +4622,7 @@ static int evict_pages(struct lruvec *lr
+ 	struct page *page;
+ 	enum vm_event_item item;
+ 	struct reclaim_stat stat;
++	struct lru_gen_mm_walk *walk;
+ 	struct mem_cgroup *memcg = lruvec_memcg(lruvec);
+ 	struct pglist_data *pgdat = lruvec_pgdat(lruvec);
+ 
+@@ -3706,6 +4659,10 @@ static int evict_pages(struct lruvec *lr
+ 
+ 	move_pages_to_lru(lruvec, &list);
+ 
++	walk = current->reclaim_state->mm_walk;
++	if (walk && walk->batched)
++		reset_batch_size(lruvec, walk);
++
+ 	item = current_is_kswapd() ? PGSTEAL_KSWAPD : PGSTEAL_DIRECT;
+ 	if (!cgroup_reclaim(sc))
+ 		__count_vm_events(item, reclaimed);
+@@ -3722,6 +4679,11 @@ static int evict_pages(struct lruvec *lr
+ 	return scanned;
+ }
+ 
++/*
++ * For future optimizations:
++ * 1. Defer try_to_inc_max_seq() to workqueues to reduce latency for memcg
++ *    reclaim.
++ */
+ static unsigned long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc,
+ 				    bool can_swap)
+ {
+@@ -3747,7 +4709,8 @@ static unsigned long get_nr_to_scan(stru
+ 	if (current_is_kswapd())
+ 		return 0;
+ 
+-	inc_max_seq(lruvec, max_seq, can_swap);
++	if (try_to_inc_max_seq(lruvec, max_seq, sc, can_swap))
++		return nr_to_scan;
+ done:
+ 	return min_seq[!can_swap] + MIN_NR_GENS <= max_seq ? nr_to_scan : 0;
+ }
+@@ -3761,6 +4724,8 @@ static void lru_gen_shrink_lruvec(struct
+ 
+ 	blk_start_plug(&plug);
+ 
++	set_mm_walk(lruvec_pgdat(lruvec));
++
+ 	while (true) {
+ 		int delta;
+ 		int swappiness;
+@@ -3788,6 +4753,8 @@ static void lru_gen_shrink_lruvec(struct
+ 		cond_resched();
+ 	}
+ 
++	clear_mm_walk();
++
+ 	blk_finish_plug(&plug);
+ }
+ 
+@@ -3804,15 +4771,21 @@ void lru_gen_init_lruvec(struct lruvec *
+ 
+ 	for_each_gen_type_zone(gen, type, zone)
+ 		INIT_LIST_HEAD(&lrugen->lists[gen][type][zone]);
++
++	lruvec->mm_state.seq = MIN_NR_GENS;
++	init_waitqueue_head(&lruvec->mm_state.wait);
+ }
+ 
+ #ifdef CONFIG_MEMCG
+ void lru_gen_init_memcg(struct mem_cgroup *memcg)
+ {
++	INIT_LIST_HEAD(&memcg->mm_list.fifo);
++	spin_lock_init(&memcg->mm_list.lock);
+ }
+ 
+ void lru_gen_exit_memcg(struct mem_cgroup *memcg)
+ {
++	int i;
+ 	int nid;
+ 
+ 	for_each_node(nid) {
+@@ -3820,6 +4793,11 @@ void lru_gen_exit_memcg(struct mem_cgrou
+ 
+ 		VM_WARN_ON_ONCE(memchr_inv(lruvec->lrugen.nr_pages, 0,
+ 					   sizeof(lruvec->lrugen.nr_pages)));
++
++		for (i = 0; i < NR_BLOOM_FILTERS; i++) {
++			bitmap_free(lruvec->mm_state.filters[i]);
++			lruvec->mm_state.filters[i] = NULL;
++		}
+ 	}
+ }
+ #endif
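
As a quick reference for the heuristic the page table walk above relies on, here is a standalone C sketch of suitable_to_scan() as used by walk_pte_range(). It is illustrative only and not part of the patch: the kernel derives n from cache_line_size()/sizeof(pte_t); the 64-byte cache line and 8-byte PTE are assumptions chosen so the snippet builds outside the kernel tree.

#include <stdbool.h>
#include <stdio.h>

#define ASSUMED_PTES_PER_CACHELINE 8 /* assumption: 64-byte lines, 8-byte PTEs */

static int clamp_int(int v, int lo, int hi)
{
	return v < lo ? lo : (v > hi ? hi : v);
}

/* suitable if the average number of young PTEs per cacheline is >= 1 */
static bool suitable_to_scan(int total, int young)
{
	int n = clamp_int(ASSUMED_PTES_PER_CACHELINE, 2, 8);

	return young * n >= total;
}

int main(void)
{
	/* 512 PTEs scanned, 64 young: 64 * 8 >= 512, worth rescanning this PMD */
	printf("512 total, 64 young  -> %d\n", suitable_to_scan(512, 64));
	/* 512 PTEs scanned, 10 young: not worth adding to the Bloom filter */
	printf("512 total, 10 young  -> %d\n", suitable_to_scan(512, 10));
	return 0;
}

A PMD entry whose PTE table passes this test is fed into the Bloom filter for the next generation, which is the feedback loop the walk_pmd_range() comments above describe.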

+ 315 - 0
target/linux/generic/backport-6.1/020-v6.1-09-mm-multi-gen-LRU-optimize-multiple-memcgs.patch

@@ -0,0 +1,315 @@
+From 36a18a68ea458e8f4db2ca86b00091daf32c6c74 Mon Sep 17 00:00:00 2001
+From: Yu Zhao <[email protected]>
+Date: Sun, 18 Sep 2022 02:00:06 -0600
+Subject: [PATCH 09/29] mm: multi-gen LRU: optimize multiple memcgs
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+When multiple memcgs are available, it is possible to use generations as a
+frame of reference to make better choices and improve overall performance
+under global memory pressure.  This patch adds a basic optimization to
+select memcgs that can drop single-use unmapped clean pages first.  Doing
+so reduces the chance of going into the aging path or swapping, which can
+be costly.
+
+A typical example that benefits from this optimization is a server running
+mixed types of workloads, e.g., heavy anon workload in one memcg and heavy
+buffered I/O workload in the other.
+
+Though this optimization can be applied to both kswapd and direct reclaim,
+it is only added to kswapd to keep the patchset manageable.  Later
+improvements may cover the direct reclaim path.
+
+While ensuring certain fairness to all eligible memcgs, proportional scans
+of individual memcgs also require proper backoff to avoid overshooting
+their aggregate reclaim target by too much.  Otherwise it can cause high
+direct reclaim latency.  The conditions for backoff are:
+
+1. At low priorities, for direct reclaim, if aging fairness or direct
+   reclaim latency is at risk, i.e., aging one memcg multiple times or
+   swapping after the target is met.
+2. At high priorities, for global reclaim, if per-zone free pages are
+   above respective watermarks.
+
+Server benchmark results:
+  Mixed workloads:
+    fio (buffered I/O): +[19, 21]%
+                IOPS         BW
+      patch1-8: 1880k        7343MiB/s
+      patch1-9: 2252k        8796MiB/s
+
+    memcached (anon): +[119, 123]%
+                Ops/sec      KB/sec
+      patch1-8: 862768.65    33514.68
+      patch1-9: 1911022.12   74234.54
+
+  Mixed workloads:
+    fio (buffered I/O): +[75, 77]%
+                IOPS         BW
+      5.19-rc1: 1279k        4996MiB/s
+      patch1-9: 2252k        8796MiB/s
+
+    memcached (anon): +[13, 15]%
+                Ops/sec      KB/sec
+      5.19-rc1: 1673524.04   65008.87
+      patch1-9: 1911022.12   74234.54
+
+  Configurations:
+    (changes since patch 6)
+
+    cat mixed.sh
+    modprobe brd rd_nr=2 rd_size=56623104
+
+    swapoff -a
+    mkswap /dev/ram0
+    swapon /dev/ram0
+
+    mkfs.ext4 /dev/ram1
+    mount -t ext4 /dev/ram1 /mnt
+
+    memtier_benchmark -S /var/run/memcached/memcached.sock \
+      -P memcache_binary -n allkeys --key-minimum=1 \
+      --key-maximum=50000000 --key-pattern=P:P -c 1 -t 36 \
+      --ratio 1:0 --pipeline 8 -d 2000
+
+    fio -name=mglru --numjobs=36 --directory=/mnt --size=1408m \
+      --buffered=1 --ioengine=io_uring --iodepth=128 \
+      --iodepth_batch_submit=32 --iodepth_batch_complete=32 \
+      --rw=randread --random_distribution=random --norandommap \
+      --time_based --ramp_time=10m --runtime=90m --group_reporting &
+    pid=$!
+
+    sleep 200
+
+    memtier_benchmark -S /var/run/memcached/memcached.sock \
+      -P memcache_binary -n allkeys --key-minimum=1 \
+      --key-maximum=50000000 --key-pattern=R:R -c 1 -t 36 \
+      --ratio 0:1 --pipeline 8 --randomize --distinct-client-seed
+
+    kill -INT $pid
+    wait
+
+Client benchmark results:
+  no change (CONFIG_MEMCG=n)
+
+Link: https://lkml.kernel.org/r/[email protected]
+Signed-off-by: Yu Zhao <[email protected]>
+Acked-by: Brian Geffon <[email protected]>
+Acked-by: Jan Alexander Steffens (heftig) <[email protected]>
+Acked-by: Oleksandr Natalenko <[email protected]>
+Acked-by: Steven Barrett <[email protected]>
+Acked-by: Suleiman Souhlal <[email protected]>
+Tested-by: Daniel Byrne <[email protected]>
+Tested-by: Donald Carr <[email protected]>
+Tested-by: Holger Hoffstätte <[email protected]>
+Tested-by: Konstantin Kharlamov <[email protected]>
+Tested-by: Shuang Zhai <[email protected]>
+Tested-by: Sofia Trinh <[email protected]>
+Tested-by: Vaibhav Jain <[email protected]>
+Cc: Andi Kleen <[email protected]>
+Cc: Aneesh Kumar K.V <[email protected]>
+Cc: Barry Song <[email protected]>
+Cc: Catalin Marinas <[email protected]>
+Cc: Dave Hansen <[email protected]>
+Cc: Hillf Danton <[email protected]>
+Cc: Jens Axboe <[email protected]>
+Cc: Johannes Weiner <[email protected]>
+Cc: Jonathan Corbet <[email protected]>
+Cc: Linus Torvalds <[email protected]>
+Cc: Matthew Wilcox <[email protected]>
+Cc: Mel Gorman <[email protected]>
+Cc: Miaohe Lin <[email protected]>
+Cc: Michael Larabel <[email protected]>
+Cc: Michal Hocko <[email protected]>
+Cc: Mike Rapoport <[email protected]>
+Cc: Mike Rapoport <[email protected]>
+Cc: Peter Zijlstra <[email protected]>
+Cc: Qi Zheng <[email protected]>
+Cc: Tejun Heo <[email protected]>
+Cc: Vlastimil Babka <[email protected]>
+Cc: Will Deacon <[email protected]>
+Signed-off-by: Andrew Morton <[email protected]>
+---
+ mm/vmscan.c | 105 +++++++++++++++++++++++++++++++++++++++++++++++-----
+ 1 file changed, 96 insertions(+), 9 deletions(-)
+
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -127,6 +127,12 @@ struct scan_control {
+ 	/* Always discard instead of demoting to lower tier memory */
+ 	unsigned int no_demotion:1;
+ 
++#ifdef CONFIG_LRU_GEN
++	/* help kswapd make better choices among multiple memcgs */
++	unsigned int memcgs_need_aging:1;
++	unsigned long last_reclaimed;
++#endif
++
+ 	/* Allocation order */
+ 	s8 order;
+ 
+@@ -4202,6 +4208,19 @@ static void lru_gen_age_node(struct pgli
+ 
+ 	VM_WARN_ON_ONCE(!current_is_kswapd());
+ 
++	sc->last_reclaimed = sc->nr_reclaimed;
++
++	/*
++	 * To reduce the chance of going into the aging path, which can be
++	 * costly, optimistically skip it if the flag below was cleared in the
++	 * eviction path. This improves the overall performance when multiple
++	 * memcgs are available.
++	 */
++	if (!sc->memcgs_need_aging) {
++		sc->memcgs_need_aging = true;
++		return;
++	}
++
+ 	set_mm_walk(pgdat);
+ 
+ 	memcg = mem_cgroup_iter(NULL, NULL, NULL);
+@@ -4613,7 +4632,8 @@ static int isolate_pages(struct lruvec *
+ 	return scanned;
+ }
+ 
+-static int evict_pages(struct lruvec *lruvec, struct scan_control *sc, int swappiness)
++static int evict_pages(struct lruvec *lruvec, struct scan_control *sc, int swappiness,
++		       bool *need_swapping)
+ {
+ 	int type;
+ 	int scanned;
+@@ -4676,6 +4696,9 @@ static int evict_pages(struct lruvec *lr
+ 
+ 	sc->nr_reclaimed += reclaimed;
+ 
++	if (need_swapping && type == LRU_GEN_ANON)
++		*need_swapping = true;
++
+ 	return scanned;
+ }
+ 
+@@ -4685,9 +4708,8 @@ static int evict_pages(struct lruvec *lr
+  *    reclaim.
+  */
+ static unsigned long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc,
+-				    bool can_swap)
++				    bool can_swap, bool *need_aging)
+ {
+-	bool need_aging;
+ 	unsigned long nr_to_scan;
+ 	struct mem_cgroup *memcg = lruvec_memcg(lruvec);
+ 	DEFINE_MAX_SEQ(lruvec);
+@@ -4697,8 +4719,8 @@ static unsigned long get_nr_to_scan(stru
+ 	    (mem_cgroup_below_low(memcg) && !sc->memcg_low_reclaim))
+ 		return 0;
+ 
+-	need_aging = should_run_aging(lruvec, max_seq, min_seq, sc, can_swap, &nr_to_scan);
+-	if (!need_aging)
++	*need_aging = should_run_aging(lruvec, max_seq, min_seq, sc, can_swap, &nr_to_scan);
++	if (!*need_aging)
+ 		return nr_to_scan;
+ 
+ 	/* skip the aging path at the default priority */
+@@ -4715,10 +4737,68 @@ done:
+ 	return min_seq[!can_swap] + MIN_NR_GENS <= max_seq ? nr_to_scan : 0;
+ }
+ 
++static bool should_abort_scan(struct lruvec *lruvec, unsigned long seq,
++			      struct scan_control *sc, bool need_swapping)
++{
++	int i;
++	DEFINE_MAX_SEQ(lruvec);
++
++	if (!current_is_kswapd()) {
++		/* age each memcg once to ensure fairness */
++		if (max_seq - seq > 1)
++			return true;
++
++		/* over-swapping can increase allocation latency */
++		if (sc->nr_reclaimed >= sc->nr_to_reclaim && need_swapping)
++			return true;
++
++		/* give this thread a chance to exit and free its memory */
++		if (fatal_signal_pending(current)) {
++			sc->nr_reclaimed += MIN_LRU_BATCH;
++			return true;
++		}
++
++		if (cgroup_reclaim(sc))
++			return false;
++	} else if (sc->nr_reclaimed - sc->last_reclaimed < sc->nr_to_reclaim)
++		return false;
++
++	/* keep scanning at low priorities to ensure fairness */
++	if (sc->priority > DEF_PRIORITY - 2)
++		return false;
++
++	/*
++	 * A minimum amount of work was done under global memory pressure. For
++	 * kswapd, it may be overshooting. For direct reclaim, the target isn't
++	 * met, and yet the allocation may still succeed, since kswapd may have
++	 * caught up. In either case, it's better to stop now, and restart if
++	 * necessary.
++	 */
++	for (i = 0; i <= sc->reclaim_idx; i++) {
++		unsigned long wmark;
++		struct zone *zone = lruvec_pgdat(lruvec)->node_zones + i;
++
++		if (!managed_zone(zone))
++			continue;
++
++		wmark = current_is_kswapd() ? high_wmark_pages(zone) : low_wmark_pages(zone);
++		if (wmark > zone_page_state(zone, NR_FREE_PAGES))
++			return false;
++	}
++
++	sc->nr_reclaimed += MIN_LRU_BATCH;
++
++	return true;
++}
++
+ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
+ {
+ 	struct blk_plug plug;
++	bool need_aging = false;
++	bool need_swapping = false;
+ 	unsigned long scanned = 0;
++	unsigned long reclaimed = sc->nr_reclaimed;
++	DEFINE_MAX_SEQ(lruvec);
+ 
+ 	lru_add_drain();
+ 
+@@ -4738,21 +4818,28 @@ static void lru_gen_shrink_lruvec(struct
+ 		else
+ 			swappiness = 0;
+ 
+-		nr_to_scan = get_nr_to_scan(lruvec, sc, swappiness);
++		nr_to_scan = get_nr_to_scan(lruvec, sc, swappiness, &need_aging);
+ 		if (!nr_to_scan)
+-			break;
++			goto done;
+ 
+-		delta = evict_pages(lruvec, sc, swappiness);
++		delta = evict_pages(lruvec, sc, swappiness, &need_swapping);
+ 		if (!delta)
+-			break;
++			goto done;
+ 
+ 		scanned += delta;
+ 		if (scanned >= nr_to_scan)
+ 			break;
+ 
++		if (should_abort_scan(lruvec, max_seq, sc, need_swapping))
++			break;
++
+ 		cond_resched();
+ 	}
+ 
++	/* see the comment in lru_gen_age_node() */
++	if (sc->nr_reclaimed - reclaimed >= MIN_LRU_BATCH && !need_aging)
++		sc->memcgs_need_aging = false;
++done:
+ 	clear_mm_walk();
+ 
+ 	blk_finish_plug(&plug);
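
The backoff rules that should_abort_scan() adds above can be summarized with a small user-space model. This is a simplified sketch, not the patch code: the fatal-signal and cgroup-reclaim cases are dropped, all zone watermarks are collapsed into one flag, and DEF_PRIORITY/MIN_LRU_BATCH are assumptions matching the usual kernel defaults (12 and 64).

#include <stdbool.h>
#include <stdio.h>

#define DEF_PRIORITY  12
#define MIN_LRU_BATCH 64

struct model_sc {
	bool kswapd;
	int priority;
	unsigned long nr_reclaimed;
	unsigned long last_reclaimed;
	unsigned long nr_to_reclaim;
	bool need_swapping;
};

static bool model_should_abort_scan(struct model_sc *sc, unsigned long seq,
				    unsigned long max_seq, bool zones_above_wmark)
{
	if (!sc->kswapd) {
		/* age each memcg once to ensure fairness */
		if (max_seq - seq > 1)
			return true;
		/* over-swapping can increase allocation latency */
		if (sc->nr_reclaimed >= sc->nr_to_reclaim && sc->need_swapping)
			return true;
	} else if (sc->nr_reclaimed - sc->last_reclaimed < sc->nr_to_reclaim) {
		return false;
	}

	/* keep scanning at low priorities to ensure fairness */
	if (sc->priority > DEF_PRIORITY - 2)
		return false;

	/* stop early only once every eligible zone is above its watermark */
	if (!zones_above_wmark)
		return false;

	sc->nr_reclaimed += MIN_LRU_BATCH;
	return true;
}

int main(void)
{
	struct model_sc sc = {
		.kswapd = false, .priority = DEF_PRIORITY - 3,
		.nr_reclaimed = 32, .nr_to_reclaim = 32, .need_swapping = true,
	};

	/* direct reclaim met its target and started swapping: abort (1) */
	printf("abort: %d\n", model_should_abort_scan(&sc, 4, 5, true));
	return 0;
}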

+ 498 - 0
target/linux/generic/backport-6.1/020-v6.1-10-mm-multi-gen-LRU-kill-switch.patch

@@ -0,0 +1,498 @@
+From 640db3a029dca909af47157ca18f52b29d34a1b9 Mon Sep 17 00:00:00 2001
+From: Yu Zhao <[email protected]>
+Date: Sun, 18 Sep 2022 02:00:07 -0600
+Subject: [PATCH 10/29] mm: multi-gen LRU: kill switch
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Add /sys/kernel/mm/lru_gen/enabled as a kill switch. Components that
+can be disabled include:
+  0x0001: the multi-gen LRU core
+  0x0002: walking page table, when arch_has_hw_pte_young() returns
+          true
+  0x0004: clearing the accessed bit in non-leaf PMD entries, when
+          CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG=y
+  [yYnN]: apply to all the components above
+E.g.,
+  echo y >/sys/kernel/mm/lru_gen/enabled
+  cat /sys/kernel/mm/lru_gen/enabled
+  0x0007
+  echo 5 >/sys/kernel/mm/lru_gen/enabled
+  cat /sys/kernel/mm/lru_gen/enabled
+  0x0005
+
+NB: the page table walks happen on the scale of seconds under heavy memory
+pressure, in which case the mmap_lock contention is a lesser concern,
+compared with the LRU lock contention and the I/O congestion.  So far the
+only well-known case of the mmap_lock contention happens on Android, due
+to Scudo [1] which allocates several thousand VMAs for merely a few
+hundred MBs.  The SPF and the Maple Tree also have provided their own
+assessments [2][3].  However, if walking page tables does worsen the
+mmap_lock contention, the kill switch can be used to disable it.  In this
+case the multi-gen LRU will suffer a minor performance degradation, as
+shown previously.
+
+Clearing the accessed bit in non-leaf PMD entries can also be disabled,
+since this behavior was not tested on x86 varieties other than Intel and
+AMD.
+
+[1] https://source.android.com/devices/tech/debug/scudo
+[2] https://lore.kernel.org/r/[email protected]/
+[3] https://lore.kernel.org/r/[email protected]/
+
+Link: https://lkml.kernel.org/r/[email protected]
+Signed-off-by: Yu Zhao <[email protected]>
+Acked-by: Brian Geffon <[email protected]>
+Acked-by: Jan Alexander Steffens (heftig) <[email protected]>
+Acked-by: Oleksandr Natalenko <[email protected]>
+Acked-by: Steven Barrett <[email protected]>
+Acked-by: Suleiman Souhlal <[email protected]>
+Tested-by: Daniel Byrne <[email protected]>
+Tested-by: Donald Carr <[email protected]>
+Tested-by: Holger Hoffstätte <[email protected]>
+Tested-by: Konstantin Kharlamov <[email protected]>
+Tested-by: Shuang Zhai <[email protected]>
+Tested-by: Sofia Trinh <[email protected]>
+Tested-by: Vaibhav Jain <[email protected]>
+Cc: Andi Kleen <[email protected]>
+Cc: Aneesh Kumar K.V <[email protected]>
+Cc: Barry Song <[email protected]>
+Cc: Catalin Marinas <[email protected]>
+Cc: Dave Hansen <[email protected]>
+Cc: Hillf Danton <[email protected]>
+Cc: Jens Axboe <[email protected]>
+Cc: Johannes Weiner <[email protected]>
+Cc: Jonathan Corbet <[email protected]>
+Cc: Linus Torvalds <[email protected]>
+Cc: Matthew Wilcox <[email protected]>
+Cc: Mel Gorman <[email protected]>
+Cc: Miaohe Lin <[email protected]>
+Cc: Michael Larabel <[email protected]>
+Cc: Michal Hocko <[email protected]>
+Cc: Mike Rapoport <[email protected]>
+Cc: Mike Rapoport <[email protected]>
+Cc: Peter Zijlstra <[email protected]>
+Cc: Qi Zheng <[email protected]>
+Cc: Tejun Heo <[email protected]>
+Cc: Vlastimil Babka <[email protected]>
+Cc: Will Deacon <[email protected]>
+Signed-off-by: Andrew Morton <[email protected]>
+---
+ include/linux/cgroup.h          |  15 ++-
+ include/linux/mm_inline.h       |  15 ++-
+ include/linux/mmzone.h          |   9 ++
+ kernel/cgroup/cgroup-internal.h |   1 -
+ mm/Kconfig                      |   6 +
+ mm/vmscan.c                     | 228 +++++++++++++++++++++++++++++++-
+ 6 files changed, 265 insertions(+), 9 deletions(-)
+
+--- a/include/linux/cgroup.h
++++ b/include/linux/cgroup.h
+@@ -433,6 +433,18 @@ static inline void cgroup_put(struct cgr
+ 	css_put(&cgrp->self);
+ }
+ 
++extern struct mutex cgroup_mutex;
++
++static inline void cgroup_lock(void)
++{
++	mutex_lock(&cgroup_mutex);
++}
++
++static inline void cgroup_unlock(void)
++{
++	mutex_unlock(&cgroup_mutex);
++}
++
+ /**
+  * task_css_set_check - obtain a task's css_set with extra access conditions
+  * @task: the task to obtain css_set for
+@@ -447,7 +459,6 @@ static inline void cgroup_put(struct cgr
+  * as locks used during the cgroup_subsys::attach() methods.
+  */
+ #ifdef CONFIG_PROVE_RCU
+-extern struct mutex cgroup_mutex;
+ extern spinlock_t css_set_lock;
+ #define task_css_set_check(task, __c)					\
+ 	rcu_dereference_check((task)->cgroups,				\
+@@ -708,6 +719,8 @@ struct cgroup;
+ static inline u64 cgroup_id(const struct cgroup *cgrp) { return 1; }
+ static inline void css_get(struct cgroup_subsys_state *css) {}
+ static inline void css_put(struct cgroup_subsys_state *css) {}
++static inline void cgroup_lock(void) {}
++static inline void cgroup_unlock(void) {}
+ static inline int cgroup_attach_task_all(struct task_struct *from,
+ 					 struct task_struct *t) { return 0; }
+ static inline int cgroupstats_build(struct cgroupstats *stats,
+--- a/include/linux/mm_inline.h
++++ b/include/linux/mm_inline.h
+@@ -91,10 +91,21 @@ static __always_inline enum lru_list pag
+ 
+ #ifdef CONFIG_LRU_GEN
+ 
++#ifdef CONFIG_LRU_GEN_ENABLED
+ static inline bool lru_gen_enabled(void)
+ {
+-	return true;
++	DECLARE_STATIC_KEY_TRUE(lru_gen_caps[NR_LRU_GEN_CAPS]);
++
++	return static_branch_likely(&lru_gen_caps[LRU_GEN_CORE]);
++}
++#else
++static inline bool lru_gen_enabled(void)
++{
++	DECLARE_STATIC_KEY_FALSE(lru_gen_caps[NR_LRU_GEN_CAPS]);
++
++	return static_branch_unlikely(&lru_gen_caps[LRU_GEN_CORE]);
+ }
++#endif
+ 
+ static inline bool lru_gen_in_fault(void)
+ {
+@@ -207,7 +218,7 @@ static inline bool lru_gen_add_page(stru
+ 
+ 	VM_WARN_ON_ONCE_PAGE(gen != -1, page);
+ 
+-	if (PageUnevictable(page))
++	if (PageUnevictable(page) || !lrugen->enabled)
+ 		return false;
+ 	/*
+ 	 * There are three common cases for this page:
+--- a/include/linux/mmzone.h
++++ b/include/linux/mmzone.h
+@@ -364,6 +364,13 @@ enum {
+ 	LRU_GEN_FILE,
+ };
+ 
++enum {
++	LRU_GEN_CORE,
++	LRU_GEN_MM_WALK,
++	LRU_GEN_NONLEAF_YOUNG,
++	NR_LRU_GEN_CAPS
++};
++
+ #define MIN_LRU_BATCH		BITS_PER_LONG
+ #define MAX_LRU_BATCH		(MIN_LRU_BATCH * 64)
+ 
+@@ -405,6 +412,8 @@ struct lru_gen_struct {
+ 	/* can be modified without holding the LRU lock */
+ 	atomic_long_t evicted[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS];
+ 	atomic_long_t refaulted[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS];
++	/* whether the multi-gen LRU is enabled */
++	bool enabled;
+ };
+ 
+ enum {
+--- a/kernel/cgroup/cgroup-internal.h
++++ b/kernel/cgroup/cgroup-internal.h
+@@ -165,7 +165,6 @@ struct cgroup_mgctx {
+ #define DEFINE_CGROUP_MGCTX(name)						\
+ 	struct cgroup_mgctx name = CGROUP_MGCTX_INIT(name)
+ 
+-extern struct mutex cgroup_mutex;
+ extern spinlock_t css_set_lock;
+ extern struct cgroup_subsys *cgroup_subsys[];
+ extern struct list_head cgroup_roots;
+--- a/mm/Kconfig
++++ b/mm/Kconfig
+@@ -906,6 +906,12 @@ config LRU_GEN
+ 	help
+ 	  A high performance LRU implementation to overcommit memory.
+ 
++config LRU_GEN_ENABLED
++	bool "Enable by default"
++	depends on LRU_GEN
++	help
++	  This option enables the multi-gen LRU by default.
++
+ config LRU_GEN_STATS
+ 	bool "Full stats for debugging"
+ 	depends on LRU_GEN
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -52,6 +52,7 @@
+ #include <linux/psi.h>
+ #include <linux/pagewalk.h>
+ #include <linux/shmem_fs.h>
++#include <linux/ctype.h>
+ 
+ #include <asm/tlbflush.h>
+ #include <asm/div64.h>
+@@ -2841,6 +2842,14 @@ static bool can_age_anon_pages(struct pg
+ 
+ #ifdef CONFIG_LRU_GEN
+ 
++#ifdef CONFIG_LRU_GEN_ENABLED
++DEFINE_STATIC_KEY_ARRAY_TRUE(lru_gen_caps, NR_LRU_GEN_CAPS);
++#define get_cap(cap)	static_branch_likely(&lru_gen_caps[cap])
++#else
++DEFINE_STATIC_KEY_ARRAY_FALSE(lru_gen_caps, NR_LRU_GEN_CAPS);
++#define get_cap(cap)	static_branch_unlikely(&lru_gen_caps[cap])
++#endif
++
+ /******************************************************************************
+  *                          shorthand helpers
+  ******************************************************************************/
+@@ -3717,7 +3726,8 @@ static void walk_pmd_range_locked(pud_t
+ 			goto next;
+ 
+ 		if (!pmd_trans_huge(pmd[i])) {
+-			if (IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG))
++			if (IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) &&
++			    get_cap(LRU_GEN_NONLEAF_YOUNG))
+ 				pmdp_test_and_clear_young(vma, addr, pmd + i);
+ 			goto next;
+ 		}
+@@ -3815,10 +3825,12 @@ restart:
+ 		walk->mm_stats[MM_NONLEAF_TOTAL]++;
+ 
+ #ifdef CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG
+-		if (!pmd_young(val))
+-			continue;
++		if (get_cap(LRU_GEN_NONLEAF_YOUNG)) {
++			if (!pmd_young(val))
++				continue;
+ 
+-		walk_pmd_range_locked(pud, addr, vma, args, bitmap, &pos);
++			walk_pmd_range_locked(pud, addr, vma, args, bitmap, &pos);
++		}
+ #endif
+ 		if (!walk->force_scan && !test_bloom_filter(walk->lruvec, walk->max_seq, pmd + i))
+ 			continue;
+@@ -4080,7 +4092,7 @@ static bool try_to_inc_max_seq(struct lr
+ 	 * handful of PTEs. Spreading the work out over a period of time usually
+ 	 * is less efficient, but it avoids bursty page faults.
+ 	 */
+-	if (!arch_has_hw_pte_young()) {
++	if (!(arch_has_hw_pte_young() && get_cap(LRU_GEN_MM_WALK))) {
+ 		success = iterate_mm_list_nowalk(lruvec, max_seq);
+ 		goto done;
+ 	}
+@@ -4846,6 +4858,208 @@ done:
+ }
+ 
+ /******************************************************************************
++ *                          state change
++ ******************************************************************************/
++
++static bool __maybe_unused state_is_valid(struct lruvec *lruvec)
++{
++	struct lru_gen_struct *lrugen = &lruvec->lrugen;
++
++	if (lrugen->enabled) {
++		enum lru_list lru;
++
++		for_each_evictable_lru(lru) {
++			if (!list_empty(&lruvec->lists[lru]))
++				return false;
++		}
++	} else {
++		int gen, type, zone;
++
++		for_each_gen_type_zone(gen, type, zone) {
++			if (!list_empty(&lrugen->lists[gen][type][zone]))
++				return false;
++		}
++	}
++
++	return true;
++}
++
++static bool fill_evictable(struct lruvec *lruvec)
++{
++	enum lru_list lru;
++	int remaining = MAX_LRU_BATCH;
++
++	for_each_evictable_lru(lru) {
++		int type = is_file_lru(lru);
++		bool active = is_active_lru(lru);
++		struct list_head *head = &lruvec->lists[lru];
++
++		while (!list_empty(head)) {
++			bool success;
++			struct page *page = lru_to_page(head);
++
++			VM_WARN_ON_ONCE_PAGE(PageUnevictable(page), page);
++			VM_WARN_ON_ONCE_PAGE(PageActive(page) != active, page);
++			VM_WARN_ON_ONCE_PAGE(page_is_file_lru(page) != type, page);
++			VM_WARN_ON_ONCE_PAGE(page_lru_gen(page) != -1, page);
++
++			del_page_from_lru_list(page, lruvec);
++			success = lru_gen_add_page(lruvec, page, false);
++			VM_WARN_ON_ONCE(!success);
++
++			if (!--remaining)
++				return false;
++		}
++	}
++
++	return true;
++}
++
++static bool drain_evictable(struct lruvec *lruvec)
++{
++	int gen, type, zone;
++	int remaining = MAX_LRU_BATCH;
++
++	for_each_gen_type_zone(gen, type, zone) {
++		struct list_head *head = &lruvec->lrugen.lists[gen][type][zone];
++
++		while (!list_empty(head)) {
++			bool success;
++			struct page *page = lru_to_page(head);
++
++			VM_WARN_ON_ONCE_PAGE(PageUnevictable(page), page);
++			VM_WARN_ON_ONCE_PAGE(PageActive(page), page);
++			VM_WARN_ON_ONCE_PAGE(page_is_file_lru(page) != type, page);
++			VM_WARN_ON_ONCE_PAGE(page_zonenum(page) != zone, page);
++
++			success = lru_gen_del_page(lruvec, page, false);
++			VM_WARN_ON_ONCE(!success);
++			add_page_to_lru_list(page, lruvec);
++
++			if (!--remaining)
++				return false;
++		}
++	}
++
++	return true;
++}
++
++static void lru_gen_change_state(bool enabled)
++{
++	static DEFINE_MUTEX(state_mutex);
++
++	struct mem_cgroup *memcg;
++
++	cgroup_lock();
++	cpus_read_lock();
++	get_online_mems();
++	mutex_lock(&state_mutex);
++
++	if (enabled == lru_gen_enabled())
++		goto unlock;
++
++	if (enabled)
++		static_branch_enable_cpuslocked(&lru_gen_caps[LRU_GEN_CORE]);
++	else
++		static_branch_disable_cpuslocked(&lru_gen_caps[LRU_GEN_CORE]);
++
++	memcg = mem_cgroup_iter(NULL, NULL, NULL);
++	do {
++		int nid;
++
++		for_each_node(nid) {
++			struct lruvec *lruvec = get_lruvec(memcg, nid);
++
++			if (!lruvec)
++				continue;
++
++			spin_lock_irq(&lruvec->lru_lock);
++
++			VM_WARN_ON_ONCE(!seq_is_valid(lruvec));
++			VM_WARN_ON_ONCE(!state_is_valid(lruvec));
++
++			lruvec->lrugen.enabled = enabled;
++
++			while (!(enabled ? fill_evictable(lruvec) : drain_evictable(lruvec))) {
++				spin_unlock_irq(&lruvec->lru_lock);
++				cond_resched();
++				spin_lock_irq(&lruvec->lru_lock);
++			}
++
++			spin_unlock_irq(&lruvec->lru_lock);
++		}
++
++		cond_resched();
++	} while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)));
++unlock:
++	mutex_unlock(&state_mutex);
++	put_online_mems();
++	cpus_read_unlock();
++	cgroup_unlock();
++}
++
++/******************************************************************************
++ *                          sysfs interface
++ ******************************************************************************/
++
++static ssize_t show_enabled(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
++{
++	unsigned int caps = 0;
++
++	if (get_cap(LRU_GEN_CORE))
++		caps |= BIT(LRU_GEN_CORE);
++
++	if (arch_has_hw_pte_young() && get_cap(LRU_GEN_MM_WALK))
++		caps |= BIT(LRU_GEN_MM_WALK);
++
++	if (IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) && get_cap(LRU_GEN_NONLEAF_YOUNG))
++		caps |= BIT(LRU_GEN_NONLEAF_YOUNG);
++
++	return snprintf(buf, PAGE_SIZE, "0x%04x\n", caps);
++}
++
++static ssize_t store_enabled(struct kobject *kobj, struct kobj_attribute *attr,
++			     const char *buf, size_t len)
++{
++	int i;
++	unsigned int caps;
++
++	if (tolower(*buf) == 'n')
++		caps = 0;
++	else if (tolower(*buf) == 'y')
++		caps = -1;
++	else if (kstrtouint(buf, 0, &caps))
++		return -EINVAL;
++
++	for (i = 0; i < NR_LRU_GEN_CAPS; i++) {
++		bool enabled = caps & BIT(i);
++
++		if (i == LRU_GEN_CORE)
++			lru_gen_change_state(enabled);
++		else if (enabled)
++			static_branch_enable(&lru_gen_caps[i]);
++		else
++			static_branch_disable(&lru_gen_caps[i]);
++	}
++
++	return len;
++}
++
++static struct kobj_attribute lru_gen_enabled_attr = __ATTR(
++	enabled, 0644, show_enabled, store_enabled
++);
++
++static struct attribute *lru_gen_attrs[] = {
++	&lru_gen_enabled_attr.attr,
++	NULL
++};
++
++static struct attribute_group lru_gen_attr_group = {
++	.name = "lru_gen",
++	.attrs = lru_gen_attrs,
++};
++
++/******************************************************************************
+  *                          initialization
+  ******************************************************************************/
+ 
+@@ -4855,6 +5069,7 @@ void lru_gen_init_lruvec(struct lruvec *
+ 	struct lru_gen_struct *lrugen = &lruvec->lrugen;
+ 
+ 	lrugen->max_seq = MIN_NR_GENS + 1;
++	lrugen->enabled = lru_gen_enabled();
+ 
+ 	for_each_gen_type_zone(gen, type, zone)
+ 		INIT_LIST_HEAD(&lrugen->lists[gen][type][zone]);
+@@ -4894,6 +5109,9 @@ static int __init init_lru_gen(void)
+ 	BUILD_BUG_ON(MIN_NR_GENS + 1 >= MAX_NR_GENS);
+ 	BUILD_BUG_ON(BIT(LRU_GEN_WIDTH) <= MAX_NR_GENS);
+ 
++	if (sysfs_create_group(mm_kobj, &lru_gen_attr_group))
++		pr_err("lru_gen: failed to create sysfs group\n");
++
+ 	return 0;
+ };
+ late_initcall(init_lru_gen);
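
The enabled attribute added above reports a capability bitmask rather than a plain boolean. A small user-space reader, assuming only the sysfs path and bit values given in the commit message, might decode it like this; it reads the file and never toggles anything.

#include <stdio.h>
#include <stdlib.h>

#define LRU_GEN_CORE		0x0001
#define LRU_GEN_MM_WALK		0x0002
#define LRU_GEN_NONLEAF_YOUNG	0x0004

int main(void)
{
	unsigned int caps;
	FILE *f = fopen("/sys/kernel/mm/lru_gen/enabled", "r");

	if (!f) {
		perror("open /sys/kernel/mm/lru_gen/enabled");
		return EXIT_FAILURE;
	}
	if (fscanf(f, "%x", &caps) != 1) {
		fclose(f);
		fprintf(stderr, "unexpected format\n");
		return EXIT_FAILURE;
	}
	fclose(f);

	printf("multi-gen LRU core:        %s\n", caps & LRU_GEN_CORE ? "on" : "off");
	printf("page table walks:          %s\n", caps & LRU_GEN_MM_WALK ? "on" : "off");
	printf("non-leaf PMD accessed bit: %s\n", caps & LRU_GEN_NONLEAF_YOUNG ? "on" : "off");
	return EXIT_SUCCESS;
}

Writing back works the same way in reverse: echoing y, n, or a hex mask, as shown in the commit message, maps onto store_enabled() above.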

+ 226 - 0
target/linux/generic/backport-6.1/020-v6.1-11-mm-multi-gen-LRU-thrashing-prevention.patch

@@ -0,0 +1,226 @@
+From 73d1ff551760f0c79c47ab70faa4c2ca91413f5c Mon Sep 17 00:00:00 2001
+From: Yu Zhao <[email protected]>
+Date: Sun, 18 Sep 2022 02:00:08 -0600
+Subject: [PATCH 11/29] mm: multi-gen LRU: thrashing prevention
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Add /sys/kernel/mm/lru_gen/min_ttl_ms for thrashing prevention, as
+requested by many desktop users [1].
+
+When set to value N, it prevents the working set of N milliseconds from
+getting evicted.  The OOM killer is triggered if this working set cannot
+be kept in memory.  Based on the average human detectable lag (~100ms),
+N=1000 usually eliminates intolerable lags due to thrashing.  Larger
+values like N=3000 make lags less noticeable at the risk of premature OOM
+kills.
+
+Compared with the size-based approach [2], this time-based approach
+has the following advantages:
+
+1. It is easier to configure because it is agnostic to applications
+   and memory sizes.
+2. It is more reliable because it is directly wired to the OOM killer.
+
+[1] https://lore.kernel.org/r/Ydza%[email protected]/
+[2] https://lore.kernel.org/r/[email protected]/
+
+Link: https://lkml.kernel.org/r/[email protected]
+Signed-off-by: Yu Zhao <[email protected]>
+Acked-by: Brian Geffon <[email protected]>
+Acked-by: Jan Alexander Steffens (heftig) <[email protected]>
+Acked-by: Oleksandr Natalenko <[email protected]>
+Acked-by: Steven Barrett <[email protected]>
+Acked-by: Suleiman Souhlal <[email protected]>
+Tested-by: Daniel Byrne <[email protected]>
+Tested-by: Donald Carr <[email protected]>
+Tested-by: Holger Hoffstätte <[email protected]>
+Tested-by: Konstantin Kharlamov <[email protected]>
+Tested-by: Shuang Zhai <[email protected]>
+Tested-by: Sofia Trinh <[email protected]>
+Tested-by: Vaibhav Jain <[email protected]>
+Cc: Andi Kleen <[email protected]>
+Cc: Aneesh Kumar K.V <[email protected]>
+Cc: Barry Song <[email protected]>
+Cc: Catalin Marinas <[email protected]>
+Cc: Dave Hansen <[email protected]>
+Cc: Hillf Danton <[email protected]>
+Cc: Jens Axboe <[email protected]>
+Cc: Johannes Weiner <[email protected]>
+Cc: Jonathan Corbet <[email protected]>
+Cc: Linus Torvalds <[email protected]>
+Cc: Matthew Wilcox <[email protected]>
+Cc: Mel Gorman <[email protected]>
+Cc: Miaohe Lin <[email protected]>
+Cc: Michael Larabel <[email protected]>
+Cc: Michal Hocko <[email protected]>
+Cc: Mike Rapoport <[email protected]>
+Cc: Mike Rapoport <[email protected]>
+Cc: Peter Zijlstra <[email protected]>
+Cc: Qi Zheng <[email protected]>
+Cc: Tejun Heo <[email protected]>
+Cc: Vlastimil Babka <[email protected]>
+Cc: Will Deacon <[email protected]>
+Signed-off-by: Andrew Morton <[email protected]>
+---
+ include/linux/mmzone.h |  2 ++
+ mm/vmscan.c            | 74 ++++++++++++++++++++++++++++++++++++++++--
+ 2 files changed, 73 insertions(+), 3 deletions(-)
+
+--- a/include/linux/mmzone.h
++++ b/include/linux/mmzone.h
+@@ -399,6 +399,8 @@ struct lru_gen_struct {
+ 	unsigned long max_seq;
+ 	/* the eviction increments the oldest generation numbers */
+ 	unsigned long min_seq[ANON_AND_FILE];
++	/* the birth time of each generation in jiffies */
++	unsigned long timestamps[MAX_NR_GENS];
+ 	/* the multi-gen LRU lists, lazily sorted on eviction */
+ 	struct list_head lists[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES];
+ 	/* the multi-gen LRU sizes, eventually consistent */
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -4064,6 +4064,7 @@ static void inc_max_seq(struct lruvec *l
+ 	for (type = 0; type < ANON_AND_FILE; type++)
+ 		reset_ctrl_pos(lruvec, type, false);
+ 
++	WRITE_ONCE(lrugen->timestamps[next], jiffies);
+ 	/* make sure preceding modifications appear */
+ 	smp_store_release(&lrugen->max_seq, lrugen->max_seq + 1);
+ 
+@@ -4193,7 +4194,7 @@ static bool should_run_aging(struct lruv
+ 	return false;
+ }
+ 
+-static void age_lruvec(struct lruvec *lruvec, struct scan_control *sc)
++static bool age_lruvec(struct lruvec *lruvec, struct scan_control *sc, unsigned long min_ttl)
+ {
+ 	bool need_aging;
+ 	unsigned long nr_to_scan;
+@@ -4207,16 +4208,36 @@ static void age_lruvec(struct lruvec *lr
+ 	mem_cgroup_calculate_protection(NULL, memcg);
+ 
+ 	if (mem_cgroup_below_min(memcg))
+-		return;
++		return false;
+ 
+ 	need_aging = should_run_aging(lruvec, max_seq, min_seq, sc, swappiness, &nr_to_scan);
++
++	if (min_ttl) {
++		int gen = lru_gen_from_seq(min_seq[LRU_GEN_FILE]);
++		unsigned long birth = READ_ONCE(lruvec->lrugen.timestamps[gen]);
++
++		if (time_is_after_jiffies(birth + min_ttl))
++			return false;
++
++		/* the size is likely too small to be helpful */
++		if (!nr_to_scan && sc->priority != DEF_PRIORITY)
++			return false;
++	}
++
+ 	if (need_aging)
+ 		try_to_inc_max_seq(lruvec, max_seq, sc, swappiness);
++
++	return true;
+ }
+ 
++/* to protect the working set of the last N jiffies */
++static unsigned long lru_gen_min_ttl __read_mostly;
++
+ static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc)
+ {
+ 	struct mem_cgroup *memcg;
++	bool success = false;
++	unsigned long min_ttl = READ_ONCE(lru_gen_min_ttl);
+ 
+ 	VM_WARN_ON_ONCE(!current_is_kswapd());
+ 
+@@ -4239,12 +4260,32 @@ static void lru_gen_age_node(struct pgli
+ 	do {
+ 		struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat);
+ 
+-		age_lruvec(lruvec, sc);
++		if (age_lruvec(lruvec, sc, min_ttl))
++			success = true;
+ 
+ 		cond_resched();
+ 	} while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)));
+ 
+ 	clear_mm_walk();
++
++	/* check the order to exclude compaction-induced reclaim */
++	if (success || !min_ttl || sc->order)
++		return;
++
++	/*
++	 * The main goal is to OOM kill if every generation from all memcgs is
++	 * younger than min_ttl. However, another possibility is all memcgs are
++	 * either below min or empty.
++	 */
++	if (mutex_trylock(&oom_lock)) {
++		struct oom_control oc = {
++			.gfp_mask = sc->gfp_mask,
++		};
++
++		out_of_memory(&oc);
++
++		mutex_unlock(&oom_lock);
++	}
+ }
+ 
+ /*
+@@ -5002,6 +5043,28 @@ unlock:
+  *                          sysfs interface
+  ******************************************************************************/
+ 
++static ssize_t show_min_ttl(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
++{
++	return sprintf(buf, "%u\n", jiffies_to_msecs(READ_ONCE(lru_gen_min_ttl)));
++}
++
++static ssize_t store_min_ttl(struct kobject *kobj, struct kobj_attribute *attr,
++			     const char *buf, size_t len)
++{
++	unsigned int msecs;
++
++	if (kstrtouint(buf, 0, &msecs))
++		return -EINVAL;
++
++	WRITE_ONCE(lru_gen_min_ttl, msecs_to_jiffies(msecs));
++
++	return len;
++}
++
++static struct kobj_attribute lru_gen_min_ttl_attr = __ATTR(
++	min_ttl_ms, 0644, show_min_ttl, store_min_ttl
++);
++
+ static ssize_t show_enabled(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+ {
+ 	unsigned int caps = 0;
+@@ -5050,6 +5113,7 @@ static struct kobj_attribute lru_gen_ena
+ );
+ 
+ static struct attribute *lru_gen_attrs[] = {
++	&lru_gen_min_ttl_attr.attr,
+ 	&lru_gen_enabled_attr.attr,
+ 	NULL
+ };
+@@ -5065,12 +5129,16 @@ static struct attribute_group lru_gen_at
+ 
+ void lru_gen_init_lruvec(struct lruvec *lruvec)
+ {
++	int i;
+ 	int gen, type, zone;
+ 	struct lru_gen_struct *lrugen = &lruvec->lrugen;
+ 
+ 	lrugen->max_seq = MIN_NR_GENS + 1;
+ 	lrugen->enabled = lru_gen_enabled();
+ 
++	for (i = 0; i <= MIN_NR_GENS + 1; i++)
++		lrugen->timestamps[i] = jiffies;
++
+ 	for_each_gen_type_zone(gen, type, zone)
+ 		INIT_LIST_HEAD(&lrugen->lists[gen][type][zone]);
+ 
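
age_lruvec() above skips an lruvec whose oldest file generation is younger than min_ttl. A user-space model of just that timestamp check follows; it is a sketch, with jiffies replaced by plain milliseconds and the nr_to_scan fallback omitted.

#include <stdbool.h>
#include <stdio.h>

static bool min_ttl_allows_aging(unsigned long now_ms, unsigned long birth_ms,
				 unsigned long min_ttl_ms)
{
	if (!min_ttl_ms)
		return true;	/* thrashing prevention disabled */

	/* working set younger than min_ttl: protect it, skip this lruvec */
	return now_ms - birth_ms >= min_ttl_ms;
}

int main(void)
{
	/* min_ttl_ms=1000 protects roughly the last second of the working set */
	printf("%d\n", min_ttl_allows_aging(5000, 4500, 1000)); /* 0: protected */
	printf("%d\n", min_ttl_allows_aging(5000, 3500, 1000)); /* 1: old enough */
	return 0;
}

When no lruvec passes this check under global pressure, lru_gen_age_node() above falls back to the OOM killer, which is what ties min_ttl_ms to the thrashing-prevention guarantee described in the commit message.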

+ 579 - 0
target/linux/generic/backport-6.1/020-v6.1-12-mm-multi-gen-LRU-debugfs-interface.patch

@@ -0,0 +1,579 @@
+From 530716d008ca26315f246cd70dc1cefc636beaa4 Mon Sep 17 00:00:00 2001
+From: Yu Zhao <[email protected]>
+Date: Sun, 18 Sep 2022 02:00:09 -0600
+Subject: [PATCH 12/29] mm: multi-gen LRU: debugfs interface
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Add /sys/kernel/debug/lru_gen for working set estimation and proactive
+reclaim.  These techniques are commonly used to optimize job scheduling
+(bin packing) in data centers [1][2].
+
+Compared with the page table-based approach and the PFN-based
+approach, this lruvec-based approach has the following advantages:
+1. It offers better choices because it is aware of memcgs, NUMA nodes,
+   shared mappings and unmapped page cache.
+2. It is more scalable because it is O(nr_hot_pages), whereas the
+   PFN-based approach is O(nr_total_pages).
+
+Add /sys/kernel/debug/lru_gen_full for debugging.
+
+[1] https://dl.acm.org/doi/10.1145/3297858.3304053
+[2] https://dl.acm.org/doi/10.1145/3503222.3507731
+
+Link: https://lkml.kernel.org/r/[email protected]
+Signed-off-by: Yu Zhao <[email protected]>
+Reviewed-by: Qi Zheng <[email protected]>
+Acked-by: Brian Geffon <[email protected]>
+Acked-by: Jan Alexander Steffens (heftig) <[email protected]>
+Acked-by: Oleksandr Natalenko <[email protected]>
+Acked-by: Steven Barrett <[email protected]>
+Acked-by: Suleiman Souhlal <[email protected]>
+Tested-by: Daniel Byrne <[email protected]>
+Tested-by: Donald Carr <[email protected]>
+Tested-by: Holger Hoffstätte <[email protected]>
+Tested-by: Konstantin Kharlamov <[email protected]>
+Tested-by: Shuang Zhai <[email protected]>
+Tested-by: Sofia Trinh <[email protected]>
+Tested-by: Vaibhav Jain <[email protected]>
+Cc: Andi Kleen <[email protected]>
+Cc: Aneesh Kumar K.V <[email protected]>
+Cc: Barry Song <[email protected]>
+Cc: Catalin Marinas <[email protected]>
+Cc: Dave Hansen <[email protected]>
+Cc: Hillf Danton <[email protected]>
+Cc: Jens Axboe <[email protected]>
+Cc: Johannes Weiner <[email protected]>
+Cc: Jonathan Corbet <[email protected]>
+Cc: Linus Torvalds <[email protected]>
+Cc: Matthew Wilcox <[email protected]>
+Cc: Mel Gorman <[email protected]>
+Cc: Miaohe Lin <[email protected]>
+Cc: Michael Larabel <[email protected]>
+Cc: Michal Hocko <[email protected]>
+Cc: Mike Rapoport <[email protected]>
+Cc: Mike Rapoport <[email protected]>
+Cc: Peter Zijlstra <[email protected]>
+Cc: Tejun Heo <[email protected]>
+Cc: Vlastimil Babka <[email protected]>
+Cc: Will Deacon <[email protected]>
+Signed-off-by: Andrew Morton <[email protected]>
+---
+ include/linux/nodemask.h |   1 +
+ mm/vmscan.c              | 411 ++++++++++++++++++++++++++++++++++++++-
+ 2 files changed, 402 insertions(+), 10 deletions(-)
+
+--- a/include/linux/nodemask.h
++++ b/include/linux/nodemask.h
+@@ -485,6 +485,7 @@ static inline int num_node_state(enum no
+ #define first_online_node	0
+ #define first_memory_node	0
+ #define next_online_node(nid)	(MAX_NUMNODES)
++#define next_memory_node(nid)	(MAX_NUMNODES)
+ #define nr_node_ids		1U
+ #define nr_online_nodes		1U
+ 
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -53,6 +53,7 @@
+ #include <linux/pagewalk.h>
+ #include <linux/shmem_fs.h>
+ #include <linux/ctype.h>
++#include <linux/debugfs.h>
+ 
+ #include <asm/tlbflush.h>
+ #include <asm/div64.h>
+@@ -3968,12 +3969,40 @@ static void clear_mm_walk(void)
+ 		kfree(walk);
+ }
+ 
+-static void inc_min_seq(struct lruvec *lruvec, int type)
++static bool inc_min_seq(struct lruvec *lruvec, int type, bool can_swap)
+ {
++	int zone;
++	int remaining = MAX_LRU_BATCH;
+ 	struct lru_gen_struct *lrugen = &lruvec->lrugen;
++	int new_gen, old_gen = lru_gen_from_seq(lrugen->min_seq[type]);
++
++	if (type == LRU_GEN_ANON && !can_swap)
++		goto done;
++
++	/* prevent cold/hot inversion if force_scan is true */
++	for (zone = 0; zone < MAX_NR_ZONES; zone++) {
++		struct list_head *head = &lrugen->lists[old_gen][type][zone];
++
++		while (!list_empty(head)) {
++			struct page *page = lru_to_page(head);
++
++			VM_WARN_ON_ONCE_PAGE(PageUnevictable(page), page);
++			VM_WARN_ON_ONCE_PAGE(PageActive(page), page);
++			VM_WARN_ON_ONCE_PAGE(page_is_file_lru(page) != type, page);
++			VM_WARN_ON_ONCE_PAGE(page_zonenum(page) != zone, page);
+ 
++			new_gen = page_inc_gen(lruvec, page, false);
++			list_move_tail(&page->lru, &lrugen->lists[new_gen][type][zone]);
++
++			if (!--remaining)
++				return false;
++		}
++	}
++done:
+ 	reset_ctrl_pos(lruvec, type, true);
+ 	WRITE_ONCE(lrugen->min_seq[type], lrugen->min_seq[type] + 1);
++
++	return true;
+ }
+ 
+ static bool try_to_inc_min_seq(struct lruvec *lruvec, bool can_swap)
+@@ -4019,7 +4048,7 @@ next:
+ 	return success;
+ }
+ 
+-static void inc_max_seq(struct lruvec *lruvec, bool can_swap)
++static void inc_max_seq(struct lruvec *lruvec, bool can_swap, bool force_scan)
+ {
+ 	int prev, next;
+ 	int type, zone;
+@@ -4033,9 +4062,13 @@ static void inc_max_seq(struct lruvec *l
+ 		if (get_nr_gens(lruvec, type) != MAX_NR_GENS)
+ 			continue;
+ 
+-		VM_WARN_ON_ONCE(type == LRU_GEN_FILE || can_swap);
++		VM_WARN_ON_ONCE(!force_scan && (type == LRU_GEN_FILE || can_swap));
+ 
+-		inc_min_seq(lruvec, type);
++		while (!inc_min_seq(lruvec, type, can_swap)) {
++			spin_unlock_irq(&lruvec->lru_lock);
++			cond_resched();
++			spin_lock_irq(&lruvec->lru_lock);
++		}
+ 	}
+ 
+ 	/*
+@@ -4072,7 +4105,7 @@ static void inc_max_seq(struct lruvec *l
+ }
+ 
+ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq,
+-			       struct scan_control *sc, bool can_swap)
++			       struct scan_control *sc, bool can_swap, bool force_scan)
+ {
+ 	bool success;
+ 	struct lru_gen_mm_walk *walk;
+@@ -4093,7 +4126,7 @@ static bool try_to_inc_max_seq(struct lr
+ 	 * handful of PTEs. Spreading the work out over a period of time usually
+ 	 * is less efficient, but it avoids bursty page faults.
+ 	 */
+-	if (!(arch_has_hw_pte_young() && get_cap(LRU_GEN_MM_WALK))) {
++	if (!force_scan && !(arch_has_hw_pte_young() && get_cap(LRU_GEN_MM_WALK))) {
+ 		success = iterate_mm_list_nowalk(lruvec, max_seq);
+ 		goto done;
+ 	}
+@@ -4107,7 +4140,7 @@ static bool try_to_inc_max_seq(struct lr
+ 	walk->lruvec = lruvec;
+ 	walk->max_seq = max_seq;
+ 	walk->can_swap = can_swap;
+-	walk->force_scan = false;
++	walk->force_scan = force_scan;
+ 
+ 	do {
+ 		success = iterate_mm_list(lruvec, walk, &mm);
+@@ -4127,7 +4160,7 @@ done:
+ 
+ 	VM_WARN_ON_ONCE(max_seq != READ_ONCE(lrugen->max_seq));
+ 
+-	inc_max_seq(lruvec, can_swap);
++	inc_max_seq(lruvec, can_swap, force_scan);
+ 	/* either this sees any waiters or they will see updated max_seq */
+ 	if (wq_has_sleeper(&lruvec->mm_state.wait))
+ 		wake_up_all(&lruvec->mm_state.wait);
+@@ -4225,7 +4258,7 @@ static bool age_lruvec(struct lruvec *lr
+ 	}
+ 
+ 	if (need_aging)
+-		try_to_inc_max_seq(lruvec, max_seq, sc, swappiness);
++		try_to_inc_max_seq(lruvec, max_seq, sc, swappiness, false);
+ 
+ 	return true;
+ }
+@@ -4784,7 +4817,7 @@ static unsigned long get_nr_to_scan(stru
+ 	if (current_is_kswapd())
+ 		return 0;
+ 
+-	if (try_to_inc_max_seq(lruvec, max_seq, sc, can_swap))
++	if (try_to_inc_max_seq(lruvec, max_seq, sc, can_swap, false))
+ 		return nr_to_scan;
+ done:
+ 	return min_seq[!can_swap] + MIN_NR_GENS <= max_seq ? nr_to_scan : 0;
+@@ -5124,6 +5157,361 @@ static struct attribute_group lru_gen_at
+ };
+ 
+ /******************************************************************************
++ *                          debugfs interface
++ ******************************************************************************/
++
++static void *lru_gen_seq_start(struct seq_file *m, loff_t *pos)
++{
++	struct mem_cgroup *memcg;
++	loff_t nr_to_skip = *pos;
++
++	m->private = kvmalloc(PATH_MAX, GFP_KERNEL);
++	if (!m->private)
++		return ERR_PTR(-ENOMEM);
++
++	memcg = mem_cgroup_iter(NULL, NULL, NULL);
++	do {
++		int nid;
++
++		for_each_node_state(nid, N_MEMORY) {
++			if (!nr_to_skip--)
++				return get_lruvec(memcg, nid);
++		}
++	} while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)));
++
++	return NULL;
++}
++
++static void lru_gen_seq_stop(struct seq_file *m, void *v)
++{
++	if (!IS_ERR_OR_NULL(v))
++		mem_cgroup_iter_break(NULL, lruvec_memcg(v));
++
++	kvfree(m->private);
++	m->private = NULL;
++}
++
++static void *lru_gen_seq_next(struct seq_file *m, void *v, loff_t *pos)
++{
++	int nid = lruvec_pgdat(v)->node_id;
++	struct mem_cgroup *memcg = lruvec_memcg(v);
++
++	++*pos;
++
++	nid = next_memory_node(nid);
++	if (nid == MAX_NUMNODES) {
++		memcg = mem_cgroup_iter(NULL, memcg, NULL);
++		if (!memcg)
++			return NULL;
++
++		nid = first_memory_node;
++	}
++
++	return get_lruvec(memcg, nid);
++}
++
++static void lru_gen_seq_show_full(struct seq_file *m, struct lruvec *lruvec,
++				  unsigned long max_seq, unsigned long *min_seq,
++				  unsigned long seq)
++{
++	int i;
++	int type, tier;
++	int hist = lru_hist_from_seq(seq);
++	struct lru_gen_struct *lrugen = &lruvec->lrugen;
++
++	for (tier = 0; tier < MAX_NR_TIERS; tier++) {
++		seq_printf(m, "            %10d", tier);
++		for (type = 0; type < ANON_AND_FILE; type++) {
++			const char *s = "   ";
++			unsigned long n[3] = {};
++
++			if (seq == max_seq) {
++				s = "RT ";
++				n[0] = READ_ONCE(lrugen->avg_refaulted[type][tier]);
++				n[1] = READ_ONCE(lrugen->avg_total[type][tier]);
++			} else if (seq == min_seq[type] || NR_HIST_GENS > 1) {
++				s = "rep";
++				n[0] = atomic_long_read(&lrugen->refaulted[hist][type][tier]);
++				n[1] = atomic_long_read(&lrugen->evicted[hist][type][tier]);
++				if (tier)
++					n[2] = READ_ONCE(lrugen->protected[hist][type][tier - 1]);
++			}
++
++			for (i = 0; i < 3; i++)
++				seq_printf(m, " %10lu%c", n[i], s[i]);
++		}
++		seq_putc(m, '\n');
++	}
++
++	seq_puts(m, "                      ");
++	for (i = 0; i < NR_MM_STATS; i++) {
++		const char *s = "      ";
++		unsigned long n = 0;
++
++		if (seq == max_seq && NR_HIST_GENS == 1) {
++			s = "LOYNFA";
++			n = READ_ONCE(lruvec->mm_state.stats[hist][i]);
++		} else if (seq != max_seq && NR_HIST_GENS > 1) {
++			s = "loynfa";
++			n = READ_ONCE(lruvec->mm_state.stats[hist][i]);
++		}
++
++		seq_printf(m, " %10lu%c", n, s[i]);
++	}
++	seq_putc(m, '\n');
++}
++
++static int lru_gen_seq_show(struct seq_file *m, void *v)
++{
++	unsigned long seq;
++	bool full = !debugfs_real_fops(m->file)->write;
++	struct lruvec *lruvec = v;
++	struct lru_gen_struct *lrugen = &lruvec->lrugen;
++	int nid = lruvec_pgdat(lruvec)->node_id;
++	struct mem_cgroup *memcg = lruvec_memcg(lruvec);
++	DEFINE_MAX_SEQ(lruvec);
++	DEFINE_MIN_SEQ(lruvec);
++
++	if (nid == first_memory_node) {
++		const char *path = memcg ? m->private : "";
++
++#ifdef CONFIG_MEMCG
++		if (memcg)
++			cgroup_path(memcg->css.cgroup, m->private, PATH_MAX);
++#endif
++		seq_printf(m, "memcg %5hu %s\n", mem_cgroup_id(memcg), path);
++	}
++
++	seq_printf(m, " node %5d\n", nid);
++
++	if (!full)
++		seq = min_seq[LRU_GEN_ANON];
++	else if (max_seq >= MAX_NR_GENS)
++		seq = max_seq - MAX_NR_GENS + 1;
++	else
++		seq = 0;
++
++	for (; seq <= max_seq; seq++) {
++		int type, zone;
++		int gen = lru_gen_from_seq(seq);
++		unsigned long birth = READ_ONCE(lruvec->lrugen.timestamps[gen]);
++
++		seq_printf(m, " %10lu %10u", seq, jiffies_to_msecs(jiffies - birth));
++
++		for (type = 0; type < ANON_AND_FILE; type++) {
++			unsigned long size = 0;
++			char mark = full && seq < min_seq[type] ? 'x' : ' ';
++
++			for (zone = 0; zone < MAX_NR_ZONES; zone++)
++				size += max(READ_ONCE(lrugen->nr_pages[gen][type][zone]), 0L);
++
++			seq_printf(m, " %10lu%c", size, mark);
++		}
++
++		seq_putc(m, '\n');
++
++		if (full)
++			lru_gen_seq_show_full(m, lruvec, max_seq, min_seq, seq);
++	}
++
++	return 0;
++}
++
++static const struct seq_operations lru_gen_seq_ops = {
++	.start = lru_gen_seq_start,
++	.stop = lru_gen_seq_stop,
++	.next = lru_gen_seq_next,
++	.show = lru_gen_seq_show,
++};
++
++static int run_aging(struct lruvec *lruvec, unsigned long seq, struct scan_control *sc,
++		     bool can_swap, bool force_scan)
++{
++	DEFINE_MAX_SEQ(lruvec);
++	DEFINE_MIN_SEQ(lruvec);
++
++	if (seq < max_seq)
++		return 0;
++
++	if (seq > max_seq)
++		return -EINVAL;
++
++	if (!force_scan && min_seq[!can_swap] + MAX_NR_GENS - 1 <= max_seq)
++		return -ERANGE;
++
++	try_to_inc_max_seq(lruvec, max_seq, sc, can_swap, force_scan);
++
++	return 0;
++}
++
++static int run_eviction(struct lruvec *lruvec, unsigned long seq, struct scan_control *sc,
++			int swappiness, unsigned long nr_to_reclaim)
++{
++	DEFINE_MAX_SEQ(lruvec);
++
++	if (seq + MIN_NR_GENS > max_seq)
++		return -EINVAL;
++
++	sc->nr_reclaimed = 0;
++
++	while (!signal_pending(current)) {
++		DEFINE_MIN_SEQ(lruvec);
++
++		if (seq < min_seq[!swappiness])
++			return 0;
++
++		if (sc->nr_reclaimed >= nr_to_reclaim)
++			return 0;
++
++		if (!evict_pages(lruvec, sc, swappiness, NULL))
++			return 0;
++
++		cond_resched();
++	}
++
++	return -EINTR;
++}
++
++static int run_cmd(char cmd, int memcg_id, int nid, unsigned long seq,
++		   struct scan_control *sc, int swappiness, unsigned long opt)
++{
++	struct lruvec *lruvec;
++	int err = -EINVAL;
++	struct mem_cgroup *memcg = NULL;
++
++	if (nid < 0 || nid >= MAX_NUMNODES || !node_state(nid, N_MEMORY))
++		return -EINVAL;
++
++	if (!mem_cgroup_disabled()) {
++		rcu_read_lock();
++		memcg = mem_cgroup_from_id(memcg_id);
++#ifdef CONFIG_MEMCG
++		if (memcg && !css_tryget(&memcg->css))
++			memcg = NULL;
++#endif
++		rcu_read_unlock();
++
++		if (!memcg)
++			return -EINVAL;
++	}
++
++	if (memcg_id != mem_cgroup_id(memcg))
++		goto done;
++
++	lruvec = get_lruvec(memcg, nid);
++
++	if (swappiness < 0)
++		swappiness = get_swappiness(lruvec, sc);
++	else if (swappiness > 200)
++		goto done;
++
++	switch (cmd) {
++	case '+':
++		err = run_aging(lruvec, seq, sc, swappiness, opt);
++		break;
++	case '-':
++		err = run_eviction(lruvec, seq, sc, swappiness, opt);
++		break;
++	}
++done:
++	mem_cgroup_put(memcg);
++
++	return err;
++}
++
++static ssize_t lru_gen_seq_write(struct file *file, const char __user *src,
++				 size_t len, loff_t *pos)
++{
++	void *buf;
++	char *cur, *next;
++	unsigned int flags;
++	struct blk_plug plug;
++	int err = -EINVAL;
++	struct scan_control sc = {
++		.may_writepage = true,
++		.may_unmap = true,
++		.may_swap = true,
++		.reclaim_idx = MAX_NR_ZONES - 1,
++		.gfp_mask = GFP_KERNEL,
++	};
++
++	buf = kvmalloc(len + 1, GFP_KERNEL);
++	if (!buf)
++		return -ENOMEM;
++
++	if (copy_from_user(buf, src, len)) {
++		kvfree(buf);
++		return -EFAULT;
++	}
++
++	set_task_reclaim_state(current, &sc.reclaim_state);
++	flags = memalloc_noreclaim_save();
++	blk_start_plug(&plug);
++	if (!set_mm_walk(NULL)) {
++		err = -ENOMEM;
++		goto done;
++	}
++
++	next = buf;
++	next[len] = '\0';
++
++	while ((cur = strsep(&next, ",;\n"))) {
++		int n;
++		int end;
++		char cmd;
++		unsigned int memcg_id;
++		unsigned int nid;
++		unsigned long seq;
++		unsigned int swappiness = -1;
++		unsigned long opt = -1;
++
++		cur = skip_spaces(cur);
++		if (!*cur)
++			continue;
++
++		n = sscanf(cur, "%c %u %u %lu %n %u %n %lu %n", &cmd, &memcg_id, &nid,
++			   &seq, &end, &swappiness, &end, &opt, &end);
++		if (n < 4 || cur[end]) {
++			err = -EINVAL;
++			break;
++		}
++
++		err = run_cmd(cmd, memcg_id, nid, seq, &sc, swappiness, opt);
++		if (err)
++			break;
++	}
++done:
++	clear_mm_walk();
++	blk_finish_plug(&plug);
++	memalloc_noreclaim_restore(flags);
++	set_task_reclaim_state(current, NULL);
++
++	kvfree(buf);
++
++	return err ? : len;
++}
++
++static int lru_gen_seq_open(struct inode *inode, struct file *file)
++{
++	return seq_open(file, &lru_gen_seq_ops);
++}
++
++static const struct file_operations lru_gen_rw_fops = {
++	.open = lru_gen_seq_open,
++	.read = seq_read,
++	.write = lru_gen_seq_write,
++	.llseek = seq_lseek,
++	.release = seq_release,
++};
++
++static const struct file_operations lru_gen_ro_fops = {
++	.open = lru_gen_seq_open,
++	.read = seq_read,
++	.llseek = seq_lseek,
++	.release = seq_release,
++};
++
++/******************************************************************************
+  *                          initialization
+  ******************************************************************************/
+ 
+@@ -5180,6 +5568,9 @@ static int __init init_lru_gen(void)
+ 	if (sysfs_create_group(mm_kobj, &lru_gen_attr_group))
+ 		pr_err("lru_gen: failed to create sysfs group\n");
+ 
++	debugfs_create_file("lru_gen", 0644, NULL, NULL, &lru_gen_rw_fops);
++	debugfs_create_file("lru_gen_full", 0444, NULL, NULL, &lru_gen_ro_fops);
++
+ 	return 0;
+ };
+ late_initcall(init_lru_gen);
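The write format accepted by lru_gen_seq_write() above is
"cmd memcg_id node_id seq [swappiness [opt]]", where '+' runs aging
(run_aging(), opt = force_scan) and '-' runs eviction (run_eviction(),
opt = nr_to_reclaim). A rough usage sketch; the memcg ID, node and seq
values below are placeholders and should be read from the dump first:

  # per-memcg, per-node generations with their ages in ms
  cat /sys/kernel/debug/lru_gen

  # same, plus refault/eviction counters per tier and mm walk stats
  cat /sys/kernel/debug/lru_gen_full

  # working set estimation: trigger aging for memcg 1 on node 0; seq must
  # equal the current max_seq (4 here), swappiness 0, force_scan 1
  echo '+ 1 0 4 0 1' > /sys/kernel/debug/lru_gen

  # proactive reclaim: evict generations up to seq 2 in memcg 1 on node 0,
  # swappiness 0, at most 4096 pages; seq must trail max_seq by MIN_NR_GENS
  echo '- 1 0 2 0 4096' > /sys/kernel/debug/lru_gen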

+ 32 - 0
target/linux/generic/backport-6.1/020-v6.1-13-mm-mglru-don-t-sync-disk-for-each-aging-cycle.patch

@@ -0,0 +1,32 @@
+From 92d430e8955c976eacb7cc91d7ff849c0dd009af Mon Sep 17 00:00:00 2001
+From: Yu Zhao <[email protected]>
+Date: Wed, 28 Sep 2022 13:36:58 -0600
+Subject: [PATCH 13/29] mm/mglru: don't sync disk for each aging cycle
+
+wakeup_flusher_threads() was added under the assumption that if a system
+runs out of clean cold pages, it might want to write back dirty pages more
+aggressively so that they can become clean and be dropped.
+
+However, doing so can breach the rate limit a system wants to impose on
+writeback, resulting in early SSD wearout.
+
+Link: https://lkml.kernel.org/r/[email protected]
+Fixes: bd74fdaea146 ("mm: multi-gen LRU: support page table walks")
+Signed-off-by: Yu Zhao <[email protected]>
+Reported-by: Axel Rasmussen <[email protected]>
+Signed-off-by: Andrew Morton <[email protected]>
+---
+ mm/vmscan.c | 2 --
+ 1 file changed, 2 deletions(-)
+
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -4165,8 +4165,6 @@ done:
+ 	if (wq_has_sleeper(&lruvec->mm_state.wait))
+ 		wake_up_all(&lruvec->mm_state.wait);
+ 
+-	wakeup_flusher_threads(WB_REASON_VMSCAN);
+-
+ 	return true;
+ }
+ 

+ 124 - 0
target/linux/generic/backport-6.1/020-v6.1-14-mm-multi-gen-LRU-retry-pages-written-back-while-isol.patch

@@ -0,0 +1,124 @@
+From 6f315879ad750391a0b1fab8c9170bc054a5f5d7 Mon Sep 17 00:00:00 2001
+From: Yu Zhao <[email protected]>
+Date: Tue, 15 Nov 2022 18:38:07 -0700
+Subject: [PATCH 14/29] mm: multi-gen LRU: retry pages written back while
+ isolated
+
+The page reclaim isolates a batch of pages from the tail of one of the
+LRU lists and works on those pages one by one.  For a suitable
+swap-backed page, if the swap device is async, it queues that page for
+writeback.  After the page reclaim finishes an entire batch, it puts back
+the pages it queued for writeback to the head of the original LRU list.
+
+In the meantime, the page writeback flushes the queued pages also by
+batches.  Its batching logic is independent from that of the page reclaim.
+For each of the pages it writes back, the page writeback calls
+rotate_reclaimable_page() which tries to rotate a page to the tail.
+
+rotate_reclaimable_page() only works for a page after the page reclaim
+has put it back.  If an async swap device is fast enough, the page
+writeback can finish with that page while the page reclaim is still
+working on the rest of the batch containing it.  In this case, that page
+will remain at the head and the page reclaim will not retry it before
+reaching there.
+
+This patch adds a retry to evict_pages().  After evict_pages() has
+finished an entire batch and before it puts back pages it cannot free
+immediately, it retries those that may have missed the rotation.
+
+Before this patch, ~60% of pages swapped to an Intel Optane missed
+rotate_reclaimable_page().  After this patch, ~99% of missed pages were
+reclaimed upon retry.
+
+This problem affects relatively slow async swap devices like Samsung 980
+Pro much less and does not affect sync swap devices like zram or zswap at
+all.
+
+Link: https://lkml.kernel.org/r/[email protected]
+Fixes: ac35a4902374 ("mm: multi-gen LRU: minimal implementation")
+Signed-off-by: Yu Zhao <[email protected]>
+Cc: "Yin, Fengwei" <[email protected]>
+Signed-off-by: Andrew Morton <[email protected]>
+---
+ mm/vmscan.c | 48 +++++++++++++++++++++++++++++++++++++-----------
+ 1 file changed, 37 insertions(+), 11 deletions(-)
+
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -4723,10 +4723,13 @@ static int evict_pages(struct lruvec *lr
+ 	int scanned;
+ 	int reclaimed;
+ 	LIST_HEAD(list);
++	LIST_HEAD(clean);
+ 	struct page *page;
++	struct page *next;
+ 	enum vm_event_item item;
+ 	struct reclaim_stat stat;
+ 	struct lru_gen_mm_walk *walk;
++	bool skip_retry = false;
+ 	struct mem_cgroup *memcg = lruvec_memcg(lruvec);
+ 	struct pglist_data *pgdat = lruvec_pgdat(lruvec);
+ 
+@@ -4743,20 +4746,37 @@ static int evict_pages(struct lruvec *lr
+ 
+ 	if (list_empty(&list))
+ 		return scanned;
+-
++retry:
+ 	reclaimed = shrink_page_list(&list, pgdat, sc, &stat, false);
++	sc->nr_reclaimed += reclaimed;
+ 
+-	list_for_each_entry(page, &list, lru) {
+-		/* restore LRU_REFS_FLAGS cleared by isolate_page() */
+-		if (PageWorkingset(page))
+-			SetPageReferenced(page);
++	list_for_each_entry_safe_reverse(page, next, &list, lru) {
++		if (!page_evictable(page)) {
++			list_del(&page->lru);
++			putback_lru_page(page);
++			continue;
++		}
+ 
+-		/* don't add rejected pages to the oldest generation */
+ 		if (PageReclaim(page) &&
+-		    (PageDirty(page) || PageWriteback(page)))
+-			ClearPageActive(page);
+-		else
+-			SetPageActive(page);
++		    (PageDirty(page) || PageWriteback(page))) {
++			/* restore LRU_REFS_FLAGS cleared by isolate_page() */
++			if (PageWorkingset(page))
++				SetPageReferenced(page);
++			continue;
++		}
++
++		if (skip_retry || PageActive(page) || PageReferenced(page) ||
++		    page_mapped(page) || PageLocked(page) ||
++		    PageDirty(page) || PageWriteback(page)) {
++			/* don't add rejected pages to the oldest generation */
++			set_mask_bits(&page->flags, LRU_REFS_MASK | LRU_REFS_FLAGS,
++				      BIT(PG_active));
++			continue;
++		}
++
++		/* retry pages that may have missed rotate_reclaimable_page() */
++		list_move(&page->lru, &clean);
++		sc->nr_scanned -= thp_nr_pages(page);
+ 	}
+ 
+ 	spin_lock_irq(&lruvec->lru_lock);
+@@ -4778,7 +4798,13 @@ static int evict_pages(struct lruvec *lr
+ 	mem_cgroup_uncharge_list(&list);
+ 	free_unref_page_list(&list);
+ 
+-	sc->nr_reclaimed += reclaimed;
++	INIT_LIST_HEAD(&list);
++	list_splice_init(&clean, &list);
++
++	if (!list_empty(&list)) {
++		skip_retry = true;
++		goto retry;
++	}
+ 
+ 	if (need_swapping && type == LRU_GEN_ANON)
+ 		*need_swapping = true;

+ 49 - 0
target/linux/generic/backport-6.1/020-v6.1-15-mm-multi-gen-LRU-move-lru_gen_add_mm-out-of-IRQ-off-.patch

@@ -0,0 +1,49 @@
+From 255bb0ac393f1c2818cd75af45a9226300ab3daf Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <[email protected]>
+Date: Wed, 26 Oct 2022 15:48:30 +0200
+Subject: [PATCH 15/29] mm: multi-gen LRU: move lru_gen_add_mm() out of IRQ-off
+ region
+
+lru_gen_add_mm() has been added within an IRQ-off region in the commit
+mentioned below.  The other invocations of lru_gen_add_mm() are not within
+an IRQ-off region.
+
+The invocation within IRQ-off region is problematic on PREEMPT_RT because
+the function is using a spin_lock_t which must not be used within
+IRQ-disabled regions.
+
+The other invocations of lru_gen_add_mm() occur while
+task_struct::alloc_lock is acquired.  Move lru_gen_add_mm() after
+interrupts are enabled and before task_unlock().
+
+Link: https://lkml.kernel.org/r/[email protected]
+Fixes: bd74fdaea1460 ("mm: multi-gen LRU: support page table walks")
+Signed-off-by: Sebastian Andrzej Siewior <[email protected]>
+Acked-by: Yu Zhao <[email protected]>
+Cc: Al Viro <[email protected]>
+Cc: "Eric W . Biederman" <[email protected]>
+Cc: Kees Cook <[email protected]>
+Cc: Thomas Gleixner <[email protected]>
+Signed-off-by: Andrew Morton <[email protected]>
+---
+ fs/exec.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/exec.c
++++ b/fs/exec.c
+@@ -1013,7 +1013,6 @@ static int exec_mmap(struct mm_struct *m
+ 	active_mm = tsk->active_mm;
+ 	tsk->active_mm = mm;
+ 	tsk->mm = mm;
+-	lru_gen_add_mm(mm);
+ 	/*
+ 	 * This prevents preemption while active_mm is being loaded and
+ 	 * it and mm are being updated, which could cause problems for
+@@ -1028,6 +1027,7 @@ static int exec_mmap(struct mm_struct *m
+ 		local_irq_enable();
+ 	tsk->mm->vmacache_seqnum = 0;
+ 	vmacache_flush(tsk);
++	lru_gen_add_mm(mm);
+ 	task_unlock(tsk);
+ 	lru_gen_use_mm(mm);
+ 	if (old_mm) {

+ 96 - 0
target/linux/generic/backport-6.1/020-v6.1-17-mm-add-dummy-pmd_young-for-architectures-not-having-.patch

@@ -0,0 +1,96 @@
+From c5ec455ebd2b488d91de9d8915a0c8036a2a04dd Mon Sep 17 00:00:00 2001
+From: Juergen Gross <[email protected]>
+Date: Wed, 30 Nov 2022 14:49:41 -0800
+Subject: [PATCH 17/29] mm: add dummy pmd_young() for architectures not having
+ it
+
+In order to avoid #ifdeffery add a dummy pmd_young() implementation as a
+fallback.  This is required for the later patch "mm: introduce
+arch_has_hw_nonleaf_pmd_young()".
+
+Link: https://lkml.kernel.org/r/[email protected]
+Signed-off-by: Juergen Gross <[email protected]>
+Acked-by: Yu Zhao <[email protected]>
+Cc: Borislav Petkov <[email protected]>
+Cc: Dave Hansen <[email protected]>
+Cc: Geert Uytterhoeven <[email protected]>
+Cc: "H. Peter Anvin" <[email protected]>
+Cc: Ingo Molnar <[email protected]>
+Cc: Sander Eikelenboom <[email protected]>
+Cc: Thomas Gleixner <[email protected]>
+Signed-off-by: Andrew Morton <[email protected]>
+---
+ arch/mips/include/asm/pgtable.h     | 1 +
+ arch/riscv/include/asm/pgtable.h    | 1 +
+ arch/s390/include/asm/pgtable.h     | 1 +
+ arch/sparc/include/asm/pgtable_64.h | 1 +
+ arch/x86/include/asm/pgtable.h      | 1 +
+ include/linux/pgtable.h             | 7 +++++++
+ 6 files changed, 12 insertions(+)
+
+--- a/arch/mips/include/asm/pgtable.h
++++ b/arch/mips/include/asm/pgtable.h
+@@ -632,6 +632,7 @@ static inline pmd_t pmd_mkdirty(pmd_t pm
+ 	return pmd;
+ }
+ 
++#define pmd_young pmd_young
+ static inline int pmd_young(pmd_t pmd)
+ {
+ 	return !!(pmd_val(pmd) & _PAGE_ACCESSED);
+--- a/arch/riscv/include/asm/pgtable.h
++++ b/arch/riscv/include/asm/pgtable.h
+@@ -535,6 +535,7 @@ static inline int pmd_dirty(pmd_t pmd)
+ 	return pte_dirty(pmd_pte(pmd));
+ }
+ 
++#define pmd_young pmd_young
+ static inline int pmd_young(pmd_t pmd)
+ {
+ 	return pte_young(pmd_pte(pmd));
+--- a/arch/s390/include/asm/pgtable.h
++++ b/arch/s390/include/asm/pgtable.h
+@@ -748,6 +748,7 @@ static inline int pmd_dirty(pmd_t pmd)
+ 	return (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY) != 0;
+ }
+ 
++#define pmd_young pmd_young
+ static inline int pmd_young(pmd_t pmd)
+ {
+ 	return (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) != 0;
+--- a/arch/sparc/include/asm/pgtable_64.h
++++ b/arch/sparc/include/asm/pgtable_64.h
+@@ -712,6 +712,7 @@ static inline unsigned long pmd_dirty(pm
+ 	return pte_dirty(pte);
+ }
+ 
++#define pmd_young pmd_young
+ static inline unsigned long pmd_young(pmd_t pmd)
+ {
+ 	pte_t pte = __pte(pmd_val(pmd));
+--- a/arch/x86/include/asm/pgtable.h
++++ b/arch/x86/include/asm/pgtable.h
+@@ -136,6 +136,7 @@ static inline int pmd_dirty(pmd_t pmd)
+ 	return pmd_flags(pmd) & _PAGE_DIRTY;
+ }
+ 
++#define pmd_young pmd_young
+ static inline int pmd_young(pmd_t pmd)
+ {
+ 	return pmd_flags(pmd) & _PAGE_ACCESSED;
+--- a/include/linux/pgtable.h
++++ b/include/linux/pgtable.h
+@@ -164,6 +164,13 @@ static inline pte_t *virt_to_kpte(unsign
+ 	return pmd_none(*pmd) ? NULL : pte_offset_kernel(pmd, vaddr);
+ }
+ 
++#ifndef pmd_young
++static inline int pmd_young(pmd_t pmd)
++{
++	return 0;
++}
++#endif
++
+ #ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
+ extern int ptep_set_access_flags(struct vm_area_struct *vma,
+ 				 unsigned long address, pte_t *ptep,

+ 113 - 0
target/linux/generic/backport-6.1/020-v6.1-18-mm-introduce-arch_has_hw_nonleaf_pmd_young.patch

@@ -0,0 +1,113 @@
+From 46cbda7b65998a5af4493f745d94417af697bd68 Mon Sep 17 00:00:00 2001
+From: Juergen Gross <[email protected]>
+Date: Wed, 23 Nov 2022 07:45:10 +0100
+Subject: [PATCH 18/29] mm: introduce arch_has_hw_nonleaf_pmd_young()
+
+When running as a Xen PV guest, commit eed9a328aa1a ("mm: x86: add
+CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG") can cause a protection violation in
+pmdp_test_and_clear_young():
+
+ BUG: unable to handle page fault for address: ffff8880083374d0
+ #PF: supervisor write access in kernel mode
+ #PF: error_code(0x0003) - permissions violation
+ PGD 3026067 P4D 3026067 PUD 3027067 PMD 7fee5067 PTE 8010000008337065
+ Oops: 0003 [#1] PREEMPT SMP NOPTI
+ CPU: 7 PID: 158 Comm: kswapd0 Not tainted 6.1.0-rc5-20221118-doflr+ #1
+ RIP: e030:pmdp_test_and_clear_young+0x25/0x40
+
+This happens because the Xen hypervisor can't emulate direct writes to
+page table entries other than PTEs.
+
+This can easily be fixed by introducing arch_has_hw_nonleaf_pmd_young()
+similar to arch_has_hw_pte_young() and testing that instead of
+CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG.
+
+Link: https://lkml.kernel.org/r/[email protected]
+Fixes: eed9a328aa1a ("mm: x86: add CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG")
+Signed-off-by: Juergen Gross <[email protected]>
+Reported-by: Sander Eikelenboom <[email protected]>
+Acked-by: Yu Zhao <[email protected]>
+Tested-by: Sander Eikelenboom <[email protected]>
+Acked-by: David Hildenbrand <[email protected]>	[core changes]
+Signed-off-by: Andrew Morton <[email protected]>
+---
+ arch/x86/include/asm/pgtable.h |  8 ++++++++
+ include/linux/pgtable.h        | 11 +++++++++++
+ mm/vmscan.c                    | 10 +++++-----
+ 3 files changed, 24 insertions(+), 5 deletions(-)
+
+--- a/arch/x86/include/asm/pgtable.h
++++ b/arch/x86/include/asm/pgtable.h
+@@ -1405,6 +1405,14 @@ static inline bool arch_has_hw_pte_young
+ 	return true;
+ }
+ 
++#ifdef CONFIG_XEN_PV
++#define arch_has_hw_nonleaf_pmd_young arch_has_hw_nonleaf_pmd_young
++static inline bool arch_has_hw_nonleaf_pmd_young(void)
++{
++	return !cpu_feature_enabled(X86_FEATURE_XENPV);
++}
++#endif
++
+ #endif	/* __ASSEMBLY__ */
+ 
+ #endif /* _ASM_X86_PGTABLE_H */
+--- a/include/linux/pgtable.h
++++ b/include/linux/pgtable.h
+@@ -266,6 +266,17 @@ static inline int pmdp_clear_flush_young
+ #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+ #endif
+ 
++#ifndef arch_has_hw_nonleaf_pmd_young
++/*
++ * Return whether the accessed bit in non-leaf PMD entries is supported on the
++ * local CPU.
++ */
++static inline bool arch_has_hw_nonleaf_pmd_young(void)
++{
++	return IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG);
++}
++#endif
++
+ #ifndef arch_has_hw_pte_young
+ /*
+  * Return whether the accessed bit is supported on the local CPU.
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -3727,7 +3727,7 @@ static void walk_pmd_range_locked(pud_t
+ 			goto next;
+ 
+ 		if (!pmd_trans_huge(pmd[i])) {
+-			if (IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) &&
++			if (arch_has_hw_nonleaf_pmd_young() &&
+ 			    get_cap(LRU_GEN_NONLEAF_YOUNG))
+ 				pmdp_test_and_clear_young(vma, addr, pmd + i);
+ 			goto next;
+@@ -3825,14 +3825,14 @@ restart:
+ #endif
+ 		walk->mm_stats[MM_NONLEAF_TOTAL]++;
+ 
+-#ifdef CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG
+-		if (get_cap(LRU_GEN_NONLEAF_YOUNG)) {
++		if (arch_has_hw_nonleaf_pmd_young() &&
++		    get_cap(LRU_GEN_NONLEAF_YOUNG)) {
+ 			if (!pmd_young(val))
+ 				continue;
+ 
+ 			walk_pmd_range_locked(pud, addr, vma, args, bitmap, &pos);
+ 		}
+-#endif
++
+ 		if (!walk->force_scan && !test_bloom_filter(walk->lruvec, walk->max_seq, pmd + i))
+ 			continue;
+ 
+@@ -5132,7 +5132,7 @@ static ssize_t show_enabled(struct kobje
+ 	if (arch_has_hw_pte_young() && get_cap(LRU_GEN_MM_WALK))
+ 		caps |= BIT(LRU_GEN_MM_WALK);
+ 
+-	if (IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) && get_cap(LRU_GEN_NONLEAF_YOUNG))
++	if (arch_has_hw_nonleaf_pmd_young() && get_cap(LRU_GEN_NONLEAF_YOUNG))
+ 		caps |= BIT(LRU_GEN_NONLEAF_YOUNG);
+ 
+ 	return snprintf(buf, PAGE_SIZE, "0x%04x\n", caps);
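Whether the non-leaf PMD capability is actually in effect can be observed
through the kill-switch attribute touched by the hunk above; show_enabled()
prints the caps as a hex bitmap. A quick check, assuming the mainline sysfs
location and bit assignments (0x0004 corresponding to LRU_GEN_NONLEAF_YOUNG):

  # on a Xen PV guest this should no longer report the 0x0004 bit
  cat /sys/kernel/mm/lru_gen/enabled

  # components can still be toggled explicitly, e.g. keep only the main
  # switch and the page table walk
  echo 0x0003 > /sys/kernel/mm/lru_gen/enabled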

+ 56 - 0
target/linux/generic/backport-6.1/020-v6.2-16-mm-multi-gen-LRU-fix-crash-during-cgroup-migration.patch

@@ -0,0 +1,56 @@
+From c7dfefd4bdfba3d5171038d1cc2d4160288e6ee4 Mon Sep 17 00:00:00 2001
+From: Yu Zhao <[email protected]>
+Date: Sun, 15 Jan 2023 20:44:05 -0700
+Subject: [PATCH 16/29] mm: multi-gen LRU: fix crash during cgroup migration
+
+lru_gen_migrate_mm() assumes lru_gen_add_mm() runs prior to itself.  This
+isn't true for the following scenario:
+
+    CPU 1                         CPU 2
+
+  clone()
+    cgroup_can_fork()
+                                cgroup_procs_write()
+    cgroup_post_fork()
+                                  task_lock()
+                                  lru_gen_migrate_mm()
+                                  task_unlock()
+    task_lock()
+    lru_gen_add_mm()
+    task_unlock()
+
+And when the above happens, the kernel crashes because of linked list
+corruption (mm_struct->lru_gen.list).
+
+Link: https://lore.kernel.org/r/[email protected]/
+Link: https://lkml.kernel.org/r/[email protected]
+Fixes: bd74fdaea146 ("mm: multi-gen LRU: support page table walks")
+Signed-off-by: Yu Zhao <[email protected]>
+Reported-by: msizanoen <[email protected]>
+Tested-by: msizanoen <[email protected]>
+Cc: <[email protected]>	[6.1+]
+Signed-off-by: Andrew Morton <[email protected]>
+---
+ mm/vmscan.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -3024,13 +3024,16 @@ void lru_gen_migrate_mm(struct mm_struct
+ 	if (mem_cgroup_disabled())
+ 		return;
+ 
++	/* migration can happen before addition */
++	if (!mm->lru_gen.memcg)
++		return;
++
+ 	rcu_read_lock();
+ 	memcg = mem_cgroup_from_task(task);
+ 	rcu_read_unlock();
+ 	if (memcg == mm->lru_gen.memcg)
+ 		return;
+ 
+-	VM_WARN_ON_ONCE(!mm->lru_gen.memcg);
+ 	VM_WARN_ON_ONCE(list_empty(&mm->lru_gen.list));
+ 
+ 	lru_gen_del_mm(mm);

+ 196 - 0
target/linux/generic/backport-6.1/020-v6.3-19-mm-add-vma_has_recency.patch

@@ -0,0 +1,196 @@
+From 6c7f552a48b49a8612786a28a2239fbc24fac289 Mon Sep 17 00:00:00 2001
+From: Yu Zhao <[email protected]>
+Date: Fri, 30 Dec 2022 14:52:51 -0700
+Subject: [PATCH 19/29] mm: add vma_has_recency()
+
+Add vma_has_recency() to indicate whether a VMA may exhibit temporal
+locality that the LRU algorithm relies on.
+
+This function returns false for VMAs marked by VM_SEQ_READ or
+VM_RAND_READ.  While the former flag indicates linear access, i.e., a
+special case of spatial locality, both flags indicate a lack of temporal
+locality, i.e., the reuse of an area within a relatively small duration.
+
+"Recency" is chosen over "locality" to avoid confusion between temporal
+and spatial localities.
+
+Before this patch, the active/inactive LRU only ignored the accessed bit
+from VMAs marked by VM_SEQ_READ.  After this patch, the active/inactive
+LRU and MGLRU share the same logic: they both ignore the accessed bit if
+vma_has_recency() returns false.
+
+For the active/inactive LRU, the following fio test showed a [6, 8]%
+increase in IOPS when randomly accessing mapped files under memory
+pressure.
+
+  kb=$(awk '/MemTotal/ { print $2 }' /proc/meminfo)
+  kb=$((kb - 8*1024*1024))
+
+  modprobe brd rd_nr=1 rd_size=$kb
+  dd if=/dev/zero of=/dev/ram0 bs=1M
+
+  mkfs.ext4 /dev/ram0
+  mount /dev/ram0 /mnt/
+  swapoff -a
+
+  fio --name=test --directory=/mnt/ --ioengine=mmap --numjobs=8 \
+      --size=8G --rw=randrw --time_based --runtime=10m \
+      --group_reporting
+
+The discussion that led to this patch is here [1].  Additional test
+results are available in that thread.
+
+[1] https://lore.kernel.org/r/Y31s%[email protected]/
+
+Link: https://lkml.kernel.org/r/[email protected]
+Signed-off-by: Yu Zhao <[email protected]>
+Cc: Alexander Viro <[email protected]>
+Cc: Andrea Righi <[email protected]>
+Cc: Johannes Weiner <[email protected]>
+Cc: Michael Larabel <[email protected]>
+Signed-off-by: Andrew Morton <[email protected]>
+---
+ include/linux/mm_inline.h |  9 +++++++++
+ mm/memory.c               |  8 ++++----
+ mm/rmap.c                 | 42 +++++++++++++++++----------------------
+ mm/vmscan.c               |  5 ++++-
+ 4 files changed, 35 insertions(+), 29 deletions(-)
+
+--- a/include/linux/mm_inline.h
++++ b/include/linux/mm_inline.h
+@@ -333,4 +333,13 @@ static __always_inline void del_page_fro
+ 	update_lru_size(lruvec, page_lru(page), page_zonenum(page),
+ 			-thp_nr_pages(page));
+ }
++
++static inline bool vma_has_recency(struct vm_area_struct *vma)
++{
++	if (vma->vm_flags & (VM_SEQ_READ | VM_RAND_READ))
++		return false;
++
++	return true;
++}
++
+ #endif
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -41,6 +41,7 @@
+ 
+ #include <linux/kernel_stat.h>
+ #include <linux/mm.h>
++#include <linux/mm_inline.h>
+ #include <linux/sched/mm.h>
+ #include <linux/sched/coredump.h>
+ #include <linux/sched/numa_balancing.h>
+@@ -1353,8 +1354,7 @@ again:
+ 					force_flush = 1;
+ 					set_page_dirty(page);
+ 				}
+-				if (pte_young(ptent) &&
+-				    likely(!(vma->vm_flags & VM_SEQ_READ)))
++				if (pte_young(ptent) && likely(vma_has_recency(vma)))
+ 					mark_page_accessed(page);
+ 			}
+ 			rss[mm_counter(page)]--;
+@@ -4795,8 +4795,8 @@ static inline void mm_account_fault(stru
+ #ifdef CONFIG_LRU_GEN
+ static void lru_gen_enter_fault(struct vm_area_struct *vma)
+ {
+-	/* the LRU algorithm doesn't apply to sequential or random reads */
+-	current->in_lru_fault = !(vma->vm_flags & (VM_SEQ_READ | VM_RAND_READ));
++	/* the LRU algorithm only applies to accesses with recency */
++	current->in_lru_fault = vma_has_recency(vma);
+ }
+ 
+ static void lru_gen_exit_fault(void)
+--- a/mm/rmap.c
++++ b/mm/rmap.c
+@@ -794,25 +794,14 @@ static bool page_referenced_one(struct p
+ 		}
+ 
+ 		if (pvmw.pte) {
+-			if (lru_gen_enabled() && pte_young(*pvmw.pte) &&
+-			    !(vma->vm_flags & (VM_SEQ_READ | VM_RAND_READ))) {
++			if (lru_gen_enabled() && pte_young(*pvmw.pte)) {
+ 				lru_gen_look_around(&pvmw);
+ 				referenced++;
+ 			}
+ 
+ 			if (ptep_clear_flush_young_notify(vma, address,
+-						pvmw.pte)) {
+-				/*
+-				 * Don't treat a reference through
+-				 * a sequentially read mapping as such.
+-				 * If the page has been used in another mapping,
+-				 * we will catch it; if this other mapping is
+-				 * already gone, the unmap path will have set
+-				 * PG_referenced or activated the page.
+-				 */
+-				if (likely(!(vma->vm_flags & VM_SEQ_READ)))
+-					referenced++;
+-			}
++						pvmw.pte))
++				referenced++;
+ 		} else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
+ 			if (pmdp_clear_flush_young_notify(vma, address,
+ 						pvmw.pmd))
+@@ -846,7 +835,20 @@ static bool invalid_page_referenced_vma(
+ 	struct page_referenced_arg *pra = arg;
+ 	struct mem_cgroup *memcg = pra->memcg;
+ 
+-	if (!mm_match_cgroup(vma->vm_mm, memcg))
++	/*
++	 * Ignore references from this mapping if it has no recency. If the
++	 * page has been used in another mapping, we will catch it; if this
++	 * other mapping is already gone, the unmap path will have set the
++	 * referenced flag or activated the page in zap_pte_range().
++	 */
++	if (!vma_has_recency(vma))
++		return true;
++
++	/*
++	 * If we are reclaiming on behalf of a cgroup, skip counting on behalf
++	 * of references from different cgroups.
++	 */
++	if (memcg && !mm_match_cgroup(vma->vm_mm, memcg))
+ 		return true;
+ 
+ 	return false;
+@@ -876,6 +878,7 @@ int page_referenced(struct page *page,
+ 		.rmap_one = page_referenced_one,
+ 		.arg = (void *)&pra,
+ 		.anon_lock = page_lock_anon_vma_read,
++		.invalid_vma = invalid_page_referenced_vma,
+ 	};
+ 
+ 	*vm_flags = 0;
+@@ -891,15 +894,6 @@ int page_referenced(struct page *page,
+ 			return 1;
+ 	}
+ 
+-	/*
+-	 * If we are reclaiming on behalf of a cgroup, skip
+-	 * counting on behalf of references from different
+-	 * cgroups
+-	 */
+-	if (memcg) {
+-		rwc.invalid_vma = invalid_page_referenced_vma;
+-	}
+-
+ 	rmap_walk(page, &rwc);
+ 	*vm_flags = pra.vm_flags;
+ 
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -3486,7 +3486,10 @@ static int should_skip_vma(unsigned long
+ 	if (is_vm_hugetlb_page(vma))
+ 		return true;
+ 
+-	if (vma->vm_flags & (VM_LOCKED | VM_SPECIAL | VM_SEQ_READ | VM_RAND_READ))
++	if (!vma_has_recency(vma))
++		return true;
++
++	if (vma->vm_flags & (VM_LOCKED | VM_SPECIAL))
+ 		return true;
+ 
+ 	if (vma == get_gate_vma(vma->vm_mm))

+ 125 - 0
target/linux/generic/backport-6.1/020-v6.3-20-mm-support-POSIX_FADV_NOREUSE.patch

@@ -0,0 +1,125 @@
+From 686c3d4f71de9e0e7a27f03a5617a712385f90cd Mon Sep 17 00:00:00 2001
+From: Yu Zhao <[email protected]>
+Date: Fri, 30 Dec 2022 14:52:52 -0700
+Subject: [PATCH 20/29] mm: support POSIX_FADV_NOREUSE
+
+This patch adds POSIX_FADV_NOREUSE to vma_has_recency() so that the LRU
+algorithm can ignore access to mapped files marked by this flag.
+
+The advantages of POSIX_FADV_NOREUSE are:
+1. Unlike MADV_SEQUENTIAL and MADV_RANDOM, it does not alter the
+   default readahead behavior.
+2. Unlike MADV_SEQUENTIAL and MADV_RANDOM, it does not split VMAs and
+   therefore does not take mmap_lock.
+3. Unlike MADV_COLD, setting it has a negligible cost, regardless of
+   how many pages it affects.
+
+Its limitations are:
+1. Like POSIX_FADV_RANDOM and POSIX_FADV_SEQUENTIAL, it currently does
+   not support range. IOW, its scope is the entire file.
+2. It currently does not ignore access through file descriptors.
+   Specifically, for the active/inactive LRU, given a file page shared
+   by two users and one of them having set POSIX_FADV_NOREUSE on the
+   file, this page will be activated upon the second user accessing
+   it. This corner case can be covered by checking POSIX_FADV_NOREUSE
+   before calling mark_page_accessed() on the read path. But it is
+   considered not worth the effort.
+
+There have been a few attempts to support POSIX_FADV_NOREUSE, e.g., [1].
+This time the goal is to fill a niche: a few desktop applications, e.g.,
+large file transferring and video encoding/decoding, want fast file
+streaming with mmap() rather than direct IO.  Among those applications, an
+SVT-AV1 regression was reported when running with MGLRU [2].  The
+following test can reproduce that regression.
+
+  kb=$(awk '/MemTotal/ { print $2 }' /proc/meminfo)
+  kb=$((kb - 8*1024*1024))
+
+  modprobe brd rd_nr=1 rd_size=$kb
+  dd if=/dev/zero of=/dev/ram0 bs=1M
+
+  mkfs.ext4 /dev/ram0
+  mount /dev/ram0 /mnt/
+  swapoff -a
+
+  fallocate -l 8G /mnt/swapfile
+  mkswap /mnt/swapfile
+  swapon /mnt/swapfile
+
+  wget http://ultravideo.cs.tut.fi/video/Bosphorus_3840x2160_120fps_420_8bit_YUV_Y4M.7z
+  7z e -o/mnt/ Bosphorus_3840x2160_120fps_420_8bit_YUV_Y4M.7z
+  SvtAv1EncApp --preset 12 -w 3840 -h 2160 \
+               -i /mnt/Bosphorus_3840x2160.y4m
+
+For MGLRU, the following change showed a [9-11]% increase in FPS,
+which makes it on par with the active/inactive LRU.
+
+  patch Source/App/EncApp/EbAppMain.c <<EOF
+  31a32
+  > #include <fcntl.h>
+  35d35
+  < #include <fcntl.h> /* _O_BINARY */
+  117a118
+  >             posix_fadvise(config->mmap.fd, 0, 0, POSIX_FADV_NOREUSE);
+  EOF
+
+[1] https://lore.kernel.org/r/[email protected]/
+[2] https://openbenchmarking.org/result/2209259-PTS-MGLRU8GB57
+
+Link: https://lkml.kernel.org/r/[email protected]
+Signed-off-by: Yu Zhao <[email protected]>
+Cc: Alexander Viro <[email protected]>
+Cc: Andrea Righi <[email protected]>
+Cc: Johannes Weiner <[email protected]>
+Cc: Michael Larabel <[email protected]>
+Signed-off-by: Andrew Morton <[email protected]>
+---
+ include/linux/fs.h        | 2 ++
+ include/linux/mm_inline.h | 3 +++
+ mm/fadvise.c              | 5 ++++-
+ 3 files changed, 9 insertions(+), 1 deletion(-)
+
+--- a/include/linux/fs.h
++++ b/include/linux/fs.h
+@@ -167,6 +167,8 @@ typedef int (dio_iodone_t)(struct kiocb
+ /* File is stream-like */
+ #define FMODE_STREAM		((__force fmode_t)0x200000)
+ 
++#define	FMODE_NOREUSE		((__force fmode_t)0x400000)
++
+ /* File was opened by fanotify and shouldn't generate fanotify events */
+ #define FMODE_NONOTIFY		((__force fmode_t)0x4000000)
+ 
+--- a/include/linux/mm_inline.h
++++ b/include/linux/mm_inline.h
+@@ -339,6 +339,9 @@ static inline bool vma_has_recency(struc
+ 	if (vma->vm_flags & (VM_SEQ_READ | VM_RAND_READ))
+ 		return false;
+ 
++	if (vma->vm_file && (vma->vm_file->f_mode & FMODE_NOREUSE))
++		return false;
++
+ 	return true;
+ }
+ 
+--- a/mm/fadvise.c
++++ b/mm/fadvise.c
+@@ -80,7 +80,7 @@ int generic_fadvise(struct file *file, l
+ 	case POSIX_FADV_NORMAL:
+ 		file->f_ra.ra_pages = bdi->ra_pages;
+ 		spin_lock(&file->f_lock);
+-		file->f_mode &= ~FMODE_RANDOM;
++		file->f_mode &= ~(FMODE_RANDOM | FMODE_NOREUSE);
+ 		spin_unlock(&file->f_lock);
+ 		break;
+ 	case POSIX_FADV_RANDOM:
+@@ -107,6 +107,9 @@ int generic_fadvise(struct file *file, l
+ 		force_page_cache_readahead(mapping, file, start_index, nrpages);
+ 		break;
+ 	case POSIX_FADV_NOREUSE:
++		spin_lock(&file->f_lock);
++		file->f_mode |= FMODE_NOREUSE;
++		spin_unlock(&file->f_lock);
+ 		break;
+ 	case POSIX_FADV_DONTNEED:
+ 		if (!inode_write_congested(mapping->host))

+ 348 - 0
target/linux/generic/backport-6.1/020-v6.3-21-mm-multi-gen-LRU-rename-lru_gen_struct-to-lru_gen_pa.patch

@@ -0,0 +1,348 @@
+From 348fdbada9fb3f0bf1a53651be46319105af187f Mon Sep 17 00:00:00 2001
+From: Yu Zhao <[email protected]>
+Date: Wed, 21 Dec 2022 21:18:59 -0700
+Subject: [PATCH 21/29] mm: multi-gen LRU: rename lru_gen_struct to
+ lru_gen_page
+
+Patch series "mm: multi-gen LRU: memcg LRU", v3.
+
+Overview
+========
+
+An memcg LRU is a per-node LRU of memcgs.  It is also an LRU of LRUs,
+since each node and memcg combination has an LRU of pages (see
+mem_cgroup_lruvec()).
+
+Its goal is to improve the scalability of global reclaim, which is
+critical to system-wide memory overcommit in data centers.  Note that
+memcg reclaim is currently out of scope.
+
+Its memory bloat is a pointer to each lruvec and negligible to each
+pglist_data.  In terms of traversing memcgs during global reclaim, it
+improves the best-case complexity from O(n) to O(1) and does not affect
+the worst-case complexity O(n).  Therefore, on average, it has a sublinear
+complexity in contrast to the current linear complexity.
+
+The basic structure of an memcg LRU can be understood by an analogy to
+the active/inactive LRU (of pages):
+1. It has the young and the old (generations), i.e., the counterparts
+   to the active and the inactive;
+2. The increment of max_seq triggers promotion, i.e., the counterpart
+   to activation;
+3. Other events trigger similar operations, e.g., offlining an memcg
+   triggers demotion, i.e., the counterpart to deactivation.
+
+In terms of global reclaim, it has two distinct features:
+1. Sharding, which allows each thread to start at a random memcg (in
+   the old generation) and improves parallelism;
+2. Eventual fairness, which allows direct reclaim to bail out at will
+   and reduces latency without affecting fairness over some time.
+
+The commit message in patch 6 details the workflow:
+https://lore.kernel.org/r/[email protected]/
+
+The following is a simple test to quickly verify its effectiveness.
+
+  Test design:
+  1. Create multiple memcgs.
+  2. Each memcg contains a job (fio).
+  3. All jobs access the same amount of memory randomly.
+  4. The system does not experience global memory pressure.
+  5. Periodically write to the root memory.reclaim.
+
+  Desired outcome:
+  1. All memcgs have similar pgsteal counts, i.e., stddev(pgsteal)
+     over mean(pgsteal) is close to 0%.
+  2. The total pgsteal is close to the total requested through
+     memory.reclaim, i.e., sum(pgsteal) over sum(requested) is close
+     to 100%.
+
+  Actual outcome [1]:
+                                     MGLRU off    MGLRU on
+  stddev(pgsteal) / mean(pgsteal)    75%          20%
+  sum(pgsteal) / sum(requested)      425%         95%
+
+  ####################################################################
+  MEMCGS=128
+
+  for ((memcg = 0; memcg < $MEMCGS; memcg++)); do
+      mkdir /sys/fs/cgroup/memcg$memcg
+  done
+
+  start() {
+      echo $BASHPID > /sys/fs/cgroup/memcg$memcg/cgroup.procs
+
+      fio -name=memcg$memcg --numjobs=1 --ioengine=mmap \
+          --filename=/dev/zero --size=1920M --rw=randrw \
+          --rate=64m,64m --random_distribution=random \
+          --fadvise_hint=0 --time_based --runtime=10h \
+          --group_reporting --minimal
+  }
+
+  for ((memcg = 0; memcg < $MEMCGS; memcg++)); do
+      start &
+  done
+
+  sleep 600
+
+  for ((i = 0; i < 600; i++)); do
+      echo 256m >/sys/fs/cgroup/memory.reclaim
+      sleep 6
+  done
+
+  for ((memcg = 0; memcg < $MEMCGS; memcg++)); do
+      grep "pgsteal " /sys/fs/cgroup/memcg$memcg/memory.stat
+  done
+  ####################################################################
+
+[1]: This was obtained from running the above script (touches less
+     than 256GB memory) on an EPYC 7B13 with 512GB DRAM for over an
+     hour.
+
+This patch (of 8):
+
+The new name lru_gen_page will be more distinct from the coming
+lru_gen_memcg.
+
+Link: https://lkml.kernel.org/r/[email protected]
+Link: https://lkml.kernel.org/r/[email protected]
+Signed-off-by: Yu Zhao <[email protected]>
+Cc: Johannes Weiner <[email protected]>
+Cc: Jonathan Corbet <[email protected]>
+Cc: Michael Larabel <[email protected]>
+Cc: Michal Hocko <[email protected]>
+Cc: Mike Rapoport <[email protected]>
+Cc: Roman Gushchin <[email protected]>
+Cc: Suren Baghdasaryan <[email protected]>
+Signed-off-by: Andrew Morton <[email protected]>
+---
+ include/linux/mm_inline.h |  4 ++--
+ include/linux/mmzone.h    |  6 +++---
+ mm/vmscan.c               | 34 +++++++++++++++++-----------------
+ mm/workingset.c           |  4 ++--
+ 4 files changed, 24 insertions(+), 24 deletions(-)
+
+--- a/include/linux/mm_inline.h
++++ b/include/linux/mm_inline.h
+@@ -168,7 +168,7 @@ static inline void lru_gen_update_size(s
+ 	int zone = page_zonenum(page);
+ 	int delta = thp_nr_pages(page);
+ 	enum lru_list lru = type * LRU_INACTIVE_FILE;
+-	struct lru_gen_struct *lrugen = &lruvec->lrugen;
++	struct lru_gen_page *lrugen = &lruvec->lrugen;
+ 
+ 	VM_WARN_ON_ONCE(old_gen != -1 && old_gen >= MAX_NR_GENS);
+ 	VM_WARN_ON_ONCE(new_gen != -1 && new_gen >= MAX_NR_GENS);
+@@ -214,7 +214,7 @@ static inline bool lru_gen_add_page(stru
+ 	int gen = page_lru_gen(page);
+ 	int type = page_is_file_lru(page);
+ 	int zone = page_zonenum(page);
+-	struct lru_gen_struct *lrugen = &lruvec->lrugen;
++	struct lru_gen_page *lrugen = &lruvec->lrugen;
+ 
+ 	VM_WARN_ON_ONCE_PAGE(gen != -1, page);
+ 
+--- a/include/linux/mmzone.h
++++ b/include/linux/mmzone.h
+@@ -394,7 +394,7 @@ enum {
+  * The number of pages in each generation is eventually consistent and therefore
+  * can be transiently negative when reset_batch_size() is pending.
+  */
+-struct lru_gen_struct {
++struct lru_gen_page {
+ 	/* the aging increments the youngest generation number */
+ 	unsigned long max_seq;
+ 	/* the eviction increments the oldest generation numbers */
+@@ -451,7 +451,7 @@ struct lru_gen_mm_state {
+ struct lru_gen_mm_walk {
+ 	/* the lruvec under reclaim */
+ 	struct lruvec *lruvec;
+-	/* unstable max_seq from lru_gen_struct */
++	/* unstable max_seq from lru_gen_page */
+ 	unsigned long max_seq;
+ 	/* the next address within an mm to scan */
+ 	unsigned long next_addr;
+@@ -514,7 +514,7 @@ struct lruvec {
+ 	unsigned long			flags;
+ #ifdef CONFIG_LRU_GEN
+ 	/* evictable pages divided into generations */
+-	struct lru_gen_struct		lrugen;
++	struct lru_gen_page		lrugen;
+ 	/* to concurrently iterate lru_gen_mm_list */
+ 	struct lru_gen_mm_state		mm_state;
+ #endif
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -2910,7 +2910,7 @@ static int get_nr_gens(struct lruvec *lr
+ 
+ static bool __maybe_unused seq_is_valid(struct lruvec *lruvec)
+ {
+-	/* see the comment on lru_gen_struct */
++	/* see the comment on lru_gen_page */
+ 	return get_nr_gens(lruvec, LRU_GEN_FILE) >= MIN_NR_GENS &&
+ 	       get_nr_gens(lruvec, LRU_GEN_FILE) <= get_nr_gens(lruvec, LRU_GEN_ANON) &&
+ 	       get_nr_gens(lruvec, LRU_GEN_ANON) <= MAX_NR_GENS;
+@@ -3316,7 +3316,7 @@ struct ctrl_pos {
+ static void read_ctrl_pos(struct lruvec *lruvec, int type, int tier, int gain,
+ 			  struct ctrl_pos *pos)
+ {
+-	struct lru_gen_struct *lrugen = &lruvec->lrugen;
++	struct lru_gen_page *lrugen = &lruvec->lrugen;
+ 	int hist = lru_hist_from_seq(lrugen->min_seq[type]);
+ 
+ 	pos->refaulted = lrugen->avg_refaulted[type][tier] +
+@@ -3331,7 +3331,7 @@ static void read_ctrl_pos(struct lruvec
+ static void reset_ctrl_pos(struct lruvec *lruvec, int type, bool carryover)
+ {
+ 	int hist, tier;
+-	struct lru_gen_struct *lrugen = &lruvec->lrugen;
++	struct lru_gen_page *lrugen = &lruvec->lrugen;
+ 	bool clear = carryover ? NR_HIST_GENS == 1 : NR_HIST_GENS > 1;
+ 	unsigned long seq = carryover ? lrugen->min_seq[type] : lrugen->max_seq + 1;
+ 
+@@ -3408,7 +3408,7 @@ static int page_update_gen(struct page *
+ static int page_inc_gen(struct lruvec *lruvec, struct page *page, bool reclaiming)
+ {
+ 	int type = page_is_file_lru(page);
+-	struct lru_gen_struct *lrugen = &lruvec->lrugen;
++	struct lru_gen_page *lrugen = &lruvec->lrugen;
+ 	int new_gen, old_gen = lru_gen_from_seq(lrugen->min_seq[type]);
+ 	unsigned long new_flags, old_flags = READ_ONCE(page->flags);
+ 
+@@ -3453,7 +3453,7 @@ static void update_batch_size(struct lru
+ static void reset_batch_size(struct lruvec *lruvec, struct lru_gen_mm_walk *walk)
+ {
+ 	int gen, type, zone;
+-	struct lru_gen_struct *lrugen = &lruvec->lrugen;
++	struct lru_gen_page *lrugen = &lruvec->lrugen;
+ 
+ 	walk->batched = 0;
+ 
+@@ -3979,7 +3979,7 @@ static bool inc_min_seq(struct lruvec *l
+ {
+ 	int zone;
+ 	int remaining = MAX_LRU_BATCH;
+-	struct lru_gen_struct *lrugen = &lruvec->lrugen;
++	struct lru_gen_page *lrugen = &lruvec->lrugen;
+ 	int new_gen, old_gen = lru_gen_from_seq(lrugen->min_seq[type]);
+ 
+ 	if (type == LRU_GEN_ANON && !can_swap)
+@@ -4015,7 +4015,7 @@ static bool try_to_inc_min_seq(struct lr
+ {
+ 	int gen, type, zone;
+ 	bool success = false;
+-	struct lru_gen_struct *lrugen = &lruvec->lrugen;
++	struct lru_gen_page *lrugen = &lruvec->lrugen;
+ 	DEFINE_MIN_SEQ(lruvec);
+ 
+ 	VM_WARN_ON_ONCE(!seq_is_valid(lruvec));
+@@ -4036,7 +4036,7 @@ next:
+ 		;
+ 	}
+ 
+-	/* see the comment on lru_gen_struct */
++	/* see the comment on lru_gen_page */
+ 	if (can_swap) {
+ 		min_seq[LRU_GEN_ANON] = min(min_seq[LRU_GEN_ANON], min_seq[LRU_GEN_FILE]);
+ 		min_seq[LRU_GEN_FILE] = max(min_seq[LRU_GEN_ANON], lrugen->min_seq[LRU_GEN_FILE]);
+@@ -4058,7 +4058,7 @@ static void inc_max_seq(struct lruvec *l
+ {
+ 	int prev, next;
+ 	int type, zone;
+-	struct lru_gen_struct *lrugen = &lruvec->lrugen;
++	struct lru_gen_page *lrugen = &lruvec->lrugen;
+ 
+ 	spin_lock_irq(&lruvec->lru_lock);
+ 
+@@ -4116,7 +4116,7 @@ static bool try_to_inc_max_seq(struct lr
+ 	bool success;
+ 	struct lru_gen_mm_walk *walk;
+ 	struct mm_struct *mm = NULL;
+-	struct lru_gen_struct *lrugen = &lruvec->lrugen;
++	struct lru_gen_page *lrugen = &lruvec->lrugen;
+ 
+ 	VM_WARN_ON_ONCE(max_seq > READ_ONCE(lrugen->max_seq));
+ 
+@@ -4181,7 +4181,7 @@ static bool should_run_aging(struct lruv
+ 	unsigned long old = 0;
+ 	unsigned long young = 0;
+ 	unsigned long total = 0;
+-	struct lru_gen_struct *lrugen = &lruvec->lrugen;
++	struct lru_gen_page *lrugen = &lruvec->lrugen;
+ 	struct mem_cgroup *memcg = lruvec_memcg(lruvec);
+ 
+ 	for (type = !can_swap; type < ANON_AND_FILE; type++) {
+@@ -4466,7 +4466,7 @@ static bool sort_page(struct lruvec *lru
+ 	int delta = thp_nr_pages(page);
+ 	int refs = page_lru_refs(page);
+ 	int tier = lru_tier_from_refs(refs);
+-	struct lru_gen_struct *lrugen = &lruvec->lrugen;
++	struct lru_gen_page *lrugen = &lruvec->lrugen;
+ 
+ 	VM_WARN_ON_ONCE_PAGE(gen >= MAX_NR_GENS, page);
+ 
+@@ -4566,7 +4566,7 @@ static int scan_pages(struct lruvec *lru
+ 	int scanned = 0;
+ 	int isolated = 0;
+ 	int remaining = MAX_LRU_BATCH;
+-	struct lru_gen_struct *lrugen = &lruvec->lrugen;
++	struct lru_gen_page *lrugen = &lruvec->lrugen;
+ 	struct mem_cgroup *memcg = lruvec_memcg(lruvec);
+ 
+ 	VM_WARN_ON_ONCE(!list_empty(list));
+@@ -4967,7 +4967,7 @@ done:
+ 
+ static bool __maybe_unused state_is_valid(struct lruvec *lruvec)
+ {
+-	struct lru_gen_struct *lrugen = &lruvec->lrugen;
++	struct lru_gen_page *lrugen = &lruvec->lrugen;
+ 
+ 	if (lrugen->enabled) {
+ 		enum lru_list lru;
+@@ -5247,7 +5247,7 @@ static void lru_gen_seq_show_full(struct
+ 	int i;
+ 	int type, tier;
+ 	int hist = lru_hist_from_seq(seq);
+-	struct lru_gen_struct *lrugen = &lruvec->lrugen;
++	struct lru_gen_page *lrugen = &lruvec->lrugen;
+ 
+ 	for (tier = 0; tier < MAX_NR_TIERS; tier++) {
+ 		seq_printf(m, "            %10d", tier);
+@@ -5296,7 +5296,7 @@ static int lru_gen_seq_show(struct seq_f
+ 	unsigned long seq;
+ 	bool full = !debugfs_real_fops(m->file)->write;
+ 	struct lruvec *lruvec = v;
+-	struct lru_gen_struct *lrugen = &lruvec->lrugen;
++	struct lru_gen_page *lrugen = &lruvec->lrugen;
+ 	int nid = lruvec_pgdat(lruvec)->node_id;
+ 	struct mem_cgroup *memcg = lruvec_memcg(lruvec);
+ 	DEFINE_MAX_SEQ(lruvec);
+@@ -5549,7 +5549,7 @@ void lru_gen_init_lruvec(struct lruvec *
+ {
+ 	int i;
+ 	int gen, type, zone;
+-	struct lru_gen_struct *lrugen = &lruvec->lrugen;
++	struct lru_gen_page *lrugen = &lruvec->lrugen;
+ 
+ 	lrugen->max_seq = MIN_NR_GENS + 1;
+ 	lrugen->enabled = lru_gen_enabled();
+--- a/mm/workingset.c
++++ b/mm/workingset.c
+@@ -223,7 +223,7 @@ static void *lru_gen_eviction(struct pag
+ 	unsigned long token;
+ 	unsigned long min_seq;
+ 	struct lruvec *lruvec;
+-	struct lru_gen_struct *lrugen;
++	struct lru_gen_page *lrugen;
+ 	int type = page_is_file_lru(page);
+ 	int delta = thp_nr_pages(page);
+ 	int refs = page_lru_refs(page);
+@@ -252,7 +252,7 @@ static void lru_gen_refault(struct page
+ 	unsigned long token;
+ 	unsigned long min_seq;
+ 	struct lruvec *lruvec;
+-	struct lru_gen_struct *lrugen;
++	struct lru_gen_page *lrugen;
+ 	struct mem_cgroup *memcg;
+ 	struct pglist_data *pgdat;
+ 	int type = page_is_file_lru(page);

+ 162 - 0
target/linux/generic/backport-6.1/020-v6.3-22-mm-multi-gen-LRU-rename-lrugen-lists-to-lrugen-pages.patch

@@ -0,0 +1,162 @@
+From afd37e73db04c7e6b47411120ac5f6a7eca51fec Mon Sep 17 00:00:00 2001
+From: Yu Zhao <[email protected]>
+Date: Wed, 21 Dec 2022 21:19:00 -0700
+Subject: [PATCH 22/29] mm: multi-gen LRU: rename lrugen->lists[] to
+ lrugen->pages[]
+
+lru_gen_page will be chained into per-node lists by the coming
+lrugen->list.
+
+Link: https://lkml.kernel.org/r/[email protected]
+Signed-off-by: Yu Zhao <[email protected]>
+Cc: Johannes Weiner <[email protected]>
+Cc: Jonathan Corbet <[email protected]>
+Cc: Michael Larabel <[email protected]>
+Cc: Michal Hocko <[email protected]>
+Cc: Mike Rapoport <[email protected]>
+Cc: Roman Gushchin <[email protected]>
+Cc: Suren Baghdasaryan <[email protected]>
+Signed-off-by: Andrew Morton <[email protected]>
+---
+ include/linux/mm_inline.h |  4 ++--
+ include/linux/mmzone.h    |  8 ++++----
+ mm/vmscan.c               | 20 ++++++++++----------
+ 3 files changed, 16 insertions(+), 16 deletions(-)
+
+--- a/include/linux/mm_inline.h
++++ b/include/linux/mm_inline.h
+@@ -246,9 +246,9 @@ static inline bool lru_gen_add_page(stru
+ 	lru_gen_update_size(lruvec, page, -1, gen);
+ 	/* for rotate_reclaimable_page() */
+ 	if (reclaiming)
+-		list_add_tail(&page->lru, &lrugen->lists[gen][type][zone]);
++		list_add_tail(&page->lru, &lrugen->pages[gen][type][zone]);
+ 	else
+-		list_add(&page->lru, &lrugen->lists[gen][type][zone]);
++		list_add(&page->lru, &lrugen->pages[gen][type][zone]);
+ 
+ 	return true;
+ }
+--- a/include/linux/mmzone.h
++++ b/include/linux/mmzone.h
+@@ -302,7 +302,7 @@ enum lruvec_flags {
+  * They form a sliding window of a variable size [MIN_NR_GENS, MAX_NR_GENS]. An
+  * offset within MAX_NR_GENS, i.e., gen, indexes the LRU list of the
+  * corresponding generation. The gen counter in page->flags stores gen+1 while
+- * a page is on one of lrugen->lists[]. Otherwise it stores 0.
++ * a page is on one of lrugen->pages[]. Otherwise it stores 0.
+  *
+  * A page is added to the youngest generation on faulting. The aging needs to
+  * check the accessed bit at least twice before handing this page over to the
+@@ -314,8 +314,8 @@ enum lruvec_flags {
+  * rest of generations, if they exist, are considered inactive. See
+  * lru_gen_is_active().
+  *
+- * PG_active is always cleared while a page is on one of lrugen->lists[] so that
+- * the aging needs not to worry about it. And it's set again when a page
++ * PG_active is always cleared while a page is on one of lrugen->pages[] so
++ * that the aging needs not to worry about it. And it's set again when a page
+  * considered active is isolated for non-reclaiming purposes, e.g., migration.
+  * See lru_gen_add_page() and lru_gen_del_page().
+  *
+@@ -402,7 +402,7 @@ struct lru_gen_page {
+ 	/* the birth time of each generation in jiffies */
+ 	unsigned long timestamps[MAX_NR_GENS];
+ 	/* the multi-gen LRU lists, lazily sorted on eviction */
+-	struct list_head lists[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES];
++	struct list_head pages[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES];
+ 	/* the multi-gen LRU sizes, eventually consistent */
+ 	long nr_pages[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES];
+ 	/* the exponential moving average of refaulted */
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -3987,7 +3987,7 @@ static bool inc_min_seq(struct lruvec *l
+ 
+ 	/* prevent cold/hot inversion if force_scan is true */
+ 	for (zone = 0; zone < MAX_NR_ZONES; zone++) {
+-		struct list_head *head = &lrugen->lists[old_gen][type][zone];
++		struct list_head *head = &lrugen->pages[old_gen][type][zone];
+ 
+ 		while (!list_empty(head)) {
+ 			struct page *page = lru_to_page(head);
+@@ -3998,7 +3998,7 @@ static bool inc_min_seq(struct lruvec *l
+ 			VM_WARN_ON_ONCE_PAGE(page_zonenum(page) != zone, page);
+ 
+ 			new_gen = page_inc_gen(lruvec, page, false);
+-			list_move_tail(&page->lru, &lrugen->lists[new_gen][type][zone]);
++			list_move_tail(&page->lru, &lrugen->pages[new_gen][type][zone]);
+ 
+ 			if (!--remaining)
+ 				return false;
+@@ -4026,7 +4026,7 @@ static bool try_to_inc_min_seq(struct lr
+ 			gen = lru_gen_from_seq(min_seq[type]);
+ 
+ 			for (zone = 0; zone < MAX_NR_ZONES; zone++) {
+-				if (!list_empty(&lrugen->lists[gen][type][zone]))
++				if (!list_empty(&lrugen->pages[gen][type][zone]))
+ 					goto next;
+ 			}
+ 
+@@ -4491,7 +4491,7 @@ static bool sort_page(struct lruvec *lru
+ 
+ 	/* promoted */
+ 	if (gen != lru_gen_from_seq(lrugen->min_seq[type])) {
+-		list_move(&page->lru, &lrugen->lists[gen][type][zone]);
++		list_move(&page->lru, &lrugen->pages[gen][type][zone]);
+ 		return true;
+ 	}
+ 
+@@ -4500,7 +4500,7 @@ static bool sort_page(struct lruvec *lru
+ 		int hist = lru_hist_from_seq(lrugen->min_seq[type]);
+ 
+ 		gen = page_inc_gen(lruvec, page, false);
+-		list_move_tail(&page->lru, &lrugen->lists[gen][type][zone]);
++		list_move_tail(&page->lru, &lrugen->pages[gen][type][zone]);
+ 
+ 		WRITE_ONCE(lrugen->protected[hist][type][tier - 1],
+ 			   lrugen->protected[hist][type][tier - 1] + delta);
+@@ -4512,7 +4512,7 @@ static bool sort_page(struct lruvec *lru
+ 	if (PageLocked(page) || PageWriteback(page) ||
+ 	    (type == LRU_GEN_FILE && PageDirty(page))) {
+ 		gen = page_inc_gen(lruvec, page, true);
+-		list_move(&page->lru, &lrugen->lists[gen][type][zone]);
++		list_move(&page->lru, &lrugen->pages[gen][type][zone]);
+ 		return true;
+ 	}
+ 
+@@ -4579,7 +4579,7 @@ static int scan_pages(struct lruvec *lru
+ 	for (zone = sc->reclaim_idx; zone >= 0; zone--) {
+ 		LIST_HEAD(moved);
+ 		int skipped = 0;
+-		struct list_head *head = &lrugen->lists[gen][type][zone];
++		struct list_head *head = &lrugen->pages[gen][type][zone];
+ 
+ 		while (!list_empty(head)) {
+ 			struct page *page = lru_to_page(head);
+@@ -4980,7 +4980,7 @@ static bool __maybe_unused state_is_vali
+ 		int gen, type, zone;
+ 
+ 		for_each_gen_type_zone(gen, type, zone) {
+-			if (!list_empty(&lrugen->lists[gen][type][zone]))
++			if (!list_empty(&lrugen->pages[gen][type][zone]))
+ 				return false;
+ 		}
+ 	}
+@@ -5025,7 +5025,7 @@ static bool drain_evictable(struct lruve
+ 	int remaining = MAX_LRU_BATCH;
+ 
+ 	for_each_gen_type_zone(gen, type, zone) {
+-		struct list_head *head = &lruvec->lrugen.lists[gen][type][zone];
++		struct list_head *head = &lruvec->lrugen.pages[gen][type][zone];
+ 
+ 		while (!list_empty(head)) {
+ 			bool success;
+@@ -5558,7 +5558,7 @@ void lru_gen_init_lruvec(struct lruvec *
+ 		lrugen->timestamps[i] = jiffies;
+ 
+ 	for_each_gen_type_zone(gen, type, zone)
+-		INIT_LIST_HEAD(&lrugen->lists[gen][type][zone]);
++		INIT_LIST_HEAD(&lrugen->pages[gen][type][zone]);
+ 
+ 	lruvec->mm_state.seq = MIN_NR_GENS;
+ 	init_waitqueue_head(&lruvec->mm_state.wait);
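
The data-structure comment updated by this patch describes how sequence counters index lrugen->pages[]: a generation is just a seq value folded into a fixed number of slots, and [min_seq, max_seq] forms the sliding window. A minimal standalone C sketch of that indexing, assuming the kernel defaults MAX_NR_GENS = 4 and MIN_NR_GENS = 2 (the window values below are invented):

#include <stdio.h>

#define MAX_NR_GENS 4    /* assumed kernel default */
#define MIN_NR_GENS 2    /* assumed kernel default */

/* same folding as lru_gen_from_seq(): a sequence number picks a slot */
static int gen_from_seq(unsigned long seq)
{
    return seq % MAX_NR_GENS;
}

int main(void)
{
    unsigned long min_seq = 5, max_seq = 7;    /* arbitrary sliding window */
    unsigned long seq;

    /* the window [min_seq, max_seq] never spans more than MAX_NR_GENS slots */
    for (seq = min_seq; seq <= max_seq; seq++)
        printf("seq %lu -> lrugen->pages[%d][type][zone]\n",
               seq, gen_from_seq(seq));

    /* the aging increments max_seq; the slot it wraps into is reused only
     * after the eviction has emptied it and bumped min_seq */
    printf("the next aging cycle would open gen %d\n",
           gen_from_seq(max_seq + 1));
    return 0;
}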

+ 188 - 0
target/linux/generic/backport-6.1/020-v6.3-23-mm-multi-gen-LRU-remove-eviction-fairness-safeguard.patch

@@ -0,0 +1,188 @@
+From ce45f1c4b32cf69b166f56ef5bc6c761e06ed4e5 Mon Sep 17 00:00:00 2001
+From: Yu Zhao <[email protected]>
+Date: Wed, 21 Dec 2022 21:19:01 -0700
+Subject: [PATCH 23/29] mm: multi-gen LRU: remove eviction fairness safeguard
+
+Recall that the eviction consumes the oldest generation: first it
+bucket-sorts pages whose gen counters were updated by the aging and
+reclaims the rest; then it increments lrugen->min_seq.
+
+The current eviction fairness safeguard for global reclaim has a
+dilemma: when there are multiple eligible memcgs, should it continue
+or stop upon meeting the reclaim goal? If it continues, it overshoots
+and increases direct reclaim latency; if it stops, it loses fairness
+between memcgs it has taken memory away from and those it has yet to.
+
+With memcg LRU, the eviction, while ensuring eventual fairness, will
+stop upon meeting its goal. Therefore the current eviction fairness
+safeguard for global reclaim will not be needed.
+
+Note that memcg LRU only applies to global reclaim. For memcg reclaim,
+the eviction will continue, even if it is overshooting. This becomes
+unconditional due to code simplification.
+
+Link: https://lkml.kernel.org/r/[email protected]
+Signed-off-by: Yu Zhao <[email protected]>
+Cc: Johannes Weiner <[email protected]>
+Cc: Jonathan Corbet <[email protected]>
+Cc: Michael Larabel <[email protected]>
+Cc: Michal Hocko <[email protected]>
+Cc: Mike Rapoport <[email protected]>
+Cc: Roman Gushchin <[email protected]>
+Cc: Suren Baghdasaryan <[email protected]>
+Signed-off-by: Andrew Morton <[email protected]>
+---
+ mm/vmscan.c | 82 +++++++++++++++--------------------------------------
+ 1 file changed, 23 insertions(+), 59 deletions(-)
+
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -443,6 +443,11 @@ static bool cgroup_reclaim(struct scan_c
+ 	return sc->target_mem_cgroup;
+ }
+ 
++static bool global_reclaim(struct scan_control *sc)
++{
++	return !sc->target_mem_cgroup || mem_cgroup_is_root(sc->target_mem_cgroup);
++}
++
+ /**
+  * writeback_throttling_sane - is the usual dirty throttling mechanism available?
+  * @sc: scan_control in question
+@@ -493,6 +498,11 @@ static bool cgroup_reclaim(struct scan_c
+ 	return false;
+ }
+ 
++static bool global_reclaim(struct scan_control *sc)
++{
++	return true;
++}
++
+ static bool writeback_throttling_sane(struct scan_control *sc)
+ {
+ 	return true;
+@@ -4722,8 +4732,7 @@ static int isolate_pages(struct lruvec *
+ 	return scanned;
+ }
+ 
+-static int evict_pages(struct lruvec *lruvec, struct scan_control *sc, int swappiness,
+-		       bool *need_swapping)
++static int evict_pages(struct lruvec *lruvec, struct scan_control *sc, int swappiness)
+ {
+ 	int type;
+ 	int scanned;
+@@ -4812,9 +4821,6 @@ retry:
+ 		goto retry;
+ 	}
+ 
+-	if (need_swapping && type == LRU_GEN_ANON)
+-		*need_swapping = true;
+-
+ 	return scanned;
+ }
+ 
+@@ -4853,68 +4859,26 @@ done:
+ 	return min_seq[!can_swap] + MIN_NR_GENS <= max_seq ? nr_to_scan : 0;
+ }
+ 
+-static bool should_abort_scan(struct lruvec *lruvec, unsigned long seq,
+-			      struct scan_control *sc, bool need_swapping)
++static unsigned long get_nr_to_reclaim(struct scan_control *sc)
+ {
+-	int i;
+-	DEFINE_MAX_SEQ(lruvec);
+-
+-	if (!current_is_kswapd()) {
+-		/* age each memcg once to ensure fairness */
+-		if (max_seq - seq > 1)
+-			return true;
+-
+-		/* over-swapping can increase allocation latency */
+-		if (sc->nr_reclaimed >= sc->nr_to_reclaim && need_swapping)
+-			return true;
+-
+-		/* give this thread a chance to exit and free its memory */
+-		if (fatal_signal_pending(current)) {
+-			sc->nr_reclaimed += MIN_LRU_BATCH;
+-			return true;
+-		}
+-
+-		if (cgroup_reclaim(sc))
+-			return false;
+-	} else if (sc->nr_reclaimed - sc->last_reclaimed < sc->nr_to_reclaim)
+-		return false;
+-
+-	/* keep scanning at low priorities to ensure fairness */
+-	if (sc->priority > DEF_PRIORITY - 2)
+-		return false;
+-
+-	/*
+-	 * A minimum amount of work was done under global memory pressure. For
+-	 * kswapd, it may be overshooting. For direct reclaim, the target isn't
+-	 * met, and yet the allocation may still succeed, since kswapd may have
+-	 * caught up. In either case, it's better to stop now, and restart if
+-	 * necessary.
+-	 */
+-	for (i = 0; i <= sc->reclaim_idx; i++) {
+-		unsigned long wmark;
+-		struct zone *zone = lruvec_pgdat(lruvec)->node_zones + i;
+-
+-		if (!managed_zone(zone))
+-			continue;
+-
+-		wmark = current_is_kswapd() ? high_wmark_pages(zone) : low_wmark_pages(zone);
+-		if (wmark > zone_page_state(zone, NR_FREE_PAGES))
+-			return false;
+-	}
++	/* don't abort memcg reclaim to ensure fairness */
++	if (!global_reclaim(sc))
++		return -1;
+ 
+-	sc->nr_reclaimed += MIN_LRU_BATCH;
++	/* discount the previous progress for kswapd */
++	if (current_is_kswapd())
++		return sc->nr_to_reclaim + sc->last_reclaimed;
+ 
+-	return true;
++	return max(sc->nr_to_reclaim, compact_gap(sc->order));
+ }
+ 
+ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
+ {
+ 	struct blk_plug plug;
+ 	bool need_aging = false;
+-	bool need_swapping = false;
+ 	unsigned long scanned = 0;
+ 	unsigned long reclaimed = sc->nr_reclaimed;
+-	DEFINE_MAX_SEQ(lruvec);
++	unsigned long nr_to_reclaim = get_nr_to_reclaim(sc);
+ 
+ 	lru_add_drain();
+ 
+@@ -4938,7 +4902,7 @@ static void lru_gen_shrink_lruvec(struct
+ 		if (!nr_to_scan)
+ 			goto done;
+ 
+-		delta = evict_pages(lruvec, sc, swappiness, &need_swapping);
++		delta = evict_pages(lruvec, sc, swappiness);
+ 		if (!delta)
+ 			goto done;
+ 
+@@ -4946,7 +4910,7 @@ static void lru_gen_shrink_lruvec(struct
+ 		if (scanned >= nr_to_scan)
+ 			break;
+ 
+-		if (should_abort_scan(lruvec, max_seq, sc, need_swapping))
++		if (sc->nr_reclaimed >= nr_to_reclaim)
+ 			break;
+ 
+ 		cond_resched();
+@@ -5393,7 +5357,7 @@ static int run_eviction(struct lruvec *l
+ 		if (sc->nr_reclaimed >= nr_to_reclaim)
+ 			return 0;
+ 
+-		if (!evict_pages(lruvec, sc, swappiness, NULL))
++		if (!evict_pages(lruvec, sc, swappiness))
+ 			return 0;
+ 
+ 		cond_resched();
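
With should_abort_scan() removed, the loop in lru_gen_shrink_lruvec() stops on the single target computed by get_nr_to_reclaim(). A standalone C sketch of its three cases; the scan_control below is a cut-down stand-in, and compact_gap() is approximated as 2UL << order, which is an assumption rather than something this patch defines:

#include <stdio.h>
#include <limits.h>

struct scan_control {                /* only the fields the sketch needs */
    unsigned long nr_to_reclaim;
    unsigned long last_reclaimed;
    int global;                      /* stand-in for global_reclaim(sc) */
    int kswapd;                      /* stand-in for current_is_kswapd() */
    int order;
};

/* assumed approximation of the kernel's compact_gap() */
static unsigned long compact_gap(int order)
{
    return 2UL << order;
}

static unsigned long get_nr_to_reclaim(struct scan_control *sc)
{
    /* memcg reclaim: never abort, so the target wraps around to ULONG_MAX */
    if (!sc->global)
        return -1;

    /* kswapd: discount the progress made before this lruvec */
    if (sc->kswapd)
        return sc->nr_to_reclaim + sc->last_reclaimed;

    /* direct reclaim: leave enough room for compaction */
    return sc->nr_to_reclaim > compact_gap(sc->order) ?
           sc->nr_to_reclaim : compact_gap(sc->order);
}

int main(void)
{
    struct scan_control memcg  = { 32, 0,   0, 0, 0 };
    struct scan_control kswapd = { 32, 100, 1, 1, 0 };
    struct scan_control direct = { 32, 0,   1, 0, 9 };

    printf("memcg reclaim target : %lu (ULONG_MAX is %lu)\n",
           get_nr_to_reclaim(&memcg), ULONG_MAX);
    printf("kswapd target        : %lu\n", get_nr_to_reclaim(&kswapd));
    printf("direct reclaim target: %lu\n", get_nr_to_reclaim(&direct));
    return 0;
}

The -1 returned for memcg reclaim wraps to ULONG_MAX, so the new nr_reclaimed >= nr_to_reclaim check can never abort memcg reclaim, matching the commit message.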

+ 287 - 0
target/linux/generic/backport-6.1/020-v6.3-24-mm-multi-gen-LRU-remove-aging-fairness-safeguard.patch

@@ -0,0 +1,287 @@
+From e20b7386fccc18c791796eb1dc1a91eee3ccf801 Mon Sep 17 00:00:00 2001
+From: Yu Zhao <[email protected]>
+Date: Wed, 21 Dec 2022 21:19:02 -0700
+Subject: [PATCH 24/29] mm: multi-gen LRU: remove aging fairness safeguard
+
+Recall that the aging produces the youngest generation: first it scans
+for accessed pages and updates their gen counters; then it increments
+lrugen->max_seq.
+
+The current aging fairness safeguard for kswapd uses two passes to
+ensure the fairness to multiple eligible memcgs. On the first pass,
+which is shared with the eviction, it checks whether all eligible
+memcgs are low on cold pages. If so, it requires a second pass, on
+which it ages all those memcgs at the same time.
+
+With memcg LRU, the aging, while ensuring eventual fairness, will run
+when necessary. Therefore the current aging fairness safeguard for
+kswapd will not be needed.
+
+Note that memcg LRU only applies to global reclaim. For memcg reclaim,
+the aging can be unfair to different memcgs, i.e., their
+lrugen->max_seq can be incremented at different paces.
+
+Link: https://lkml.kernel.org/r/[email protected]
+Signed-off-by: Yu Zhao <[email protected]>
+Cc: Johannes Weiner <[email protected]>
+Cc: Jonathan Corbet <[email protected]>
+Cc: Michael Larabel <[email protected]>
+Cc: Michal Hocko <[email protected]>
+Cc: Mike Rapoport <[email protected]>
+Cc: Roman Gushchin <[email protected]>
+Cc: Suren Baghdasaryan <[email protected]>
+Signed-off-by: Andrew Morton <[email protected]>
+---
+ mm/vmscan.c | 126 ++++++++++++++++++++++++----------------------------
+ 1 file changed, 59 insertions(+), 67 deletions(-)
+
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -131,7 +131,6 @@ struct scan_control {
+ 
+ #ifdef CONFIG_LRU_GEN
+ 	/* help kswapd make better choices among multiple memcgs */
+-	unsigned int memcgs_need_aging:1;
+ 	unsigned long last_reclaimed;
+ #endif
+ 
+@@ -4184,7 +4183,7 @@ done:
+ 	return true;
+ }
+ 
+-static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq, unsigned long *min_seq,
++static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq,
+ 			     struct scan_control *sc, bool can_swap, unsigned long *nr_to_scan)
+ {
+ 	int gen, type, zone;
+@@ -4193,6 +4192,13 @@ static bool should_run_aging(struct lruv
+ 	unsigned long total = 0;
+ 	struct lru_gen_page *lrugen = &lruvec->lrugen;
+ 	struct mem_cgroup *memcg = lruvec_memcg(lruvec);
++	DEFINE_MIN_SEQ(lruvec);
++
++	/* whether this lruvec is completely out of cold pages */
++	if (min_seq[!can_swap] + MIN_NR_GENS > max_seq) {
++		*nr_to_scan = 0;
++		return true;
++	}
+ 
+ 	for (type = !can_swap; type < ANON_AND_FILE; type++) {
+ 		unsigned long seq;
+@@ -4221,8 +4227,6 @@ static bool should_run_aging(struct lruv
+ 	 * stalls when the number of generations reaches MIN_NR_GENS. Hence, the
+ 	 * ideal number of generations is MIN_NR_GENS+1.
+ 	 */
+-	if (min_seq[!can_swap] + MIN_NR_GENS > max_seq)
+-		return true;
+ 	if (min_seq[!can_swap] + MIN_NR_GENS < max_seq)
+ 		return false;
+ 
+@@ -4241,40 +4245,54 @@ static bool should_run_aging(struct lruv
+ 	return false;
+ }
+ 
+-static bool age_lruvec(struct lruvec *lruvec, struct scan_control *sc, unsigned long min_ttl)
++static bool lruvec_is_sizable(struct lruvec *lruvec, struct scan_control *sc)
+ {
+-	bool need_aging;
+-	unsigned long nr_to_scan;
+-	int swappiness = get_swappiness(lruvec, sc);
++	int gen, type, zone;
++	unsigned long total = 0;
++	bool can_swap = get_swappiness(lruvec, sc);
++	struct lru_gen_page *lrugen = &lruvec->lrugen;
+ 	struct mem_cgroup *memcg = lruvec_memcg(lruvec);
+ 	DEFINE_MAX_SEQ(lruvec);
+ 	DEFINE_MIN_SEQ(lruvec);
+ 
+-	VM_WARN_ON_ONCE(sc->memcg_low_reclaim);
++	for (type = !can_swap; type < ANON_AND_FILE; type++) {
++		unsigned long seq;
+ 
+-	mem_cgroup_calculate_protection(NULL, memcg);
++		for (seq = min_seq[type]; seq <= max_seq; seq++) {
++			gen = lru_gen_from_seq(seq);
+ 
+-	if (mem_cgroup_below_min(memcg))
+-		return false;
++			for (zone = 0; zone < MAX_NR_ZONES; zone++)
++				total += max(READ_ONCE(lrugen->nr_pages[gen][type][zone]), 0L);
++		}
++	}
+ 
+-	need_aging = should_run_aging(lruvec, max_seq, min_seq, sc, swappiness, &nr_to_scan);
++	/* whether the size is big enough to be helpful */
++	return mem_cgroup_online(memcg) ? (total >> sc->priority) : total;
++}
+ 
+-	if (min_ttl) {
+-		int gen = lru_gen_from_seq(min_seq[LRU_GEN_FILE]);
+-		unsigned long birth = READ_ONCE(lruvec->lrugen.timestamps[gen]);
++static bool lruvec_is_reclaimable(struct lruvec *lruvec, struct scan_control *sc,
++				  unsigned long min_ttl)
++{
++	int gen;
++	unsigned long birth;
++	struct mem_cgroup *memcg = lruvec_memcg(lruvec);
++	DEFINE_MIN_SEQ(lruvec);
+ 
+-		if (time_is_after_jiffies(birth + min_ttl))
+-			return false;
++	VM_WARN_ON_ONCE(sc->memcg_low_reclaim);
+ 
+-		/* the size is likely too small to be helpful */
+-		if (!nr_to_scan && sc->priority != DEF_PRIORITY)
+-			return false;
+-	}
++	/* see the comment on lru_gen_page */
++	gen = lru_gen_from_seq(min_seq[LRU_GEN_FILE]);
++	birth = READ_ONCE(lruvec->lrugen.timestamps[gen]);
+ 
+-	if (need_aging)
+-		try_to_inc_max_seq(lruvec, max_seq, sc, swappiness, false);
++	if (time_is_after_jiffies(birth + min_ttl))
++		return false;
+ 
+-	return true;
++	if (!lruvec_is_sizable(lruvec, sc))
++		return false;
++
++	mem_cgroup_calculate_protection(NULL, memcg);
++
++	return !mem_cgroup_below_min(memcg);
+ }
+ 
+ /* to protect the working set of the last N jiffies */
+@@ -4283,46 +4301,32 @@ static unsigned long lru_gen_min_ttl __r
+ static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc)
+ {
+ 	struct mem_cgroup *memcg;
+-	bool success = false;
+ 	unsigned long min_ttl = READ_ONCE(lru_gen_min_ttl);
+ 
+ 	VM_WARN_ON_ONCE(!current_is_kswapd());
+ 
+ 	sc->last_reclaimed = sc->nr_reclaimed;
+ 
+-	/*
+-	 * To reduce the chance of going into the aging path, which can be
+-	 * costly, optimistically skip it if the flag below was cleared in the
+-	 * eviction path. This improves the overall performance when multiple
+-	 * memcgs are available.
+-	 */
+-	if (!sc->memcgs_need_aging) {
+-		sc->memcgs_need_aging = true;
++	/* check the order to exclude compaction-induced reclaim */
++	if (!min_ttl || sc->order || sc->priority == DEF_PRIORITY)
+ 		return;
+-	}
+-
+-	set_mm_walk(pgdat);
+ 
+ 	memcg = mem_cgroup_iter(NULL, NULL, NULL);
+ 	do {
+ 		struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat);
+ 
+-		if (age_lruvec(lruvec, sc, min_ttl))
+-			success = true;
++		if (lruvec_is_reclaimable(lruvec, sc, min_ttl)) {
++			mem_cgroup_iter_break(NULL, memcg);
++			return;
++		}
+ 
+ 		cond_resched();
+ 	} while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)));
+ 
+-	clear_mm_walk();
+-
+-	/* check the order to exclude compaction-induced reclaim */
+-	if (success || !min_ttl || sc->order)
+-		return;
+-
+ 	/*
+ 	 * The main goal is to OOM kill if every generation from all memcgs is
+ 	 * younger than min_ttl. However, another possibility is all memcgs are
+-	 * either below min or empty.
++	 * either too small or below min.
+ 	 */
+ 	if (mutex_trylock(&oom_lock)) {
+ 		struct oom_control oc = {
+@@ -4830,33 +4834,27 @@ retry:
+  *    reclaim.
+  */
+ static unsigned long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc,
+-				    bool can_swap, bool *need_aging)
++				    bool can_swap)
+ {
+ 	unsigned long nr_to_scan;
+ 	struct mem_cgroup *memcg = lruvec_memcg(lruvec);
+ 	DEFINE_MAX_SEQ(lruvec);
+-	DEFINE_MIN_SEQ(lruvec);
+ 
+ 	if (mem_cgroup_below_min(memcg) ||
+ 	    (mem_cgroup_below_low(memcg) && !sc->memcg_low_reclaim))
+ 		return 0;
+ 
+-	*need_aging = should_run_aging(lruvec, max_seq, min_seq, sc, can_swap, &nr_to_scan);
+-	if (!*need_aging)
++	if (!should_run_aging(lruvec, max_seq, sc, can_swap, &nr_to_scan))
+ 		return nr_to_scan;
+ 
+ 	/* skip the aging path at the default priority */
+ 	if (sc->priority == DEF_PRIORITY)
+-		goto done;
++		return nr_to_scan;
+ 
+-	/* leave the work to lru_gen_age_node() */
+-	if (current_is_kswapd())
+-		return 0;
++	try_to_inc_max_seq(lruvec, max_seq, sc, can_swap, false);
+ 
+-	if (try_to_inc_max_seq(lruvec, max_seq, sc, can_swap, false))
+-		return nr_to_scan;
+-done:
+-	return min_seq[!can_swap] + MIN_NR_GENS <= max_seq ? nr_to_scan : 0;
++	/* skip this lruvec as it's low on cold pages */
++	return 0;
+ }
+ 
+ static unsigned long get_nr_to_reclaim(struct scan_control *sc)
+@@ -4875,9 +4873,7 @@ static unsigned long get_nr_to_reclaim(s
+ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
+ {
+ 	struct blk_plug plug;
+-	bool need_aging = false;
+ 	unsigned long scanned = 0;
+-	unsigned long reclaimed = sc->nr_reclaimed;
+ 	unsigned long nr_to_reclaim = get_nr_to_reclaim(sc);
+ 
+ 	lru_add_drain();
+@@ -4898,13 +4894,13 @@ static void lru_gen_shrink_lruvec(struct
+ 		else
+ 			swappiness = 0;
+ 
+-		nr_to_scan = get_nr_to_scan(lruvec, sc, swappiness, &need_aging);
++		nr_to_scan = get_nr_to_scan(lruvec, sc, swappiness);
+ 		if (!nr_to_scan)
+-			goto done;
++			break;
+ 
+ 		delta = evict_pages(lruvec, sc, swappiness);
+ 		if (!delta)
+-			goto done;
++			break;
+ 
+ 		scanned += delta;
+ 		if (scanned >= nr_to_scan)
+@@ -4916,10 +4912,6 @@ static void lru_gen_shrink_lruvec(struct
+ 		cond_resched();
+ 	}
+ 
+-	/* see the comment in lru_gen_age_node() */
+-	if (sc->nr_reclaimed - reclaimed >= MIN_LRU_BATCH && !need_aging)
+-		sc->memcgs_need_aging = false;
+-done:
+ 	clear_mm_walk();
+ 
+ 	blk_finish_plug(&plug);
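
The "reclaimable size threshold" mentioned above is what the new lruvec_is_sizable() implements: the lruvec's total page count shifted right by the scan priority. A small standalone C illustration of that cutoff; the page counts are invented and DEF_PRIORITY = 12 is assumed from the kernel:

#include <stdio.h>

/* an online memcg is worth aging only if (total >> priority) is non-zero;
 * an offline memcg only has to be non-empty, since its memory gets scraped */
static int is_sizable(unsigned long total_pages, int priority, int online)
{
    return online ? (total_pages >> priority) != 0 : total_pages != 0;
}

int main(void)
{
    int priorities[] = { 12, 6, 1 };          /* 12 is the assumed DEF_PRIORITY */
    unsigned long totals[] = { 1000, 10000 };

    for (int p = 0; p < 3; p++)
        for (int t = 0; t < 2; t++)
            printf("total=%5lu priority=%2d -> sizable=%d\n",
                   totals[t], priorities[p],
                   is_sizable(totals[t], priorities[p], 1));
    return 0;
}

At the default priority an online memcg therefore needs at least 4096 pages before the aging considers it, and every priority drop halves that bar.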

+ 161 - 0
target/linux/generic/backport-6.1/020-v6.3-25-mm-multi-gen-LRU-shuffle-should_run_aging.patch

@@ -0,0 +1,161 @@
+From 107d54931df3c28d81648122e219bf0034ef4e99 Mon Sep 17 00:00:00 2001
+From: Yu Zhao <[email protected]>
+Date: Wed, 21 Dec 2022 21:19:03 -0700
+Subject: [PATCH 25/29] mm: multi-gen LRU: shuffle should_run_aging()
+
+Move should_run_aging() next to its only caller left.
+
+Link: https://lkml.kernel.org/r/[email protected]
+Signed-off-by: Yu Zhao <[email protected]>
+Cc: Johannes Weiner <[email protected]>
+Cc: Jonathan Corbet <[email protected]>
+Cc: Michael Larabel <[email protected]>
+Cc: Michal Hocko <[email protected]>
+Cc: Mike Rapoport <[email protected]>
+Cc: Roman Gushchin <[email protected]>
+Cc: Suren Baghdasaryan <[email protected]>
+Signed-off-by: Andrew Morton <[email protected]>
+---
+ mm/vmscan.c | 124 ++++++++++++++++++++++++++--------------------------
+ 1 file changed, 62 insertions(+), 62 deletions(-)
+
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -4183,68 +4183,6 @@ done:
+ 	return true;
+ }
+ 
+-static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq,
+-			     struct scan_control *sc, bool can_swap, unsigned long *nr_to_scan)
+-{
+-	int gen, type, zone;
+-	unsigned long old = 0;
+-	unsigned long young = 0;
+-	unsigned long total = 0;
+-	struct lru_gen_page *lrugen = &lruvec->lrugen;
+-	struct mem_cgroup *memcg = lruvec_memcg(lruvec);
+-	DEFINE_MIN_SEQ(lruvec);
+-
+-	/* whether this lruvec is completely out of cold pages */
+-	if (min_seq[!can_swap] + MIN_NR_GENS > max_seq) {
+-		*nr_to_scan = 0;
+-		return true;
+-	}
+-
+-	for (type = !can_swap; type < ANON_AND_FILE; type++) {
+-		unsigned long seq;
+-
+-		for (seq = min_seq[type]; seq <= max_seq; seq++) {
+-			unsigned long size = 0;
+-
+-			gen = lru_gen_from_seq(seq);
+-
+-			for (zone = 0; zone < MAX_NR_ZONES; zone++)
+-				size += max(READ_ONCE(lrugen->nr_pages[gen][type][zone]), 0L);
+-
+-			total += size;
+-			if (seq == max_seq)
+-				young += size;
+-			else if (seq + MIN_NR_GENS == max_seq)
+-				old += size;
+-		}
+-	}
+-
+-	/* try to scrape all its memory if this memcg was deleted */
+-	*nr_to_scan = mem_cgroup_online(memcg) ? (total >> sc->priority) : total;
+-
+-	/*
+-	 * The aging tries to be lazy to reduce the overhead, while the eviction
+-	 * stalls when the number of generations reaches MIN_NR_GENS. Hence, the
+-	 * ideal number of generations is MIN_NR_GENS+1.
+-	 */
+-	if (min_seq[!can_swap] + MIN_NR_GENS < max_seq)
+-		return false;
+-
+-	/*
+-	 * It's also ideal to spread pages out evenly, i.e., 1/(MIN_NR_GENS+1)
+-	 * of the total number of pages for each generation. A reasonable range
+-	 * for this average portion is [1/MIN_NR_GENS, 1/(MIN_NR_GENS+2)]. The
+-	 * aging cares about the upper bound of hot pages, while the eviction
+-	 * cares about the lower bound of cold pages.
+-	 */
+-	if (young * MIN_NR_GENS > total)
+-		return true;
+-	if (old * (MIN_NR_GENS + 2) < total)
+-		return true;
+-
+-	return false;
+-}
+-
+ static bool lruvec_is_sizable(struct lruvec *lruvec, struct scan_control *sc)
+ {
+ 	int gen, type, zone;
+@@ -4828,6 +4766,68 @@ retry:
+ 	return scanned;
+ }
+ 
++static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq,
++			     struct scan_control *sc, bool can_swap, unsigned long *nr_to_scan)
++{
++	int gen, type, zone;
++	unsigned long old = 0;
++	unsigned long young = 0;
++	unsigned long total = 0;
++	struct lru_gen_page *lrugen = &lruvec->lrugen;
++	struct mem_cgroup *memcg = lruvec_memcg(lruvec);
++	DEFINE_MIN_SEQ(lruvec);
++
++	/* whether this lruvec is completely out of cold pages */
++	if (min_seq[!can_swap] + MIN_NR_GENS > max_seq) {
++		*nr_to_scan = 0;
++		return true;
++	}
++
++	for (type = !can_swap; type < ANON_AND_FILE; type++) {
++		unsigned long seq;
++
++		for (seq = min_seq[type]; seq <= max_seq; seq++) {
++			unsigned long size = 0;
++
++			gen = lru_gen_from_seq(seq);
++
++			for (zone = 0; zone < MAX_NR_ZONES; zone++)
++				size += max(READ_ONCE(lrugen->nr_pages[gen][type][zone]), 0L);
++
++			total += size;
++			if (seq == max_seq)
++				young += size;
++			else if (seq + MIN_NR_GENS == max_seq)
++				old += size;
++		}
++	}
++
++	/* try to scrape all its memory if this memcg was deleted */
++	*nr_to_scan = mem_cgroup_online(memcg) ? (total >> sc->priority) : total;
++
++	/*
++	 * The aging tries to be lazy to reduce the overhead, while the eviction
++	 * stalls when the number of generations reaches MIN_NR_GENS. Hence, the
++	 * ideal number of generations is MIN_NR_GENS+1.
++	 */
++	if (min_seq[!can_swap] + MIN_NR_GENS < max_seq)
++		return false;
++
++	/*
++	 * It's also ideal to spread pages out evenly, i.e., 1/(MIN_NR_GENS+1)
++	 * of the total number of pages for each generation. A reasonable range
++	 * for this average portion is [1/MIN_NR_GENS, 1/(MIN_NR_GENS+2)]. The
++	 * aging cares about the upper bound of hot pages, while the eviction
++	 * cares about the lower bound of cold pages.
++	 */
++	if (young * MIN_NR_GENS > total)
++		return true;
++	if (old * (MIN_NR_GENS + 2) < total)
++		return true;
++
++	return false;
++}
++
+ /*
+  * For future optimizations:
+  * 1. Defer try_to_inc_max_seq() to workqueues to reduce latency for memcg
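
should_run_aging() is only relocated by this patch, but its balance checks read more easily with numbers plugged in. A standalone C sketch of the two bounds it applies, assuming MIN_NR_GENS = 2 and using made-up generation sizes, where "young" is the size of the max_seq generation and "old" the size of the generation at max_seq - MIN_NR_GENS:

#include <stdio.h>

#define MIN_NR_GENS 2    /* assumed kernel default */

/* age when the youngest generation exceeds 1/MIN_NR_GENS of all pages, or
 * the generation at max_seq - MIN_NR_GENS falls below 1/(MIN_NR_GENS + 2) */
static int should_run_aging(unsigned long young, unsigned long old,
                            unsigned long total)
{
    if (young * MIN_NR_GENS > total)
        return 1;
    if (old * (MIN_NR_GENS + 2) < total)
        return 1;
    return 0;
}

int main(void)
{
    /* { young, old, total } triples */
    unsigned long cases[][3] = {
        { 300, 300, 1000 },    /* balanced: no aging needed */
        { 600, 200, 1000 },    /* hot pages piling up: age */
        { 200, 100, 1000 },    /* cold pages running out: age */
    };

    for (int i = 0; i < 3; i++)
        printf("young=%lu old=%lu total=%lu -> aging=%d\n",
               cases[i][0], cases[i][1], cases[i][2],
               should_run_aging(cases[i][0], cases[i][1], cases[i][2]));
    return 0;
}

With MIN_NR_GENS = 2 the aging runs when the youngest generation holds more than half of the pages, or that boundary generation holds less than a quarter.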

+ 868 - 0
target/linux/generic/backport-6.1/020-v6.3-26-mm-multi-gen-LRU-per-node-lru_gen_page-lists.patch

@@ -0,0 +1,868 @@
+From fa6363828d314e837c5f79e97ea5e8c0d2f7f062 Mon Sep 17 00:00:00 2001
+From: Yu Zhao <[email protected]>
+Date: Wed, 21 Dec 2022 21:19:04 -0700
+Subject: [PATCH 26/29] mm: multi-gen LRU: per-node lru_gen_page lists
+
+For each node, memcgs are divided into two generations: the old and
+the young. For each generation, memcgs are randomly sharded into
+multiple bins to improve scalability. For each bin, an RCU hlist_nulls
+is virtually divided into three segments: the head, the tail and the
+default.
+
+An onlining memcg is added to the tail of a random bin in the old
+generation. The eviction starts at the head of a random bin in the old
+generation. The per-node memcg generation counter, whose remainder (mod
+2) indexes the old generation, is incremented when all its bins become
+empty.
+
+There are four operations:
+1. MEMCG_LRU_HEAD, which moves an memcg to the head of a random bin in
+   its current generation (old or young) and updates its "seg" to
+   "head";
+2. MEMCG_LRU_TAIL, which moves an memcg to the tail of a random bin in
+   its current generation (old or young) and updates its "seg" to
+   "tail";
+3. MEMCG_LRU_OLD, which moves an memcg to the head of a random bin in
+   the old generation, updates its "gen" to "old" and resets its "seg"
+   to "default";
+4. MEMCG_LRU_YOUNG, which moves an memcg to the tail of a random bin
+   in the young generation, updates its "gen" to "young" and resets
+   its "seg" to "default".
+
+The events that trigger the above operations are:
+1. Exceeding the soft limit, which triggers MEMCG_LRU_HEAD;
+2. The first attempt to reclaim an memcg below low, which triggers
+   MEMCG_LRU_TAIL;
+3. The first attempt to reclaim an memcg below reclaimable size
+   threshold, which triggers MEMCG_LRU_TAIL;
+4. The second attempt to reclaim an memcg below reclaimable size
+   threshold, which triggers MEMCG_LRU_YOUNG;
+5. Attempting to reclaim an memcg below min, which triggers
+   MEMCG_LRU_YOUNG;
+6. Finishing the aging on the eviction path, which triggers
+   MEMCG_LRU_YOUNG;
+7. Offlining an memcg, which triggers MEMCG_LRU_OLD.
+
+Note that memcg LRU only applies to global reclaim, and the
+round-robin incrementing of their max_seq counters ensures the
+eventual fairness to all eligible memcgs. For memcg reclaim, it still
+relies on mem_cgroup_iter().
+
+Link: https://lkml.kernel.org/r/[email protected]
+Signed-off-by: Yu Zhao <[email protected]>
+Cc: Johannes Weiner <[email protected]>
+Cc: Jonathan Corbet <[email protected]>
+Cc: Michael Larabel <[email protected]>
+Cc: Michal Hocko <[email protected]>
+Cc: Mike Rapoport <[email protected]>
+Cc: Roman Gushchin <[email protected]>
+Cc: Suren Baghdasaryan <[email protected]>
+Signed-off-by: Andrew Morton <[email protected]>
+---
+ include/linux/memcontrol.h |  10 +
+ include/linux/mm_inline.h  |  17 ++
+ include/linux/mmzone.h     | 117 +++++++++++-
+ mm/memcontrol.c            |  16 ++
+ mm/page_alloc.c            |   1 +
+ mm/vmscan.c                | 373 +++++++++++++++++++++++++++++++++----
+ 6 files changed, 499 insertions(+), 35 deletions(-)
+
+--- a/include/linux/memcontrol.h
++++ b/include/linux/memcontrol.h
+@@ -818,6 +818,11 @@ static inline void obj_cgroup_put(struct
+ 	percpu_ref_put(&objcg->refcnt);
+ }
+ 
++static inline bool mem_cgroup_tryget(struct mem_cgroup *memcg)
++{
++	return !memcg || css_tryget(&memcg->css);
++}
++
+ static inline void mem_cgroup_put(struct mem_cgroup *memcg)
+ {
+ 	if (memcg)
+@@ -1283,6 +1288,11 @@ struct mem_cgroup *mem_cgroup_from_css(s
+ 	return NULL;
+ }
+ 
++static inline bool mem_cgroup_tryget(struct mem_cgroup *memcg)
++{
++	return true;
++}
++
+ static inline void mem_cgroup_put(struct mem_cgroup *memcg)
+ {
+ }
+--- a/include/linux/mm_inline.h
++++ b/include/linux/mm_inline.h
+@@ -112,6 +112,18 @@ static inline bool lru_gen_in_fault(void
+ 	return current->in_lru_fault;
+ }
+ 
++#ifdef CONFIG_MEMCG
++static inline int lru_gen_memcg_seg(struct lruvec *lruvec)
++{
++	return READ_ONCE(lruvec->lrugen.seg);
++}
++#else
++static inline int lru_gen_memcg_seg(struct lruvec *lruvec)
++{
++	return 0;
++}
++#endif
++
+ static inline int lru_gen_from_seq(unsigned long seq)
+ {
+ 	return seq % MAX_NR_GENS;
+@@ -287,6 +299,11 @@ static inline bool lru_gen_in_fault(void
+ 	return false;
+ }
+ 
++static inline int lru_gen_memcg_seg(struct lruvec *lruvec)
++{
++	return 0;
++}
++
+ static inline bool lru_gen_add_page(struct lruvec *lruvec, struct page *page, bool reclaiming)
+ {
+ 	return false;
+--- a/include/linux/mmzone.h
++++ b/include/linux/mmzone.h
+@@ -7,6 +7,7 @@
+ 
+ #include <linux/spinlock.h>
+ #include <linux/list.h>
++#include <linux/list_nulls.h>
+ #include <linux/wait.h>
+ #include <linux/bitops.h>
+ #include <linux/cache.h>
+@@ -357,6 +358,15 @@ struct page_vma_mapped_walk;
+ #define LRU_GEN_MASK		((BIT(LRU_GEN_WIDTH) - 1) << LRU_GEN_PGOFF)
+ #define LRU_REFS_MASK		((BIT(LRU_REFS_WIDTH) - 1) << LRU_REFS_PGOFF)
+ 
++/* see the comment on MEMCG_NR_GENS */
++enum {
++	MEMCG_LRU_NOP,
++	MEMCG_LRU_HEAD,
++	MEMCG_LRU_TAIL,
++	MEMCG_LRU_OLD,
++	MEMCG_LRU_YOUNG,
++};
++
+ #ifdef CONFIG_LRU_GEN
+ 
+ enum {
+@@ -416,6 +426,14 @@ struct lru_gen_page {
+ 	atomic_long_t refaulted[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS];
+ 	/* whether the multi-gen LRU is enabled */
+ 	bool enabled;
++#ifdef CONFIG_MEMCG
++	/* the memcg generation this lru_gen_page belongs to */
++	u8 gen;
++	/* the list segment this lru_gen_page belongs to */
++	u8 seg;
++	/* per-node lru_gen_page list for global reclaim */
++	struct hlist_nulls_node list;
++#endif
+ };
+ 
+ enum {
+@@ -469,12 +487,87 @@ void lru_gen_init_lruvec(struct lruvec *
+ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw);
+ 
+ #ifdef CONFIG_MEMCG
++
++/*
++ * For each node, memcgs are divided into two generations: the old and the
++ * young. For each generation, memcgs are randomly sharded into multiple bins
++ * to improve scalability. For each bin, the hlist_nulls is virtually divided
++ * into three segments: the head, the tail and the default.
++ *
++ * An onlining memcg is added to the tail of a random bin in the old generation.
++ * The eviction starts at the head of a random bin in the old generation. The
++ * per-node memcg generation counter, whose reminder (mod MEMCG_NR_GENS) indexes
++ * the old generation, is incremented when all its bins become empty.
++ *
++ * There are four operations:
++ * 1. MEMCG_LRU_HEAD, which moves an memcg to the head of a random bin in its
++ *    current generation (old or young) and updates its "seg" to "head";
++ * 2. MEMCG_LRU_TAIL, which moves an memcg to the tail of a random bin in its
++ *    current generation (old or young) and updates its "seg" to "tail";
++ * 3. MEMCG_LRU_OLD, which moves an memcg to the head of a random bin in the old
++ *    generation, updates its "gen" to "old" and resets its "seg" to "default";
++ * 4. MEMCG_LRU_YOUNG, which moves an memcg to the tail of a random bin in the
++ *    young generation, updates its "gen" to "young" and resets its "seg" to
++ *    "default".
++ *
++ * The events that trigger the above operations are:
++ * 1. Exceeding the soft limit, which triggers MEMCG_LRU_HEAD;
++ * 2. The first attempt to reclaim an memcg below low, which triggers
++ *    MEMCG_LRU_TAIL;
++ * 3. The first attempt to reclaim an memcg below reclaimable size threshold,
++ *    which triggers MEMCG_LRU_TAIL;
++ * 4. The second attempt to reclaim an memcg below reclaimable size threshold,
++ *    which triggers MEMCG_LRU_YOUNG;
++ * 5. Attempting to reclaim an memcg below min, which triggers MEMCG_LRU_YOUNG;
++ * 6. Finishing the aging on the eviction path, which triggers MEMCG_LRU_YOUNG;
++ * 7. Offlining an memcg, which triggers MEMCG_LRU_OLD.
++ *
++ * Note that memcg LRU only applies to global reclaim, and the round-robin
++ * incrementing of their max_seq counters ensures the eventual fairness to all
++ * eligible memcgs. For memcg reclaim, it still relies on mem_cgroup_iter().
++ */
++#define MEMCG_NR_GENS	2
++#define MEMCG_NR_BINS	8
++
++struct lru_gen_memcg {
++	/* the per-node memcg generation counter */
++	unsigned long seq;
++	/* each memcg has one lru_gen_page per node */
++	unsigned long nr_memcgs[MEMCG_NR_GENS];
++	/* per-node lru_gen_page list for global reclaim */
++	struct hlist_nulls_head	fifo[MEMCG_NR_GENS][MEMCG_NR_BINS];
++	/* protects the above */
++	spinlock_t lock;
++};
++
++void lru_gen_init_pgdat(struct pglist_data *pgdat);
++
+ void lru_gen_init_memcg(struct mem_cgroup *memcg);
+ void lru_gen_exit_memcg(struct mem_cgroup *memcg);
+-#endif
++void lru_gen_online_memcg(struct mem_cgroup *memcg);
++void lru_gen_offline_memcg(struct mem_cgroup *memcg);
++void lru_gen_release_memcg(struct mem_cgroup *memcg);
++void lru_gen_rotate_memcg(struct lruvec *lruvec, int op);
++
++#else /* !CONFIG_MEMCG */
++
++#define MEMCG_NR_GENS	1
++
++struct lru_gen_memcg {
++};
++
++static inline void lru_gen_init_pgdat(struct pglist_data *pgdat)
++{
++}
++
++#endif /* CONFIG_MEMCG */
+ 
+ #else /* !CONFIG_LRU_GEN */
+ 
++static inline void lru_gen_init_pgdat(struct pglist_data *pgdat)
++{
++}
++
+ static inline void lru_gen_init_lruvec(struct lruvec *lruvec)
+ {
+ }
+@@ -484,6 +577,7 @@ static inline void lru_gen_look_around(s
+ }
+ 
+ #ifdef CONFIG_MEMCG
++
+ static inline void lru_gen_init_memcg(struct mem_cgroup *memcg)
+ {
+ }
+@@ -491,7 +585,24 @@ static inline void lru_gen_init_memcg(st
+ static inline void lru_gen_exit_memcg(struct mem_cgroup *memcg)
+ {
+ }
+-#endif
++
++static inline void lru_gen_online_memcg(struct mem_cgroup *memcg)
++{
++}
++
++static inline void lru_gen_offline_memcg(struct mem_cgroup *memcg)
++{
++}
++
++static inline void lru_gen_release_memcg(struct mem_cgroup *memcg)
++{
++}
++
++static inline void lru_gen_rotate_memcg(struct lruvec *lruvec, int op)
++{
++}
++
++#endif /* CONFIG_MEMCG */
+ 
+ #endif /* CONFIG_LRU_GEN */
+ 
+@@ -1105,6 +1216,8 @@ typedef struct pglist_data {
+ #ifdef CONFIG_LRU_GEN
+ 	/* kswap mm walk data */
+ 	struct lru_gen_mm_walk	mm_walk;
++	/* lru_gen_page list */
++	struct lru_gen_memcg memcg_lru;
+ #endif
+ 
+ 	ZONE_PADDING(_pad2_)
+--- a/mm/memcontrol.c
++++ b/mm/memcontrol.c
+@@ -549,6 +549,16 @@ static void mem_cgroup_update_tree(struc
+ 	struct mem_cgroup_per_node *mz;
+ 	struct mem_cgroup_tree_per_node *mctz;
+ 
++	if (lru_gen_enabled()) {
++		struct lruvec *lruvec = &mem_cgroup_page_nodeinfo(memcg, page)->lruvec;
++
++		/* see the comment on MEMCG_NR_GENS */
++		if (soft_limit_excess(memcg) && lru_gen_memcg_seg(lruvec) != MEMCG_LRU_HEAD)
++			lru_gen_rotate_memcg(lruvec, MEMCG_LRU_HEAD);
++
++		return;
++	}
++
+ 	mctz = soft_limit_tree_from_page(page);
+ 	if (!mctz)
+ 		return;
+@@ -3433,6 +3443,9 @@ unsigned long mem_cgroup_soft_limit_recl
+ 	unsigned long excess;
+ 	unsigned long nr_scanned;
+ 
++	if (lru_gen_enabled())
++		return 0;
++
+ 	if (order > 0)
+ 		return 0;
+ 
+@@ -5321,6 +5334,7 @@ static int mem_cgroup_css_online(struct
+ 	if (unlikely(mem_cgroup_is_root(memcg)))
+ 		queue_delayed_work(system_unbound_wq, &stats_flush_dwork,
+ 				   2UL*HZ);
++	lru_gen_online_memcg(memcg);
+ 	return 0;
+ }
+ 
+@@ -5347,6 +5361,7 @@ static void mem_cgroup_css_offline(struc
+ 	memcg_offline_kmem(memcg);
+ 	reparent_shrinker_deferred(memcg);
+ 	wb_memcg_offline(memcg);
++	lru_gen_offline_memcg(memcg);
+ 
+ 	drain_all_stock(memcg);
+ 
+@@ -5358,6 +5373,7 @@ static void mem_cgroup_css_released(stru
+ 	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
+ 
+ 	invalidate_reclaim_iterators(memcg);
++	lru_gen_release_memcg(memcg);
+ }
+ 
+ static void mem_cgroup_css_free(struct cgroup_subsys_state *css)
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -7661,6 +7661,7 @@ static void __init free_area_init_node(i
+ 	pgdat_set_deferred_range(pgdat);
+ 
+ 	free_area_init_core(pgdat);
++	lru_gen_init_pgdat(pgdat);
+ }
+ 
+ void __init free_area_init_memoryless_node(int nid)
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -54,6 +54,8 @@
+ #include <linux/shmem_fs.h>
+ #include <linux/ctype.h>
+ #include <linux/debugfs.h>
++#include <linux/rculist_nulls.h>
++#include <linux/random.h>
+ 
+ #include <asm/tlbflush.h>
+ #include <asm/div64.h>
+@@ -129,11 +131,6 @@ struct scan_control {
+ 	/* Always discard instead of demoting to lower tier memory */
+ 	unsigned int no_demotion:1;
+ 
+-#ifdef CONFIG_LRU_GEN
+-	/* help kswapd make better choices among multiple memcgs */
+-	unsigned long last_reclaimed;
+-#endif
+-
+ 	/* Allocation order */
+ 	s8 order;
+ 
+@@ -2880,6 +2877,9 @@ DEFINE_STATIC_KEY_ARRAY_FALSE(lru_gen_ca
+ 		for ((type) = 0; (type) < ANON_AND_FILE; (type)++)	\
+ 			for ((zone) = 0; (zone) < MAX_NR_ZONES; (zone)++)
+ 
++#define get_memcg_gen(seq)	((seq) % MEMCG_NR_GENS)
++#define get_memcg_bin(bin)	((bin) % MEMCG_NR_BINS)
++
+ static struct lruvec *get_lruvec(struct mem_cgroup *memcg, int nid)
+ {
+ 	struct pglist_data *pgdat = NODE_DATA(nid);
+@@ -4169,8 +4169,7 @@ done:
+ 		if (sc->priority <= DEF_PRIORITY - 2)
+ 			wait_event_killable(lruvec->mm_state.wait,
+ 					    max_seq < READ_ONCE(lrugen->max_seq));
+-
+-		return max_seq < READ_ONCE(lrugen->max_seq);
++		return false;
+ 	}
+ 
+ 	VM_WARN_ON_ONCE(max_seq != READ_ONCE(lrugen->max_seq));
+@@ -4243,8 +4242,6 @@ static void lru_gen_age_node(struct pgli
+ 
+ 	VM_WARN_ON_ONCE(!current_is_kswapd());
+ 
+-	sc->last_reclaimed = sc->nr_reclaimed;
+-
+ 	/* check the order to exclude compaction-induced reclaim */
+ 	if (!min_ttl || sc->order || sc->priority == DEF_PRIORITY)
+ 		return;
+@@ -4833,8 +4830,7 @@ static bool should_run_aging(struct lruv
+  * 1. Defer try_to_inc_max_seq() to workqueues to reduce latency for memcg
+  *    reclaim.
+  */
+-static unsigned long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc,
+-				    bool can_swap)
++static long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc, bool can_swap)
+ {
+ 	unsigned long nr_to_scan;
+ 	struct mem_cgroup *memcg = lruvec_memcg(lruvec);
+@@ -4851,10 +4847,8 @@ static unsigned long get_nr_to_scan(stru
+ 	if (sc->priority == DEF_PRIORITY)
+ 		return nr_to_scan;
+ 
+-	try_to_inc_max_seq(lruvec, max_seq, sc, can_swap, false);
+-
+ 	/* skip this lruvec as it's low on cold pages */
+-	return 0;
++	return try_to_inc_max_seq(lruvec, max_seq, sc, can_swap, false) ? -1 : 0;
+ }
+ 
+ static unsigned long get_nr_to_reclaim(struct scan_control *sc)
+@@ -4863,29 +4857,18 @@ static unsigned long get_nr_to_reclaim(s
+ 	if (!global_reclaim(sc))
+ 		return -1;
+ 
+-	/* discount the previous progress for kswapd */
+-	if (current_is_kswapd())
+-		return sc->nr_to_reclaim + sc->last_reclaimed;
+-
+ 	return max(sc->nr_to_reclaim, compact_gap(sc->order));
+ }
+ 
+-static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
++static bool try_to_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
+ {
+-	struct blk_plug plug;
++	long nr_to_scan;
+ 	unsigned long scanned = 0;
+ 	unsigned long nr_to_reclaim = get_nr_to_reclaim(sc);
+ 
+-	lru_add_drain();
+-
+-	blk_start_plug(&plug);
+-
+-	set_mm_walk(lruvec_pgdat(lruvec));
+-
+ 	while (true) {
+ 		int delta;
+ 		int swappiness;
+-		unsigned long nr_to_scan;
+ 
+ 		if (sc->may_swap)
+ 			swappiness = get_swappiness(lruvec, sc);
+@@ -4895,7 +4878,7 @@ static void lru_gen_shrink_lruvec(struct
+ 			swappiness = 0;
+ 
+ 		nr_to_scan = get_nr_to_scan(lruvec, sc, swappiness);
+-		if (!nr_to_scan)
++		if (nr_to_scan <= 0)
+ 			break;
+ 
+ 		delta = evict_pages(lruvec, sc, swappiness);
+@@ -4912,10 +4895,250 @@ static void lru_gen_shrink_lruvec(struct
+ 		cond_resched();
+ 	}
+ 
++	/* whether try_to_inc_max_seq() was successful */
++	return nr_to_scan < 0;
++}
++
++static int shrink_one(struct lruvec *lruvec, struct scan_control *sc)
++{
++	bool success;
++	unsigned long scanned = sc->nr_scanned;
++	unsigned long reclaimed = sc->nr_reclaimed;
++	int seg = lru_gen_memcg_seg(lruvec);
++	struct mem_cgroup *memcg = lruvec_memcg(lruvec);
++	struct pglist_data *pgdat = lruvec_pgdat(lruvec);
++
++	/* see the comment on MEMCG_NR_GENS */
++	if (!lruvec_is_sizable(lruvec, sc))
++		return seg != MEMCG_LRU_TAIL ? MEMCG_LRU_TAIL : MEMCG_LRU_YOUNG;
++
++	mem_cgroup_calculate_protection(NULL, memcg);
++
++	if (mem_cgroup_below_min(memcg))
++		return MEMCG_LRU_YOUNG;
++
++	if (mem_cgroup_below_low(memcg)) {
++		/* see the comment on MEMCG_NR_GENS */
++		if (seg != MEMCG_LRU_TAIL)
++			return MEMCG_LRU_TAIL;
++
++		memcg_memory_event(memcg, MEMCG_LOW);
++	}
++
++	success = try_to_shrink_lruvec(lruvec, sc);
++
++	shrink_slab(sc->gfp_mask, pgdat->node_id, memcg, sc->priority);
++
++	vmpressure(sc->gfp_mask, memcg, false, sc->nr_scanned - scanned,
++		   sc->nr_reclaimed - reclaimed);
++
++	sc->nr_reclaimed += current->reclaim_state->reclaimed_slab;
++	current->reclaim_state->reclaimed_slab = 0;
++
++	return success ? MEMCG_LRU_YOUNG : 0;
++}
++
++#ifdef CONFIG_MEMCG
++
++static void shrink_many(struct pglist_data *pgdat, struct scan_control *sc)
++{
++	int gen;
++	int bin;
++	int first_bin;
++	struct lruvec *lruvec;
++	struct lru_gen_page *lrugen;
++	const struct hlist_nulls_node *pos;
++	int op = 0;
++	struct mem_cgroup *memcg = NULL;
++	unsigned long nr_to_reclaim = get_nr_to_reclaim(sc);
++
++	bin = first_bin = prandom_u32_max(MEMCG_NR_BINS);
++restart:
++	gen = get_memcg_gen(READ_ONCE(pgdat->memcg_lru.seq));
++
++	rcu_read_lock();
++
++	hlist_nulls_for_each_entry_rcu(lrugen, pos, &pgdat->memcg_lru.fifo[gen][bin], list) {
++		if (op)
++			lru_gen_rotate_memcg(lruvec, op);
++
++		mem_cgroup_put(memcg);
++
++		lruvec = container_of(lrugen, struct lruvec, lrugen);
++		memcg = lruvec_memcg(lruvec);
++
++		if (!mem_cgroup_tryget(memcg)) {
++			op = 0;
++			memcg = NULL;
++			continue;
++		}
++
++		rcu_read_unlock();
++
++		op = shrink_one(lruvec, sc);
++
++		if (sc->nr_reclaimed >= nr_to_reclaim)
++			goto success;
++
++		rcu_read_lock();
++	}
++
++	rcu_read_unlock();
++
++	/* restart if raced with lru_gen_rotate_memcg() */
++	if (gen != get_nulls_value(pos))
++		goto restart;
++
++	/* try the rest of the bins of the current generation */
++	bin = get_memcg_bin(bin + 1);
++	if (bin != first_bin)
++		goto restart;
++success:
++	if (op)
++		lru_gen_rotate_memcg(lruvec, op);
++
++	mem_cgroup_put(memcg);
++}
++
++static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
++{
++	struct blk_plug plug;
++
++	VM_WARN_ON_ONCE(global_reclaim(sc));
++
++	lru_add_drain();
++
++	blk_start_plug(&plug);
++
++	set_mm_walk(lruvec_pgdat(lruvec));
++
++	if (try_to_shrink_lruvec(lruvec, sc))
++		lru_gen_rotate_memcg(lruvec, MEMCG_LRU_YOUNG);
++
++	clear_mm_walk();
++
++	blk_finish_plug(&plug);
++}
++
++#else /* !CONFIG_MEMCG */
++
++static void shrink_many(struct pglist_data *pgdat, struct scan_control *sc)
++{
++	BUILD_BUG();
++}
++
++static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
++{
++	BUILD_BUG();
++}
++
++#endif
++
++static void set_initial_priority(struct pglist_data *pgdat, struct scan_control *sc)
++{
++	int priority;
++	unsigned long reclaimable;
++	struct lruvec *lruvec = mem_cgroup_lruvec(NULL, pgdat);
++
++	if (sc->priority != DEF_PRIORITY || sc->nr_to_reclaim < MIN_LRU_BATCH)
++		return;
++	/*
++	 * Determine the initial priority based on ((total / MEMCG_NR_GENS) >>
++	 * priority) * reclaimed_to_scanned_ratio = nr_to_reclaim, where the
++	 * estimated reclaimed_to_scanned_ratio = inactive / total.
++	 */
++	reclaimable = node_page_state(pgdat, NR_INACTIVE_FILE);
++	if (get_swappiness(lruvec, sc))
++		reclaimable += node_page_state(pgdat, NR_INACTIVE_ANON);
++
++	reclaimable /= MEMCG_NR_GENS;
++
++	/* round down reclaimable and round up sc->nr_to_reclaim */
++	priority = fls_long(reclaimable) - 1 - fls_long(sc->nr_to_reclaim - 1);
++
++	sc->priority = clamp(priority, 0, DEF_PRIORITY);
++}
++
++static void lru_gen_shrink_node(struct pglist_data *pgdat, struct scan_control *sc)
++{
++	struct blk_plug plug;
++	unsigned long reclaimed = sc->nr_reclaimed;
++
++	VM_WARN_ON_ONCE(!global_reclaim(sc));
++
++	lru_add_drain();
++
++	blk_start_plug(&plug);
++
++	set_mm_walk(pgdat);
++
++	set_initial_priority(pgdat, sc);
++
++	if (current_is_kswapd())
++		sc->nr_reclaimed = 0;
++
++	if (mem_cgroup_disabled())
++		shrink_one(&pgdat->__lruvec, sc);
++	else
++		shrink_many(pgdat, sc);
++
++	if (current_is_kswapd())
++		sc->nr_reclaimed += reclaimed;
++
+ 	clear_mm_walk();
+ 
+ 	blk_finish_plug(&plug);
++
++	/* kswapd should never fail */
++	pgdat->kswapd_failures = 0;
++}
++
++#ifdef CONFIG_MEMCG
++void lru_gen_rotate_memcg(struct lruvec *lruvec, int op)
++{
++	int seg;
++	int old, new;
++	int bin = prandom_u32_max(MEMCG_NR_BINS);
++	struct pglist_data *pgdat = lruvec_pgdat(lruvec);
++
++	spin_lock(&pgdat->memcg_lru.lock);
++
++	VM_WARN_ON_ONCE(hlist_nulls_unhashed(&lruvec->lrugen.list));
++
++	seg = 0;
++	new = old = lruvec->lrugen.gen;
++
++	/* see the comment on MEMCG_NR_GENS */
++	if (op == MEMCG_LRU_HEAD)
++		seg = MEMCG_LRU_HEAD;
++	else if (op == MEMCG_LRU_TAIL)
++		seg = MEMCG_LRU_TAIL;
++	else if (op == MEMCG_LRU_OLD)
++		new = get_memcg_gen(pgdat->memcg_lru.seq);
++	else if (op == MEMCG_LRU_YOUNG)
++		new = get_memcg_gen(pgdat->memcg_lru.seq + 1);
++	else
++		VM_WARN_ON_ONCE(true);
++
++	hlist_nulls_del_rcu(&lruvec->lrugen.list);
++
++	if (op == MEMCG_LRU_HEAD || op == MEMCG_LRU_OLD)
++		hlist_nulls_add_head_rcu(&lruvec->lrugen.list, &pgdat->memcg_lru.fifo[new][bin]);
++	else
++		hlist_nulls_add_tail_rcu(&lruvec->lrugen.list, &pgdat->memcg_lru.fifo[new][bin]);
++
++	pgdat->memcg_lru.nr_memcgs[old]--;
++	pgdat->memcg_lru.nr_memcgs[new]++;
++
++	lruvec->lrugen.gen = new;
++	WRITE_ONCE(lruvec->lrugen.seg, seg);
++
++	if (!pgdat->memcg_lru.nr_memcgs[old] && old == get_memcg_gen(pgdat->memcg_lru.seq))
++		WRITE_ONCE(pgdat->memcg_lru.seq, pgdat->memcg_lru.seq + 1);
++
++	spin_unlock(&pgdat->memcg_lru.lock);
+ }
++#endif
+ 
+ /******************************************************************************
+  *                          state change
+@@ -5370,11 +5593,11 @@ static int run_cmd(char cmd, int memcg_i
+ 
+ 	if (!mem_cgroup_disabled()) {
+ 		rcu_read_lock();
++
+ 		memcg = mem_cgroup_from_id(memcg_id);
+-#ifdef CONFIG_MEMCG
+-		if (memcg && !css_tryget(&memcg->css))
++		if (!mem_cgroup_tryget(memcg))
+ 			memcg = NULL;
+-#endif
++
+ 		rcu_read_unlock();
+ 
+ 		if (!memcg)
+@@ -5521,6 +5744,19 @@ void lru_gen_init_lruvec(struct lruvec *
+ }
+ 
+ #ifdef CONFIG_MEMCG
++
++void lru_gen_init_pgdat(struct pglist_data *pgdat)
++{
++	int i, j;
++
++	spin_lock_init(&pgdat->memcg_lru.lock);
++
++	for (i = 0; i < MEMCG_NR_GENS; i++) {
++		for (j = 0; j < MEMCG_NR_BINS; j++)
++			INIT_HLIST_NULLS_HEAD(&pgdat->memcg_lru.fifo[i][j], i);
++	}
++}
++
+ void lru_gen_init_memcg(struct mem_cgroup *memcg)
+ {
+ 	INIT_LIST_HEAD(&memcg->mm_list.fifo);
+@@ -5544,7 +5780,69 @@ void lru_gen_exit_memcg(struct mem_cgrou
+ 		}
+ 	}
+ }
+-#endif
++
++void lru_gen_online_memcg(struct mem_cgroup *memcg)
++{
++	int gen;
++	int nid;
++	int bin = prandom_u32_max(MEMCG_NR_BINS);
++
++	for_each_node(nid) {
++		struct pglist_data *pgdat = NODE_DATA(nid);
++		struct lruvec *lruvec = get_lruvec(memcg, nid);
++
++		spin_lock(&pgdat->memcg_lru.lock);
++
++		VM_WARN_ON_ONCE(!hlist_nulls_unhashed(&lruvec->lrugen.list));
++
++		gen = get_memcg_gen(pgdat->memcg_lru.seq);
++
++		hlist_nulls_add_tail_rcu(&lruvec->lrugen.list, &pgdat->memcg_lru.fifo[gen][bin]);
++		pgdat->memcg_lru.nr_memcgs[gen]++;
++
++		lruvec->lrugen.gen = gen;
++
++		spin_unlock(&pgdat->memcg_lru.lock);
++	}
++}
++
++void lru_gen_offline_memcg(struct mem_cgroup *memcg)
++{
++	int nid;
++
++	for_each_node(nid) {
++		struct lruvec *lruvec = get_lruvec(memcg, nid);
++
++		lru_gen_rotate_memcg(lruvec, MEMCG_LRU_OLD);
++	}
++}
++
++void lru_gen_release_memcg(struct mem_cgroup *memcg)
++{
++	int gen;
++	int nid;
++
++	for_each_node(nid) {
++		struct pglist_data *pgdat = NODE_DATA(nid);
++		struct lruvec *lruvec = get_lruvec(memcg, nid);
++
++		spin_lock(&pgdat->memcg_lru.lock);
++
++		VM_WARN_ON_ONCE(hlist_nulls_unhashed(&lruvec->lrugen.list));
++
++		gen = lruvec->lrugen.gen;
++
++		hlist_nulls_del_rcu(&lruvec->lrugen.list);
++		pgdat->memcg_lru.nr_memcgs[gen]--;
++
++		if (!pgdat->memcg_lru.nr_memcgs[gen] && gen == get_memcg_gen(pgdat->memcg_lru.seq))
++			WRITE_ONCE(pgdat->memcg_lru.seq, pgdat->memcg_lru.seq + 1);
++
++		spin_unlock(&pgdat->memcg_lru.lock);
++	}
++}
++
++#endif /* CONFIG_MEMCG */
+ 
+ static int __init init_lru_gen(void)
+ {
+@@ -5571,6 +5869,10 @@ static void lru_gen_shrink_lruvec(struct
+ {
+ }
+ 
++static void lru_gen_shrink_node(struct pglist_data *pgdat, struct scan_control *sc)
++{
++}
++
+ #endif /* CONFIG_LRU_GEN */
+ 
+ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
+@@ -5584,7 +5886,7 @@ static void shrink_lruvec(struct lruvec
+ 	bool proportional_reclaim;
+ 	struct blk_plug plug;
+ 
+-	if (lru_gen_enabled()) {
++	if (lru_gen_enabled() && !global_reclaim(sc)) {
+ 		lru_gen_shrink_lruvec(lruvec, sc);
+ 		return;
+ 	}
+@@ -5826,6 +6128,11 @@ static void shrink_node(pg_data_t *pgdat
+ 	struct lruvec *target_lruvec;
+ 	bool reclaimable = false;
+ 
++	if (lru_gen_enabled() && global_reclaim(sc)) {
++		lru_gen_shrink_node(pgdat, sc);
++		return;
++	}
++
+ 	target_lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat);
+ 
+ again:
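
Most of the memcg LRU bookkeeping added above is modular arithmetic on the per-node seq counter plus a random bin choice. A toy standalone C model of how the four operations pick a target generation; it substitutes rand() and plain integers for the kernel's prandom_u32_max() and the RCU hlist_nulls protected by memcg_lru.lock, and the node_seq value is invented:

#include <stdio.h>
#include <stdlib.h>

#define MEMCG_NR_GENS 2
#define MEMCG_NR_BINS 8

enum { MEMCG_LRU_NOP, MEMCG_LRU_HEAD, MEMCG_LRU_TAIL,
       MEMCG_LRU_OLD, MEMCG_LRU_YOUNG };

static int get_memcg_gen(unsigned long seq) { return seq % MEMCG_NR_GENS; }
static int get_memcg_bin(int bin)           { return bin % MEMCG_NR_BINS; }

/* which per-node generation a rotated memcg lands in, given the node's seq */
static int target_gen(int op, int cur_gen, unsigned long node_seq)
{
    switch (op) {
    case MEMCG_LRU_OLD:                       /* back to the old generation */
        return get_memcg_gen(node_seq);
    case MEMCG_LRU_YOUNG:                     /* on to the young generation */
        return get_memcg_gen(node_seq + 1);
    default:                                  /* HEAD/TAIL only change seg */
        return cur_gen;
    }
}

int main(void)
{
    unsigned long node_seq = 41;              /* invented per-node counter */
    int cur_gen = get_memcg_gen(node_seq);
    int bin = get_memcg_bin(rand());          /* kernel: prandom_u32_max() */

    printf("old gen = %d, young gen = %d, random bin = %d\n",
           get_memcg_gen(node_seq), get_memcg_gen(node_seq + 1), bin);
    printf("MEMCG_LRU_YOUNG would move this memcg to gen %d\n",
           target_gen(MEMCG_LRU_YOUNG, cur_gen, node_seq));
    printf("MEMCG_LRU_OLD would move this memcg to gen %d\n",
           target_gen(MEMCG_LRU_OLD, cur_gen, node_seq));
    printf("MEMCG_LRU_HEAD/TAIL keep it in gen %d\n",
           target_gen(MEMCG_LRU_HEAD, cur_gen, node_seq));
    return 0;
}

The per-node seq is only incremented once the old generation's bins are all empty, so get_memcg_gen(seq) always indexes the generation the eviction is currently draining.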

+ 196 - 0
target/linux/generic/backport-6.1/020-v6.3-27-mm-multi-gen-LRU-clarify-scan_control-flags.patch

@@ -0,0 +1,196 @@
+From 93147736b5b3a21bea24313bfc7a696829932009 Mon Sep 17 00:00:00 2001
+From: Yu Zhao <[email protected]>
+Date: Wed, 21 Dec 2022 21:19:05 -0700
+Subject: [PATCH 27/29] mm: multi-gen LRU: clarify scan_control flags
+
+Among the flags in scan_control:
+1. sc->may_swap, which indicates swap constraint due to memsw.max, is
+   supported as usual.
+2. sc->proactive, which indicates reclaim by memory.reclaim, may not
+   opportunistically skip the aging path, since it is considered less
+   latency sensitive.
+3. !(sc->gfp_mask & __GFP_IO), which indicates IO constraint, lowers
+   swappiness to prioritize file LRU, since clean file pages are more
+   likely to exist.
+4. sc->may_writepage and sc->may_unmap, which indicates opportunistic
+   reclaim, are rejected, since unmapped clean pages are already
+   prioritized. Scanning for more of them is likely futile and can
+   cause high reclaim latency when there is a large number of memcgs.
+
+The rest are handled by the existing code.
+
+Link: https://lkml.kernel.org/r/[email protected]
+Signed-off-by: Yu Zhao <[email protected]>
+Cc: Johannes Weiner <[email protected]>
+Cc: Jonathan Corbet <[email protected]>
+Cc: Michael Larabel <[email protected]>
+Cc: Michal Hocko <[email protected]>
+Cc: Mike Rapoport <[email protected]>
+Cc: Roman Gushchin <[email protected]>
+Cc: Suren Baghdasaryan <[email protected]>
+Signed-off-by: Andrew Morton <[email protected]>
+---
+ mm/vmscan.c | 55 +++++++++++++++++++++++++++--------------------------
+ 1 file changed, 28 insertions(+), 27 deletions(-)
+
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -2905,6 +2905,9 @@ static int get_swappiness(struct lruvec
+ 	struct mem_cgroup *memcg = lruvec_memcg(lruvec);
+ 	struct pglist_data *pgdat = lruvec_pgdat(lruvec);
+ 
++	if (!sc->may_swap)
++		return 0;
++
+ 	if (!can_demote(pgdat->node_id, sc) &&
+ 	    mem_cgroup_get_nr_swap_pages(memcg) < MIN_LRU_BATCH)
+ 		return 0;
+@@ -3952,7 +3955,7 @@ static void walk_mm(struct lruvec *lruve
+ 	} while (err == -EAGAIN);
+ }
+ 
+-static struct lru_gen_mm_walk *set_mm_walk(struct pglist_data *pgdat)
++static struct lru_gen_mm_walk *set_mm_walk(struct pglist_data *pgdat, bool force_alloc)
+ {
+ 	struct lru_gen_mm_walk *walk = current->reclaim_state->mm_walk;
+ 
+@@ -3960,7 +3963,7 @@ static struct lru_gen_mm_walk *set_mm_wa
+ 		VM_WARN_ON_ONCE(walk);
+ 
+ 		walk = &pgdat->mm_walk;
+-	} else if (!pgdat && !walk) {
++	} else if (!walk && force_alloc) {
+ 		VM_WARN_ON_ONCE(current_is_kswapd());
+ 
+ 		walk = kzalloc(sizeof(*walk), __GFP_HIGH | __GFP_NOMEMALLOC | __GFP_NOWARN);
+@@ -4146,7 +4149,7 @@ static bool try_to_inc_max_seq(struct lr
+ 		goto done;
+ 	}
+ 
+-	walk = set_mm_walk(NULL);
++	walk = set_mm_walk(NULL, true);
+ 	if (!walk) {
+ 		success = iterate_mm_list_nowalk(lruvec, max_seq);
+ 		goto done;
+@@ -4215,8 +4218,6 @@ static bool lruvec_is_reclaimable(struct
+ 	struct mem_cgroup *memcg = lruvec_memcg(lruvec);
+ 	DEFINE_MIN_SEQ(lruvec);
+ 
+-	VM_WARN_ON_ONCE(sc->memcg_low_reclaim);
+-
+ 	/* see the comment on lru_gen_page */
+ 	gen = lru_gen_from_seq(min_seq[LRU_GEN_FILE]);
+ 	birth = READ_ONCE(lruvec->lrugen.timestamps[gen]);
+@@ -4472,12 +4473,8 @@ static bool isolate_page(struct lruvec *
+ {
+ 	bool success;
+ 
+-	/* unmapping inhibited */
+-	if (!sc->may_unmap && page_mapped(page))
+-		return false;
+-
+ 	/* swapping inhibited */
+-	if (!(sc->may_writepage && (sc->gfp_mask & __GFP_IO)) &&
++	if (!(sc->gfp_mask & __GFP_IO) &&
+ 	    (PageDirty(page) ||
+ 	     (PageAnon(page) && !PageSwapCache(page))))
+ 		return false;
+@@ -4574,9 +4571,8 @@ static int scan_pages(struct lruvec *lru
+ 	__count_vm_events(PGSCAN_ANON + type, isolated);
+ 
+ 	/*
+-	 * There might not be eligible pages due to reclaim_idx, may_unmap and
+-	 * may_writepage. Check the remaining to prevent livelock if it's not
+-	 * making progress.
++	 * There might not be eligible pages due to reclaim_idx. Check the
++	 * remaining to prevent livelock if it's not making progress.
+ 	 */
+ 	return isolated || !remaining ? scanned : 0;
+ }
+@@ -4836,8 +4832,7 @@ static long get_nr_to_scan(struct lruvec
+ 	struct mem_cgroup *memcg = lruvec_memcg(lruvec);
+ 	DEFINE_MAX_SEQ(lruvec);
+ 
+-	if (mem_cgroup_below_min(memcg) ||
+-	    (mem_cgroup_below_low(memcg) && !sc->memcg_low_reclaim))
++	if (mem_cgroup_below_min(memcg))
+ 		return 0;
+ 
+ 	if (!should_run_aging(lruvec, max_seq, sc, can_swap, &nr_to_scan))
+@@ -4865,17 +4860,14 @@ static bool try_to_shrink_lruvec(struct
+ 	long nr_to_scan;
+ 	unsigned long scanned = 0;
+ 	unsigned long nr_to_reclaim = get_nr_to_reclaim(sc);
++	int swappiness = get_swappiness(lruvec, sc);
++
++	/* clean file pages are more likely to exist */
++	if (swappiness && !(sc->gfp_mask & __GFP_IO))
++		swappiness = 1;
+ 
+ 	while (true) {
+ 		int delta;
+-		int swappiness;
+-
+-		if (sc->may_swap)
+-			swappiness = get_swappiness(lruvec, sc);
+-		else if (!cgroup_reclaim(sc) && get_swappiness(lruvec, sc))
+-			swappiness = 1;
+-		else
+-			swappiness = 0;
+ 
+ 		nr_to_scan = get_nr_to_scan(lruvec, sc, swappiness);
+ 		if (nr_to_scan <= 0)
+@@ -5005,12 +4997,13 @@ static void lru_gen_shrink_lruvec(struct
+ 	struct blk_plug plug;
+ 
+ 	VM_WARN_ON_ONCE(global_reclaim(sc));
++	VM_WARN_ON_ONCE(!sc->may_writepage || !sc->may_unmap);
+ 
+ 	lru_add_drain();
+ 
+ 	blk_start_plug(&plug);
+ 
+-	set_mm_walk(lruvec_pgdat(lruvec));
++	set_mm_walk(NULL, false);
+ 
+ 	if (try_to_shrink_lruvec(lruvec, sc))
+ 		lru_gen_rotate_memcg(lruvec, MEMCG_LRU_YOUNG);
+@@ -5066,11 +5059,19 @@ static void lru_gen_shrink_node(struct p
+ 
+ 	VM_WARN_ON_ONCE(!global_reclaim(sc));
+ 
++	/*
++	 * Unmapped clean pages are already prioritized. Scanning for more of
++	 * them is likely futile and can cause high reclaim latency when there
++	 * is a large number of memcgs.
++	 */
++	if (!sc->may_writepage || !sc->may_unmap)
++		goto done;
++
+ 	lru_add_drain();
+ 
+ 	blk_start_plug(&plug);
+ 
+-	set_mm_walk(pgdat);
++	set_mm_walk(pgdat, false);
+ 
+ 	set_initial_priority(pgdat, sc);
+ 
+@@ -5088,7 +5089,7 @@ static void lru_gen_shrink_node(struct p
+ 	clear_mm_walk();
+ 
+ 	blk_finish_plug(&plug);
+-
++done:
+ 	/* kswapd should never fail */
+ 	pgdat->kswapd_failures = 0;
+ }
+@@ -5656,7 +5657,7 @@ static ssize_t lru_gen_seq_write(struct
+ 	set_task_reclaim_state(current, &sc.reclaim_state);
+ 	flags = memalloc_noreclaim_save();
+ 	blk_start_plug(&plug);
+-	if (!set_mm_walk(NULL)) {
++	if (!set_mm_walk(NULL, true)) {
+ 		err = -ENOMEM;
+ 		goto done;
+ 	}
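
Note: the flag handling described in the patch above amounts to a small decision on swappiness. The following stand-alone C sketch models it in userspace; the function name effective_swappiness() and its parameters are illustrative stand-ins for the scan_control fields touched by the hunks, not kernel API.

#include <stdbool.h>
#include <stdio.h>

/*
 * Model of the MGLRU swappiness decision after this patch:
 * - !sc->may_swap forces swappiness to 0 (the anon LRU is off limits),
 * - an IO-constrained context (no __GFP_IO) lowers a non-zero swappiness
 *   to 1, preferring clean file pages, which are more likely to exist.
 */
static int effective_swappiness(bool may_swap, bool gfp_io, int base_swappiness)
{
	int swappiness = may_swap ? base_swappiness : 0;

	if (swappiness && !gfp_io)
		swappiness = 1;

	return swappiness;
}

int main(void)
{
	printf("%d\n", effective_swappiness(true, true, 60));  /* 60: unconstrained */
	printf("%d\n", effective_swappiness(true, false, 60)); /* 1: IO-constrained */
	printf("%d\n", effective_swappiness(false, true, 60)); /* 0: swap inhibited */
	return 0;
}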

+ 34 - 0
target/linux/generic/backport-6.1/020-v6.3-28-mm-multi-gen-LRU-simplify-arch_has_hw_pte_young-chec.patch

@@ -0,0 +1,34 @@
+From cf3297e4c7a928da8b2b2f0baff2f9c69ea57952 Mon Sep 17 00:00:00 2001
+From: Yu Zhao <[email protected]>
+Date: Wed, 21 Dec 2022 21:19:06 -0700
+Subject: [PATCH 28/29] mm: multi-gen LRU: simplify arch_has_hw_pte_young()
+ check
+
+Scanning page tables when hardware does not set the accessed bit has
+no real use cases.
+
+Link: https://lkml.kernel.org/r/[email protected]
+Signed-off-by: Yu Zhao <[email protected]>
+Cc: Johannes Weiner <[email protected]>
+Cc: Jonathan Corbet <[email protected]>
+Cc: Michael Larabel <[email protected]>
+Cc: Michal Hocko <[email protected]>
+Cc: Mike Rapoport <[email protected]>
+Cc: Roman Gushchin <[email protected]>
+Cc: Suren Baghdasaryan <[email protected]>
+Signed-off-by: Andrew Morton <[email protected]>
+---
+ mm/vmscan.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -4144,7 +4144,7 @@ static bool try_to_inc_max_seq(struct lr
+ 	 * handful of PTEs. Spreading the work out over a period of time usually
+ 	 * is less efficient, but it avoids bursty page faults.
+ 	 */
+-	if (!force_scan && !(arch_has_hw_pte_young() && get_cap(LRU_GEN_MM_WALK))) {
++	if (!arch_has_hw_pte_young() || !get_cap(LRU_GEN_MM_WALK)) {
+ 		success = iterate_mm_list_nowalk(lruvec, max_seq);
+ 		goto done;
+ 	}

+ 88 - 0
target/linux/generic/backport-6.1/020-v6.3-29-mm-multi-gen-LRU-avoid-futile-retries.patch

@@ -0,0 +1,88 @@
+From cc67f962cc53f6e1dfa92eb85b7b26fe83a3c66f Mon Sep 17 00:00:00 2001
+From: Yu Zhao <[email protected]>
+Date: Mon, 13 Feb 2023 00:53:22 -0700
+Subject: [PATCH 29/29] mm: multi-gen LRU: avoid futile retries
+
+Recall that the per-node memcg LRU has two generations and they alternate
+when the last memcg (of a given node) is moved from one to the other.
+Each generation is also sharded into multiple bins to improve scalability.
+A reclaimer starts with a random bin (in the old generation) and, if it
+fails, it will retry, i.e., try the rest of the bins.
+
+If a reclaimer fails with the last memcg, it should move this memcg to the
+young generation first, which causes the generations to alternate, and
+then retry.  Otherwise, the retries will be futile because all other bins
+are empty.
+
+Link: https://lkml.kernel.org/r/[email protected]
+Fixes: e4dde56cd208 ("mm: multi-gen LRU: per-node lru_gen_folio lists")
+Signed-off-by: Yu Zhao <[email protected]>
+Reported-by: T.J. Mercier <[email protected]>
+Signed-off-by: Andrew Morton <[email protected]>
+---
+ mm/vmscan.c | 25 +++++++++++++++----------
+ 1 file changed, 15 insertions(+), 10 deletions(-)
+
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -4934,18 +4934,20 @@ static int shrink_one(struct lruvec *lru
+ 
+ static void shrink_many(struct pglist_data *pgdat, struct scan_control *sc)
+ {
++	int op;
+ 	int gen;
+ 	int bin;
+ 	int first_bin;
+ 	struct lruvec *lruvec;
+ 	struct lru_gen_page *lrugen;
++	struct mem_cgroup *memcg;
+ 	const struct hlist_nulls_node *pos;
+-	int op = 0;
+-	struct mem_cgroup *memcg = NULL;
+ 	unsigned long nr_to_reclaim = get_nr_to_reclaim(sc);
+ 
+ 	bin = first_bin = prandom_u32_max(MEMCG_NR_BINS);
+ restart:
++	op = 0;
++	memcg = NULL;
+ 	gen = get_memcg_gen(READ_ONCE(pgdat->memcg_lru.seq));
+ 
+ 	rcu_read_lock();
+@@ -4969,14 +4971,22 @@ restart:
+ 
+ 		op = shrink_one(lruvec, sc);
+ 
+-		if (sc->nr_reclaimed >= nr_to_reclaim)
+-			goto success;
+-
+ 		rcu_read_lock();
++
++		if (sc->nr_reclaimed >= nr_to_reclaim)
++			break;
+ 	}
+ 
+ 	rcu_read_unlock();
+ 
++	if (op)
++		lru_gen_rotate_memcg(lruvec, op);
++
++	mem_cgroup_put(memcg);
++
++	if (sc->nr_reclaimed >= nr_to_reclaim)
++		return;
++
+ 	/* restart if raced with lru_gen_rotate_memcg() */
+ 	if (gen != get_nulls_value(pos))
+ 		goto restart;
+@@ -4985,11 +4995,6 @@ restart:
+ 	bin = get_memcg_bin(bin + 1);
+ 	if (bin != first_bin)
+ 		goto restart;
+-success:
+-	if (op)
+-		lru_gen_rotate_memcg(lruvec, op);
+-
+-	mem_cgroup_put(memcg);
+ }
+ 
+ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
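
Note: the retry pattern above can be pictured with a stand-alone model. The sketch below is a hypothetical userspace approximation (fixed bin count, no RCU, no memcg objects); shrink_bin(), NR_BINS and the pass counter are illustrative names, not kernel symbols. It shows why the last memcg must be rotated before retrying: otherwise the retry walks the same empty bins again.

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#define NR_BINS 4

/* Toy stand-in for shrink_one(): only the other generation yields pages here */
static long shrink_bin(int gen, int bin)
{
	(void)bin;
	return gen == 1 ? 8 : 0;
}

int main(void)
{
	long reclaimed = 0, target = 16;
	int gen = 0;     /* generation currently being reclaimed from */
	int passes = 0;
	int first_bin, bin;

	srand((unsigned)time(NULL));
	first_bin = bin = rand() % NR_BINS;  /* reclaimers start at a random bin */

	while (reclaimed < target && passes < 2) {
		reclaimed += shrink_bin(gen, bin);
		bin = (bin + 1) % NR_BINS;
		if (bin == first_bin) {
			/*
			 * A full pass over this generation came up short:
			 * rotate so the generations alternate before the
			 * retry, which is the point of this fix.
			 */
			gen ^= 1;
			passes++;
		}
	}

	printf("reclaimed %ld of %ld page(s)\n", reclaimed, target);
	return 0;
}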

+ 65 - 0
target/linux/generic/backport-6.1/050-v5.16-00-MIPS-uasm-Enable-muhu-opcode-for-MIPS-R6.patch

@@ -0,0 +1,65 @@
+From: Johan Almbladh <[email protected]>
+Date: Tue, 5 Oct 2021 18:54:02 +0200
+Subject: [PATCH] MIPS: uasm: Enable muhu opcode for MIPS R6
+
+Enable the 'muhu' instruction, complementing the existing 'mulu', needed
+to implement a MIPS32 BPF JIT.
+
+Also fix a typo in the existing definition of 'dmulu'.
+
+Signed-off-by: Tony Ambardar <[email protected]>
+
+This patch is a dependency for my 32-bit MIPS eBPF JIT.
+
+Signed-off-by: Johan Almbladh <[email protected]>
+---
+
+--- a/arch/mips/include/asm/uasm.h
++++ b/arch/mips/include/asm/uasm.h
+@@ -145,6 +145,7 @@ Ip_u1(_mtlo);
+ Ip_u3u1u2(_mul);
+ Ip_u1u2(_multu);
+ Ip_u3u1u2(_mulu);
++Ip_u3u1u2(_muhu);
+ Ip_u3u1u2(_nor);
+ Ip_u3u1u2(_or);
+ Ip_u2u1u3(_ori);
+--- a/arch/mips/mm/uasm-mips.c
++++ b/arch/mips/mm/uasm-mips.c
+@@ -90,7 +90,7 @@ static const struct insn insn_table[insn
+ 				RS | RT | RD},
+ 	[insn_dmtc0]	= {M(cop0_op, dmtc_op, 0, 0, 0, 0), RT | RD | SET},
+ 	[insn_dmultu]	= {M(spec_op, 0, 0, 0, 0, dmultu_op), RS | RT},
+-	[insn_dmulu]	= {M(spec_op, 0, 0, 0, dmult_dmul_op, dmultu_op),
++	[insn_dmulu]	= {M(spec_op, 0, 0, 0, dmultu_dmulu_op, dmultu_op),
+ 				RS | RT | RD},
+ 	[insn_drotr]	= {M(spec_op, 1, 0, 0, 0, dsrl_op), RT | RD | RE},
+ 	[insn_drotr32]	= {M(spec_op, 1, 0, 0, 0, dsrl32_op), RT | RD | RE},
+@@ -150,6 +150,8 @@ static const struct insn insn_table[insn
+ 	[insn_mtlo]	= {M(spec_op, 0, 0, 0, 0, mtlo_op), RS},
+ 	[insn_mulu]	= {M(spec_op, 0, 0, 0, multu_mulu_op, multu_op),
+ 				RS | RT | RD},
++	[insn_muhu]	= {M(spec_op, 0, 0, 0, multu_muhu_op, multu_op),
++				RS | RT | RD},
+ #ifndef CONFIG_CPU_MIPSR6
+ 	[insn_mul]	= {M(spec2_op, 0, 0, 0, 0, mul_op), RS | RT | RD},
+ #else
+--- a/arch/mips/mm/uasm.c
++++ b/arch/mips/mm/uasm.c
+@@ -59,7 +59,7 @@ enum opcode {
+ 	insn_lddir, insn_ldpte, insn_ldx, insn_lh, insn_lhu, insn_ll, insn_lld,
+ 	insn_lui, insn_lw, insn_lwu, insn_lwx, insn_mfc0, insn_mfhc0, insn_mfhi,
+ 	insn_mflo, insn_modu, insn_movn, insn_movz, insn_mtc0, insn_mthc0,
+-	insn_mthi, insn_mtlo, insn_mul, insn_multu, insn_mulu, insn_nor,
++	insn_mthi, insn_mtlo, insn_mul, insn_multu, insn_mulu, insn_muhu, insn_nor,
+ 	insn_or, insn_ori, insn_pref, insn_rfe, insn_rotr, insn_sb, insn_sc,
+ 	insn_scd, insn_seleqz, insn_selnez, insn_sd, insn_sh, insn_sll,
+ 	insn_sllv, insn_slt, insn_slti, insn_sltiu, insn_sltu, insn_sra,
+@@ -344,6 +344,7 @@ I_u1(_mtlo)
+ I_u3u1u2(_mul)
+ I_u1u2(_multu)
+ I_u3u1u2(_mulu)
++I_u3u1u2(_muhu)
+ I_u3u1u2(_nor)
+ I_u3u1u2(_or)
+ I_u2u1u3(_ori)
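
Note: for readers unfamiliar with the R6 pair, 'mulu' yields the low 32 bits and 'muhu' the high 32 bits of an unsigned 32x32-bit product, which is what a 32-bit BPF JIT needs for 64-bit multiplies. A minimal userspace illustration of the two halves (not part of the patch):

#include <inttypes.h>
#include <stdio.h>

int main(void)
{
	uint32_t a = 0xdeadbeef, b = 0x12345678;
	uint64_t prod = (uint64_t)a * b;

	/* mulu rd, rs, rt: low 32 bits of the unsigned product */
	printf("mulu: 0x%08" PRIx32 "\n", (uint32_t)prod);
	/* muhu rd, rs, rt: high 32 bits of the unsigned product */
	printf("muhu: 0x%08" PRIx32 "\n", (uint32_t)(prod >> 32));
	return 0;
}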

+ 31 - 0
target/linux/generic/backport-6.1/050-v5.16-01-mips-uasm-Add-workaround-for-Loongson-2F-nop-CPU-err.patch

@@ -0,0 +1,31 @@
+From: Johan Almbladh <[email protected]>
+Date: Tue, 5 Oct 2021 18:54:03 +0200
+Subject: [PATCH] mips: uasm: Add workaround for Loongson-2F nop CPU errata
+
+This patch implements a workaround for the Loongson-2F nop in generated
+code, if the existing option CONFIG_CPU_NOP_WORKAROUNDS is set. Before,
+the binutils option -mfix-loongson2f-nop was enabled, but no workaround
+was done when emitting MIPS code. Now, the nop pseudo instruction is
+emitted as "or at,at,zero" instead of the default "sll zero,zero,0". This
+is consistent with the workaround implemented by binutils.
+
+Link: https://sourceware.org/legacy-ml/binutils/2009-11/msg00387.html
+
+Signed-off-by: Johan Almbladh <[email protected]>
+Reviewed-by: Jiaxun Yang <[email protected]>
+---
+
+--- a/arch/mips/include/asm/uasm.h
++++ b/arch/mips/include/asm/uasm.h
+@@ -249,7 +249,11 @@ static inline void uasm_l##lb(struct uas
+ #define uasm_i_bnezl(buf, rs, off) uasm_i_bnel(buf, rs, 0, off)
+ #define uasm_i_ehb(buf) uasm_i_sll(buf, 0, 0, 3)
+ #define uasm_i_move(buf, a, b) UASM_i_ADDU(buf, a, 0, b)
++#ifdef CONFIG_CPU_NOP_WORKAROUNDS
++#define uasm_i_nop(buf) uasm_i_or(buf, 1, 1, 0)
++#else
+ #define uasm_i_nop(buf) uasm_i_sll(buf, 0, 0, 0)
++#endif
+ #define uasm_i_ssnop(buf) uasm_i_sll(buf, 0, 0, 1)
+ 
+ static inline void uasm_i_drotr_safe(u32 **p, unsigned int a1,
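
Note: with this change, uasm_i_nop() expands to uasm_i_or(buf, 1, 1, 0) on affected configurations, i.e. "or $at, $at, $zero" instead of the all-zero "sll $zero, $zero, 0". The little program below is only a sanity check of the two encodings, assuming the standard MIPS R-type layout; rtype() is an ad-hoc helper, not a kernel function.

#include <inttypes.h>
#include <stdio.h>

/* SPECIAL-class R-type word: opcode 0 | rs | rt | rd | sa | funct */
static uint32_t rtype(unsigned rs, unsigned rt, unsigned rd,
		      unsigned sa, unsigned funct)
{
	return ((uint32_t)rs << 21) | (rt << 16) | (rd << 11) | (sa << 6) | funct;
}

int main(void)
{
	/* default nop: sll $zero, $zero, 0 */
	printf("sll $0,$0,0 : 0x%08" PRIx32 "\n", rtype(0, 0, 0, 0, 0x00));
	/* Loongson-2F safe nop: or $at, $at, $zero */
	printf("or  $1,$1,$0: 0x%08" PRIx32 "\n", rtype(1, 0, 1, 0, 0x25));
	return 0;
}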

+ 3078 - 0
target/linux/generic/backport-6.1/050-v5.16-02-mips-bpf-Add-eBPF-JIT-for-32-bit-MIPS.patch

@@ -0,0 +1,3078 @@
+From: Johan Almbladh <[email protected]>
+Date: Tue, 5 Oct 2021 18:54:04 +0200
+Subject: [PATCH] mips: bpf: Add eBPF JIT for 32-bit MIPS
+
+This is an implementation of an eBPF JIT for 32-bit MIPS I-V and MIPS32.
+The implementation supports all 32-bit and 64-bit ALU and JMP operations,
+including the recently-added atomics. 64-bit div/mod and 64-bit atomics
+are implemented using function calls to math64 and atomic64 functions,
+respectively. All 32-bit operations are implemented natively by the JIT,
+except if the CPU lacks ll/sc instructions.
+
+Register mapping
+================
+All 64-bit eBPF registers are mapped to native 32-bit MIPS register pairs,
+with no stack scratch space used for register swapping. This means
+that all eBPF register data is kept in CPU registers all the time, and
+this simplifies the register management a lot. It also reduces the JIT's
+pressure on temporary registers since we do not have to move data around.
+
+Native register pairs are ordered according to CPU endianness, following
+the O32 calling convention for passing 64-bit arguments and return values.
+The eBPF return value, arguments and callee-saved registers are mapped to
+their native MIPS equivalents.
+
+Since the 32 highest bits in the eBPF FP (frame pointer) register are
+always zero, only one general-purpose register is actually needed for the
+mapping. The MIPS fp register is used for this purpose. The high bits are
+mapped to MIPS register r0. This saves us one CPU register, which is much
+needed for temporaries, while still allowing us to treat the R10 (FP)
+register just like any other eBPF register in the JIT.
+
+The MIPS gp (global pointer) and at (assembler temporary) registers are
+used as internal temporary registers for constant blinding. CPU registers
+t6-t9 are used internally by the JIT when constructing more complex 64-bit
+operations. This is precisely what is needed - two registers to store an
+operand value, and two more as scratch registers when performing the
+operation.
+
+The register mapping is shown below.
+
+    R0 - $v1, $v0   return value
+    R1 - $a1, $a0   argument 1, passed in registers
+    R2 - $a3, $a2   argument 2, passed in registers
+    R3 - $t1, $t0   argument 3, passed on stack
+    R4 - $t3, $t2   argument 4, passed on stack
+    R5 - $t4, $t3   argument 5, passed on stack
+    R6 - $s1, $s0   callee-saved
+    R7 - $s3, $s2   callee-saved
+    R8 - $s5, $s4   callee-saved
+    R9 - $s7, $s6   callee-saved
+    FP - $r0, $fp   32-bit frame pointer
+    AX - $gp, $at   constant-blinding
+         $t6 - $t9  unallocated, JIT temporaries
+
+Jump offsets
+============
+The JIT tries to map all conditional JMP operations to MIPS conditional
+PC-relative branches. The MIPS branch offset field is 18 bits, in bytes,
+which is equivalent to the eBPF 16-bit instruction offset. However, since
+the JIT may emit more than one CPU instruction per eBPF instruction, the
+field width may overflow. If that happens, the JIT converts the long
+conditional jump to a short PC-relative branch with the condition
+inverted, jumping over a long unconditional absolute jmp (j).
+
+This conversion will change the instruction offset mapping used for jumps,
+and may in turn result in more branch offset overflows. The JIT therefore
+dry-runs the translation until no more branches are converted and the
+offsets do not change anymore. There is an upper bound on this of course,
+and if the JIT hits that limit, the last two iterations are run with all
+branches being converted.
+
+Tail call count
+===============
+The current tail call count is stored in the 16-byte area of the caller's
+stack frame that is reserved for the callee in the o32 ABI. The value is
+initialized in the prologue, and propagated to the tail-callee by skipping
+the initialization instructions when emitting the tail call.
+
+Signed-off-by: Johan Almbladh <[email protected]>
+---
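
Note (annotation, not patch content): the "Jump offsets" rule above reduces to a signed 18-bit range check on the byte offset, matching the -0x20000..0x1ffff test used by setup_jmp() in bpf_jit_comp.c below. A stand-alone sketch of that decision; branch_offset_fits() and the sample offsets are illustrative only.

#include <stdbool.h>
#include <stdio.h>

/* MIPS conditional branches encode a signed 18-bit byte offset */
static bool branch_offset_fits(long off_bytes)
{
	return off_bytes >= -0x20000 && off_bytes <= 0x1ffff;
}

int main(void)
{
	const long offs[] = { 64, -0x20000, 0x1ffff, 0x20000 };
	unsigned i;

	for (i = 0; i < sizeof(offs) / sizeof(offs[0]); i++)
		printf("%8ld: %s\n", offs[i],
		       branch_offset_fits(offs[i]) ?
		       "short conditional branch" :
		       "inverted branch over 'j' (3 extra words, offsets recomputed)");
	return 0;
}
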
+ create mode 100644 arch/mips/net/bpf_jit_comp.c
+ create mode 100644 arch/mips/net/bpf_jit_comp.h
+ create mode 100644 arch/mips/net/bpf_jit_comp32.c
+
+--- a/arch/mips/net/Makefile
++++ b/arch/mips/net/Makefile
+@@ -2,4 +2,9 @@
+ # MIPS networking code
+ 
+ obj-$(CONFIG_MIPS_CBPF_JIT) += bpf_jit.o bpf_jit_asm.o
+-obj-$(CONFIG_MIPS_EBPF_JIT) += ebpf_jit.o
++
++ifeq ($(CONFIG_32BIT),y)
++        obj-$(CONFIG_MIPS_EBPF_JIT) += bpf_jit_comp.o bpf_jit_comp32.o
++else
++        obj-$(CONFIG_MIPS_EBPF_JIT) += ebpf_jit.o
++endif
+--- /dev/null
++++ b/arch/mips/net/bpf_jit_comp.c
+@@ -0,0 +1,1020 @@
++// SPDX-License-Identifier: GPL-2.0-only
++/*
++ * Just-In-Time compiler for eBPF bytecode on MIPS.
++ * Implementation of JIT functions common to 32-bit and 64-bit CPUs.
++ *
++ * Copyright (c) 2021 Anyfi Networks AB.
++ * Author: Johan Almbladh <[email protected]>
++ *
++ * Based on code and ideas from
++ * Copyright (c) 2017 Cavium, Inc.
++ * Copyright (c) 2017 Shubham Bansal <[email protected]>
++ * Copyright (c) 2011 Mircea Gherzan <[email protected]>
++ */
++
++/*
++ * Code overview
++ * =============
++ *
++ * - bpf_jit_comp.h
++ *   Common definitions and utilities.
++ *
++ * - bpf_jit_comp.c
++ *   Implementation of JIT top-level logic and exported JIT API functions.
++ *   Implementation of internal operations shared by 32-bit and 64-bit code.
++ *   JMP and ALU JIT control code, register control code, shared ALU and
++ *   JMP/JMP32 JIT operations.
++ *
++ * - bpf_jit_comp32.c
++ *   Implementation of functions to JIT prologue, epilogue and a single eBPF
++ *   instruction for 32-bit MIPS CPUs. The functions use shared operations
++ *   where possible, and implement the rest for 32-bit MIPS such as ALU64
++ *   operations.
++ *
++ * - bpf_jit_comp64.c
++ *   Ditto, for 64-bit MIPS CPUs.
++ *
++ * Zero and sign extension
++ * ========================
++ * 32-bit MIPS instructions on 64-bit MIPS registers use sign extension,
++ * but the eBPF instruction set mandates zero extension. We let the verifier
++ * insert explicit zero-extensions after 32-bit ALU operations, both for
++ * 32-bit and 64-bit MIPS JITs. Conditional JMP32 operations on 64-bit MIPs
++ * are JITed with sign extensions inserted when so expected.
++ *
++ * ALU operations
++ * ==============
++ * ALU operations on 32/64-bit MIPS and ALU64 operations on 64-bit MIPS are
++ * JITed in the following steps. ALU64 operations on 32-bit MIPS are more
++ * complicated and therefore only processed by special implementations in
++ * step (3).
++ *
++ * 1) valid_alu_i:
++ *    Determine if an immediate operation can be emitted as such, or if
++ *    we must fall back to the register version.
++ *
++ * 2) rewrite_alu_i:
++ *    Convert BPF operation and immediate value to a canonical form for
++ *    JITing. In some degenerate cases this form may be a no-op.
++ *
++ * 3) emit_alu_{i,i64,r,64}:
++ *    Emit instructions for an ALU or ALU64 immediate or register operation.
++ *
++ * JMP operations
++ * ==============
++ * JMP and JMP32 operations require a JIT instruction offset table for
++ * translating the jump offset. This table is computed by dry-running the
++ * JIT without actually emitting anything. However, the computed PC-relative
++ * offset may overflow the 18-bit offset field width of the native MIPS
++ * branch instruction. In such cases, the long jump is converted into the
++ * following sequence.
++ *
++ *    <branch> !<cond> +2    Inverted PC-relative branch
++ *    nop                    Delay slot
++ *    j <offset>             Unconditional absolute long jump
++ *    nop                    Delay slot
++ *
++ * Since this converted sequence alters the offset table, all offsets must
++ * be re-calculated. This may in turn trigger new branch conversions, so
++ * the process is repeated until no further changes are made. Normally it
++ * completes in 1-2 iterations. If JIT_MAX_ITERATIONS should be reached, we
++ * fall back to converting every remaining jump operation. The branch
++ * conversion is independent of how the JMP or JMP32 condition is JITed.
++ *
++ * JMP32 and JMP operations are JITed as follows.
++ *
++ * 1) setup_jmp_{i,r}:
++ *    Convert jump conditional and offset into a form that can be JITed.
++ *    This form may be a no-op, a canonical form, or an inverted PC-relative
++ *    jump if branch conversion is necessary.
++ *
++ * 2) valid_jmp_i:
++ *    Determine if an immediate operation can be emitted as such, or if
++ *    we must fall back to the register version. Applies to JMP32 for 32-bit
++ *    MIPS, and both JMP and JMP32 for 64-bit MIPS.
++ *
++ * 3) emit_jmp_{i,i64,r,r64}:
++ *    Emit instructions for an JMP or JMP32 immediate or register operation.
++ *
++ * 4) finish_jmp_{i,r}:
++ *    Emit any instructions needed to finish the jump. This includes a nop
++ *    for the delay slot if a branch was emitted, and a long absolute jump
++ *    if the branch was converted.
++ */
++
++#include <linux/limits.h>
++#include <linux/bitops.h>
++#include <linux/errno.h>
++#include <linux/filter.h>
++#include <linux/bpf.h>
++#include <linux/slab.h>
++#include <asm/bitops.h>
++#include <asm/cacheflush.h>
++#include <asm/cpu-features.h>
++#include <asm/isa-rev.h>
++#include <asm/uasm.h>
++
++#include "bpf_jit_comp.h"
++
++/* Convenience macros for descriptor access */
++#define CONVERTED(desc)	((desc) & JIT_DESC_CONVERT)
++#define INDEX(desc)	((desc) & ~JIT_DESC_CONVERT)
++
++/*
++ * Push registers on the stack, starting at a given depth from the stack
++ * pointer and increasing. The next depth to be written is returned.
++ */
++int push_regs(struct jit_context *ctx, u32 mask, u32 excl, int depth)
++{
++	int reg;
++
++	for (reg = 0; reg < BITS_PER_BYTE * sizeof(mask); reg++)
++		if (mask & BIT(reg)) {
++			if ((excl & BIT(reg)) == 0) {
++				if (sizeof(long) == 4)
++					emit(ctx, sw, reg, depth, MIPS_R_SP);
++				else /* sizeof(long) == 8 */
++					emit(ctx, sd, reg, depth, MIPS_R_SP);
++			}
++			depth += sizeof(long);
++		}
++
++	ctx->stack_used = max((int)ctx->stack_used, depth);
++	return depth;
++}
++
++/*
++ * Pop registers from the stack, starting at a given depth from the stack
++ * pointer and increasing. The next depth to be read is returned.
++ */
++int pop_regs(struct jit_context *ctx, u32 mask, u32 excl, int depth)
++{
++	int reg;
++
++	for (reg = 0; reg < BITS_PER_BYTE * sizeof(mask); reg++)
++		if (mask & BIT(reg)) {
++			if ((excl & BIT(reg)) == 0) {
++				if (sizeof(long) == 4)
++					emit(ctx, lw, reg, depth, MIPS_R_SP);
++				else /* sizeof(long) == 8 */
++					emit(ctx, ld, reg, depth, MIPS_R_SP);
++			}
++			depth += sizeof(long);
++		}
++
++	return depth;
++}
++
++/* Compute the 28-bit jump target address from a BPF program location */
++int get_target(struct jit_context *ctx, u32 loc)
++{
++	u32 index = INDEX(ctx->descriptors[loc]);
++	unsigned long pc = (unsigned long)&ctx->target[ctx->jit_index];
++	unsigned long addr = (unsigned long)&ctx->target[index];
++
++	if (!ctx->target)
++		return 0;
++
++	if ((addr ^ pc) & ~MIPS_JMP_MASK)
++		return -1;
++
++	return addr & MIPS_JMP_MASK;
++}
++
++/* Compute the PC-relative offset to relative BPF program offset */
++int get_offset(const struct jit_context *ctx, int off)
++{
++	return (INDEX(ctx->descriptors[ctx->bpf_index + off]) -
++		ctx->jit_index - 1) * sizeof(u32);
++}
++
++/* dst = imm (register width) */
++void emit_mov_i(struct jit_context *ctx, u8 dst, s32 imm)
++{
++	if (imm >= -0x8000 && imm <= 0x7fff) {
++		emit(ctx, addiu, dst, MIPS_R_ZERO, imm);
++	} else {
++		emit(ctx, lui, dst, (s16)((u32)imm >> 16));
++		emit(ctx, ori, dst, dst, (u16)(imm & 0xffff));
++	}
++	clobber_reg(ctx, dst);
++}
++
++/* dst = src (register width) */
++void emit_mov_r(struct jit_context *ctx, u8 dst, u8 src)
++{
++	emit(ctx, ori, dst, src, 0);
++	clobber_reg(ctx, dst);
++}
++
++/* Validate ALU immediate range */
++bool valid_alu_i(u8 op, s32 imm)
++{
++	switch (BPF_OP(op)) {
++	case BPF_NEG:
++	case BPF_LSH:
++	case BPF_RSH:
++	case BPF_ARSH:
++		/* All legal eBPF values are valid */
++		return true;
++	case BPF_ADD:
++		/* imm must be 16 bits */
++		return imm >= -0x8000 && imm <= 0x7fff;
++	case BPF_SUB:
++		/* -imm must be 16 bits */
++		return imm >= -0x7fff && imm <= 0x8000;
++	case BPF_AND:
++	case BPF_OR:
++	case BPF_XOR:
++		/* imm must be 16 bits unsigned */
++		return imm >= 0 && imm <= 0xffff;
++	case BPF_MUL:
++		/* imm must be zero or a positive power of two */
++		return imm == 0 || (imm > 0 && is_power_of_2(imm));
++	case BPF_DIV:
++	case BPF_MOD:
++		/* imm must be a 17-bit power of two */
++		return (u32)imm <= 0x10000 && is_power_of_2((u32)imm);
++	}
++	return false;
++}
++
++/* Rewrite ALU immediate operation */
++bool rewrite_alu_i(u8 op, s32 imm, u8 *alu, s32 *val)
++{
++	bool act = true;
++
++	switch (BPF_OP(op)) {
++	case BPF_LSH:
++	case BPF_RSH:
++	case BPF_ARSH:
++	case BPF_ADD:
++	case BPF_SUB:
++	case BPF_OR:
++	case BPF_XOR:
++		/* imm == 0 is a no-op */
++		act = imm != 0;
++		break;
++	case BPF_MUL:
++		if (imm == 1) {
++			/* dst * 1 is a no-op */
++			act = false;
++		} else if (imm == 0) {
++			/* dst * 0 is dst & 0 */
++			op = BPF_AND;
++		} else {
++			/* dst * (1 << n) is dst << n */
++			op = BPF_LSH;
++			imm = ilog2(abs(imm));
++		}
++		break;
++	case BPF_DIV:
++		if (imm == 1) {
++			/* dst / 1 is a no-op */
++			act = false;
++		} else {
++			/* dst / (1 << n) is dst >> n */
++			op = BPF_RSH;
++			imm = ilog2(imm);
++		}
++		break;
++	case BPF_MOD:
++		/* dst % (1 << n) is dst & ((1 << n) - 1) */
++		op = BPF_AND;
++		imm--;
++		break;
++	}
++
++	*alu = op;
++	*val = imm;
++	return act;
++}
++
++/* ALU immediate operation (32-bit) */
++void emit_alu_i(struct jit_context *ctx, u8 dst, s32 imm, u8 op)
++{
++	switch (BPF_OP(op)) {
++	/* dst = -dst */
++	case BPF_NEG:
++		emit(ctx, subu, dst, MIPS_R_ZERO, dst);
++		break;
++	/* dst = dst & imm */
++	case BPF_AND:
++		emit(ctx, andi, dst, dst, (u16)imm);
++		break;
++	/* dst = dst | imm */
++	case BPF_OR:
++		emit(ctx, ori, dst, dst, (u16)imm);
++		break;
++	/* dst = dst ^ imm */
++	case BPF_XOR:
++		emit(ctx, xori, dst, dst, (u16)imm);
++		break;
++	/* dst = dst << imm */
++	case BPF_LSH:
++		emit(ctx, sll, dst, dst, imm);
++		break;
++	/* dst = dst >> imm */
++	case BPF_RSH:
++		emit(ctx, srl, dst, dst, imm);
++		break;
++	/* dst = dst >> imm (arithmetic) */
++	case BPF_ARSH:
++		emit(ctx, sra, dst, dst, imm);
++		break;
++	/* dst = dst + imm */
++	case BPF_ADD:
++		emit(ctx, addiu, dst, dst, imm);
++		break;
++	/* dst = dst - imm */
++	case BPF_SUB:
++		emit(ctx, addiu, dst, dst, -imm);
++		break;
++	}
++	clobber_reg(ctx, dst);
++}
++
++/* ALU register operation (32-bit) */
++void emit_alu_r(struct jit_context *ctx, u8 dst, u8 src, u8 op)
++{
++	switch (BPF_OP(op)) {
++	/* dst = dst & src */
++	case BPF_AND:
++		emit(ctx, and, dst, dst, src);
++		break;
++	/* dst = dst | src */
++	case BPF_OR:
++		emit(ctx, or, dst, dst, src);
++		break;
++	/* dst = dst ^ src */
++	case BPF_XOR:
++		emit(ctx, xor, dst, dst, src);
++		break;
++	/* dst = dst << src */
++	case BPF_LSH:
++		emit(ctx, sllv, dst, dst, src);
++		break;
++	/* dst = dst >> src */
++	case BPF_RSH:
++		emit(ctx, srlv, dst, dst, src);
++		break;
++	/* dst = dst >> src (arithmetic) */
++	case BPF_ARSH:
++		emit(ctx, srav, dst, dst, src);
++		break;
++	/* dst = dst + src */
++	case BPF_ADD:
++		emit(ctx, addu, dst, dst, src);
++		break;
++	/* dst = dst - src */
++	case BPF_SUB:
++		emit(ctx, subu, dst, dst, src);
++		break;
++	/* dst = dst * src */
++	case BPF_MUL:
++		if (cpu_has_mips32r1 || cpu_has_mips32r6) {
++			emit(ctx, mul, dst, dst, src);
++		} else {
++			emit(ctx, multu, dst, src);
++			emit(ctx, mflo, dst);
++		}
++		break;
++	/* dst = dst / src */
++	case BPF_DIV:
++		if (cpu_has_mips32r6) {
++			emit(ctx, divu_r6, dst, dst, src);
++		} else {
++			emit(ctx, divu, dst, src);
++			emit(ctx, mflo, dst);
++		}
++		break;
++	/* dst = dst % src */
++	case BPF_MOD:
++		if (cpu_has_mips32r6) {
++			emit(ctx, modu, dst, dst, src);
++		} else {
++			emit(ctx, divu, dst, src);
++			emit(ctx, mfhi, dst);
++		}
++		break;
++	}
++	clobber_reg(ctx, dst);
++}
++
++/* Atomic read-modify-write (32-bit) */
++void emit_atomic_r(struct jit_context *ctx, u8 dst, u8 src, s16 off, u8 code)
++{
++	emit(ctx, ll, MIPS_R_T9, off, dst);
++	switch (code) {
++	case BPF_ADD:
++		emit(ctx, addu, MIPS_R_T8, MIPS_R_T9, src);
++		break;
++	case BPF_AND:
++		emit(ctx, and, MIPS_R_T8, MIPS_R_T9, src);
++		break;
++	case BPF_OR:
++		emit(ctx, or, MIPS_R_T8, MIPS_R_T9, src);
++		break;
++	case BPF_XOR:
++		emit(ctx, xor, MIPS_R_T8, MIPS_R_T9, src);
++		break;
++	}
++	emit(ctx, sc, MIPS_R_T8, off, dst);
++	emit(ctx, beqz, MIPS_R_T8, -16);
++	emit(ctx, nop); /* Delay slot */
++}
++
++/* Atomic compare-and-exchange (32-bit) */
++void emit_cmpxchg_r(struct jit_context *ctx, u8 dst, u8 src, u8 res, s16 off)
++{
++	emit(ctx, ll, MIPS_R_T9, off, dst);
++	emit(ctx, bne, MIPS_R_T9, res, 12);
++	emit(ctx, move, MIPS_R_T8, src);     /* Delay slot */
++	emit(ctx, sc, MIPS_R_T8, off, dst);
++	emit(ctx, beqz, MIPS_R_T8, -20);
++	emit(ctx, move, res, MIPS_R_T9);     /* Delay slot */
++	clobber_reg(ctx, res);
++}
++
++/* Swap bytes and truncate a register word or half word */
++void emit_bswap_r(struct jit_context *ctx, u8 dst, u32 width)
++{
++	u8 tmp = MIPS_R_T8;
++	u8 msk = MIPS_R_T9;
++
++	switch (width) {
++	/* Swap bytes in a word */
++	case 32:
++		if (cpu_has_mips32r2 || cpu_has_mips32r6) {
++			emit(ctx, wsbh, dst, dst);
++			emit(ctx, rotr, dst, dst, 16);
++		} else {
++			emit(ctx, sll, tmp, dst, 16);    /* tmp  = dst << 16 */
++			emit(ctx, srl, dst, dst, 16);    /* dst = dst >> 16  */
++			emit(ctx, or, dst, dst, tmp);    /* dst = dst | tmp  */
++
++			emit(ctx, lui, msk, 0xff);       /* msk = 0x00ff0000 */
++			emit(ctx, ori, msk, msk, 0xff);  /* msk = msk | 0xff */
++
++			emit(ctx, and, tmp, dst, msk);   /* tmp = dst & msk  */
++			emit(ctx, sll, tmp, tmp, 8);     /* tmp = tmp << 8   */
++			emit(ctx, srl, dst, dst, 8);     /* dst = dst >> 8   */
++			emit(ctx, and, dst, dst, msk);   /* dst = dst & msk  */
++			emit(ctx, or, dst, dst, tmp);    /* reg = dst | tmp  */
++		}
++		break;
++	/* Swap bytes in a half word */
++	case 16:
++		if (cpu_has_mips32r2 || cpu_has_mips32r6) {
++			emit(ctx, wsbh, dst, dst);
++			emit(ctx, andi, dst, dst, 0xffff);
++		} else {
++			emit(ctx, andi, tmp, dst, 0xff00); /* t = d & 0xff00 */
++			emit(ctx, srl, tmp, tmp, 8);       /* t = t >> 8     */
++			emit(ctx, andi, dst, dst, 0x00ff); /* d = d & 0x00ff */
++			emit(ctx, sll, dst, dst, 8);       /* d = d << 8     */
++			emit(ctx, or,  dst, dst, tmp);     /* d = d | t      */
++		}
++		break;
++	}
++	clobber_reg(ctx, dst);
++}
++
++/* Validate jump immediate range */
++bool valid_jmp_i(u8 op, s32 imm)
++{
++	switch (op) {
++	case JIT_JNOP:
++		/* Immediate value not used */
++		return true;
++	case BPF_JEQ:
++	case BPF_JNE:
++		/* No immediate operation */
++		return false;
++	case BPF_JSET:
++	case JIT_JNSET:
++		/* imm must be 16 bits unsigned */
++		return imm >= 0 && imm <= 0xffff;
++	case BPF_JGE:
++	case BPF_JLT:
++	case BPF_JSGE:
++	case BPF_JSLT:
++		/* imm must be 16 bits */
++		return imm >= -0x8000 && imm <= 0x7fff;
++	case BPF_JGT:
++	case BPF_JLE:
++	case BPF_JSGT:
++	case BPF_JSLE:
++		/* imm + 1 must be 16 bits */
++		return imm >= -0x8001 && imm <= 0x7ffe;
++	}
++	return false;
++}
++
++/* Invert a conditional jump operation */
++static u8 invert_jmp(u8 op)
++{
++	switch (op) {
++	case BPF_JA: return JIT_JNOP;
++	case BPF_JEQ: return BPF_JNE;
++	case BPF_JNE: return BPF_JEQ;
++	case BPF_JSET: return JIT_JNSET;
++	case BPF_JGT: return BPF_JLE;
++	case BPF_JGE: return BPF_JLT;
++	case BPF_JLT: return BPF_JGE;
++	case BPF_JLE: return BPF_JGT;
++	case BPF_JSGT: return BPF_JSLE;
++	case BPF_JSGE: return BPF_JSLT;
++	case BPF_JSLT: return BPF_JSGE;
++	case BPF_JSLE: return BPF_JSGT;
++	}
++	return 0;
++}
++
++/* Prepare a PC-relative jump operation */
++static void setup_jmp(struct jit_context *ctx, u8 bpf_op,
++		      s16 bpf_off, u8 *jit_op, s32 *jit_off)
++{
++	u32 *descp = &ctx->descriptors[ctx->bpf_index];
++	int op = bpf_op;
++	int offset = 0;
++
++	/* Do not compute offsets on the first pass */
++	if (INDEX(*descp) == 0)
++		goto done;
++
++	/* Skip jumps never taken */
++	if (bpf_op == JIT_JNOP)
++		goto done;
++
++	/* Convert jumps always taken */
++	if (bpf_op == BPF_JA)
++		*descp |= JIT_DESC_CONVERT;
++
++	/*
++	 * Current ctx->jit_index points to the start of the branch preamble.
++	 * Since the preamble differs among different branch conditionals,
++	 * the current index cannot be used to compute the branch offset.
++	 * Instead, we use the offset table value for the next instruction,
++	 * which gives the index immediately after the branch delay slot.
++	 */
++	if (!CONVERTED(*descp)) {
++		int target = ctx->bpf_index + bpf_off + 1;
++		int origin = ctx->bpf_index + 1;
++
++		offset = (INDEX(ctx->descriptors[target]) -
++			  INDEX(ctx->descriptors[origin]) + 1) * sizeof(u32);
++	}
++
++	/*
++	 * The PC-relative branch offset field on MIPS is 18 bits signed,
++	 * so if the computed offset is larger than this we generate an
++	 * absolute jump that we skip with an inverted conditional branch.
++	 */
++	if (CONVERTED(*descp) || offset < -0x20000 || offset > 0x1ffff) {
++		offset = 3 * sizeof(u32);
++		op = invert_jmp(bpf_op);
++		ctx->changes += !CONVERTED(*descp);
++		*descp |= JIT_DESC_CONVERT;
++	}
++
++done:
++	*jit_off = offset;
++	*jit_op = op;
++}
++
++/* Prepare a PC-relative jump operation with immediate conditional */
++void setup_jmp_i(struct jit_context *ctx, s32 imm, u8 width,
++		 u8 bpf_op, s16 bpf_off, u8 *jit_op, s32 *jit_off)
++{
++	bool always = false;
++	bool never = false;
++
++	switch (bpf_op) {
++	case BPF_JEQ:
++	case BPF_JNE:
++		break;
++	case BPF_JSET:
++	case BPF_JLT:
++		never = imm == 0;
++		break;
++	case BPF_JGE:
++		always = imm == 0;
++		break;
++	case BPF_JGT:
++		never = (u32)imm == U32_MAX;
++		break;
++	case BPF_JLE:
++		always = (u32)imm == U32_MAX;
++		break;
++	case BPF_JSGT:
++		never = imm == S32_MAX && width == 32;
++		break;
++	case BPF_JSGE:
++		always = imm == S32_MIN && width == 32;
++		break;
++	case BPF_JSLT:
++		never = imm == S32_MIN && width == 32;
++		break;
++	case BPF_JSLE:
++		always = imm == S32_MAX && width == 32;
++		break;
++	}
++
++	if (never)
++		bpf_op = JIT_JNOP;
++	if (always)
++		bpf_op = BPF_JA;
++	setup_jmp(ctx, bpf_op, bpf_off, jit_op, jit_off);
++}
++
++/* Prepare a PC-relative jump operation with register conditional */
++void setup_jmp_r(struct jit_context *ctx, bool same_reg,
++		 u8 bpf_op, s16 bpf_off, u8 *jit_op, s32 *jit_off)
++{
++	switch (bpf_op) {
++	case BPF_JSET:
++		break;
++	case BPF_JEQ:
++	case BPF_JGE:
++	case BPF_JLE:
++	case BPF_JSGE:
++	case BPF_JSLE:
++		if (same_reg)
++			bpf_op = BPF_JA;
++		break;
++	case BPF_JNE:
++	case BPF_JLT:
++	case BPF_JGT:
++	case BPF_JSGT:
++	case BPF_JSLT:
++		if (same_reg)
++			bpf_op = JIT_JNOP;
++		break;
++	}
++	setup_jmp(ctx, bpf_op, bpf_off, jit_op, jit_off);
++}
++
++/* Finish a PC-relative jump operation */
++int finish_jmp(struct jit_context *ctx, u8 jit_op, s16 bpf_off)
++{
++	/* Emit conditional branch delay slot */
++	if (jit_op != JIT_JNOP)
++		emit(ctx, nop);
++	/*
++	 * Emit an absolute long jump with delay slot,
++	 * if the PC-relative branch was converted.
++	 */
++	if (CONVERTED(ctx->descriptors[ctx->bpf_index])) {
++		int target = get_target(ctx, ctx->bpf_index + bpf_off + 1);
++
++		if (target < 0)
++			return -1;
++		emit(ctx, j, target);
++		emit(ctx, nop);
++	}
++	return 0;
++}
++
++/* Jump immediate (32-bit) */
++void emit_jmp_i(struct jit_context *ctx, u8 dst, s32 imm, s32 off, u8 op)
++{
++	switch (op) {
++	/* No-op, used internally for branch optimization */
++	case JIT_JNOP:
++		break;
++	/* PC += off if dst & imm */
++	case BPF_JSET:
++		emit(ctx, andi, MIPS_R_T9, dst, (u16)imm);
++		emit(ctx, bnez, MIPS_R_T9, off);
++		break;
++	/* PC += off if (dst & imm) == 0 (not in BPF, used for long jumps) */
++	case JIT_JNSET:
++		emit(ctx, andi, MIPS_R_T9, dst, (u16)imm);
++		emit(ctx, beqz, MIPS_R_T9, off);
++		break;
++	/* PC += off if dst > imm */
++	case BPF_JGT:
++		emit(ctx, sltiu, MIPS_R_T9, dst, imm + 1);
++		emit(ctx, beqz, MIPS_R_T9, off);
++		break;
++	/* PC += off if dst >= imm */
++	case BPF_JGE:
++		emit(ctx, sltiu, MIPS_R_T9, dst, imm);
++		emit(ctx, beqz, MIPS_R_T9, off);
++		break;
++	/* PC += off if dst < imm */
++	case BPF_JLT:
++		emit(ctx, sltiu, MIPS_R_T9, dst, imm);
++		emit(ctx, bnez, MIPS_R_T9, off);
++		break;
++	/* PC += off if dst <= imm */
++	case BPF_JLE:
++		emit(ctx, sltiu, MIPS_R_T9, dst, imm + 1);
++		emit(ctx, bnez, MIPS_R_T9, off);
++		break;
++	/* PC += off if dst > imm (signed) */
++	case BPF_JSGT:
++		emit(ctx, slti, MIPS_R_T9, dst, imm + 1);
++		emit(ctx, beqz, MIPS_R_T9, off);
++		break;
++	/* PC += off if dst >= imm (signed) */
++	case BPF_JSGE:
++		emit(ctx, slti, MIPS_R_T9, dst, imm);
++		emit(ctx, beqz, MIPS_R_T9, off);
++		break;
++	/* PC += off if dst < imm (signed) */
++	case BPF_JSLT:
++		emit(ctx, slti, MIPS_R_T9, dst, imm);
++		emit(ctx, bnez, MIPS_R_T9, off);
++		break;
++	/* PC += off if dst <= imm (signed) */
++	case BPF_JSLE:
++		emit(ctx, slti, MIPS_R_T9, dst, imm + 1);
++		emit(ctx, bnez, MIPS_R_T9, off);
++		break;
++	}
++}
++
++/* Jump register (32-bit) */
++void emit_jmp_r(struct jit_context *ctx, u8 dst, u8 src, s32 off, u8 op)
++{
++	switch (op) {
++	/* No-op, used internally for branch optimization */
++	case JIT_JNOP:
++		break;
++	/* PC += off if dst == src */
++	case BPF_JEQ:
++		emit(ctx, beq, dst, src, off);
++		break;
++	/* PC += off if dst != src */
++	case BPF_JNE:
++		emit(ctx, bne, dst, src, off);
++		break;
++	/* PC += off if dst & src */
++	case BPF_JSET:
++		emit(ctx, and, MIPS_R_T9, dst, src);
++		emit(ctx, bnez, MIPS_R_T9, off);
++		break;
++	/* PC += off if (dst & src) == 0 (not in BPF, used for long jumps) */
++	case JIT_JNSET:
++		emit(ctx, and, MIPS_R_T9, dst, src);
++		emit(ctx, beqz, MIPS_R_T9, off);
++		break;
++	/* PC += off if dst > src */
++	case BPF_JGT:
++		emit(ctx, sltu, MIPS_R_T9, src, dst);
++		emit(ctx, bnez, MIPS_R_T9, off);
++		break;
++	/* PC += off if dst >= src */
++	case BPF_JGE:
++		emit(ctx, sltu, MIPS_R_T9, dst, src);
++		emit(ctx, beqz, MIPS_R_T9, off);
++		break;
++	/* PC += off if dst < src */
++	case BPF_JLT:
++		emit(ctx, sltu, MIPS_R_T9, dst, src);
++		emit(ctx, bnez, MIPS_R_T9, off);
++		break;
++	/* PC += off if dst <= src */
++	case BPF_JLE:
++		emit(ctx, sltu, MIPS_R_T9, src, dst);
++		emit(ctx, beqz, MIPS_R_T9, off);
++		break;
++	/* PC += off if dst > src (signed) */
++	case BPF_JSGT:
++		emit(ctx, slt, MIPS_R_T9, src, dst);
++		emit(ctx, bnez, MIPS_R_T9, off);
++		break;
++	/* PC += off if dst >= src (signed) */
++	case BPF_JSGE:
++		emit(ctx, slt, MIPS_R_T9, dst, src);
++		emit(ctx, beqz, MIPS_R_T9, off);
++		break;
++	/* PC += off if dst < src (signed) */
++	case BPF_JSLT:
++		emit(ctx, slt, MIPS_R_T9, dst, src);
++		emit(ctx, bnez, MIPS_R_T9, off);
++		break;
++	/* PC += off if dst <= src (signed) */
++	case BPF_JSLE:
++		emit(ctx, slt, MIPS_R_T9, src, dst);
++		emit(ctx, beqz, MIPS_R_T9, off);
++		break;
++	}
++}
++
++/* Jump always */
++int emit_ja(struct jit_context *ctx, s16 off)
++{
++	int target = get_target(ctx, ctx->bpf_index + off + 1);
++
++	if (target < 0)
++		return -1;
++	emit(ctx, j, target);
++	emit(ctx, nop);
++	return 0;
++}
++
++/* Jump to epilogue */
++int emit_exit(struct jit_context *ctx)
++{
++	int target = get_target(ctx, ctx->program->len);
++
++	if (target < 0)
++		return -1;
++	emit(ctx, j, target);
++	emit(ctx, nop);
++	return 0;
++}
++
++/* Build the program body from eBPF bytecode */
++static int build_body(struct jit_context *ctx)
++{
++	const struct bpf_prog *prog = ctx->program;
++	unsigned int i;
++
++	ctx->stack_used = 0;
++	for (i = 0; i < prog->len; i++) {
++		const struct bpf_insn *insn = &prog->insnsi[i];
++		u32 *descp = &ctx->descriptors[i];
++		int ret;
++
++		access_reg(ctx, insn->src_reg);
++		access_reg(ctx, insn->dst_reg);
++
++		ctx->bpf_index = i;
++		if (ctx->target == NULL) {
++			ctx->changes += INDEX(*descp) != ctx->jit_index;
++			*descp &= JIT_DESC_CONVERT;
++			*descp |= ctx->jit_index;
++		}
++
++		ret = build_insn(insn, ctx);
++		if (ret < 0)
++			return ret;
++
++		if (ret > 0) {
++			i++;
++			if (ctx->target == NULL)
++				descp[1] = ctx->jit_index;
++		}
++	}
++
++	/* Store the end offset, where the epilogue begins */
++	ctx->descriptors[prog->len] = ctx->jit_index;
++	return 0;
++}
++
++/* Set the branch conversion flag on all instructions */
++static void set_convert_flag(struct jit_context *ctx, bool enable)
++{
++	const struct bpf_prog *prog = ctx->program;
++	u32 flag = enable ? JIT_DESC_CONVERT : 0;
++	unsigned int i;
++
++	for (i = 0; i <= prog->len; i++)
++		ctx->descriptors[i] = INDEX(ctx->descriptors[i]) | flag;
++}
++
++static void jit_fill_hole(void *area, unsigned int size)
++{
++	u32 *p;
++
++	/* We are guaranteed to have aligned memory. */
++	for (p = area; size >= sizeof(u32); size -= sizeof(u32))
++		uasm_i_break(&p, BRK_BUG); /* Increments p */
++}
++
++bool bpf_jit_needs_zext(void)
++{
++	return true;
++}
++
++struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
++{
++	struct bpf_prog *tmp, *orig_prog = prog;
++	struct bpf_binary_header *header = NULL;
++	struct jit_context ctx;
++	bool tmp_blinded = false;
++	unsigned int tmp_idx;
++	unsigned int image_size;
++	u8 *image_ptr;
++	int tries;
++
++	/*
++	 * If BPF JIT was not enabled then we must fall back to
++	 * the interpreter.
++	 */
++	if (!prog->jit_requested)
++		return orig_prog;
++	/*
++	 * If constant blinding was enabled and we failed during blinding
++	 * then we must fall back to the interpreter. Otherwise, we save
++	 * the new JITed code.
++	 */
++	tmp = bpf_jit_blind_constants(prog);
++	if (IS_ERR(tmp))
++		return orig_prog;
++	if (tmp != prog) {
++		tmp_blinded = true;
++		prog = tmp;
++	}
++
++	memset(&ctx, 0, sizeof(ctx));
++	ctx.program = prog;
++
++	/*
++	 * If we are not able to allocate memory for descriptors[],
++	 * we must fall back to the interpreter.
++	 */
++	ctx.descriptors = kcalloc(prog->len + 1, sizeof(*ctx.descriptors),
++				  GFP_KERNEL);
++	if (ctx.descriptors == NULL)
++		goto out_err;
++
++	/* First pass discovers used resources */
++	if (build_body(&ctx) < 0)
++		goto out_err;
++	/*
++	 * Second pass computes instruction offsets.
++	 * If any PC-relative branches are out of range, a sequence of
++	 * a PC-relative branch + a jump is generated, and we have to
++	 * try again from the beginning to generate the new offsets.
++	 * This is done until no additional conversions are necessary.
++	 * The last two iterations are done with all branches being
++	 * converted, to guarantee offset table convergence within a
++	 * fixed number of iterations.
++	 */
++	ctx.jit_index = 0;
++	build_prologue(&ctx);
++	tmp_idx = ctx.jit_index;
++
++	tries = JIT_MAX_ITERATIONS;
++	do {
++		ctx.jit_index = tmp_idx;
++		ctx.changes = 0;
++		if (tries == 2)
++			set_convert_flag(&ctx, true);
++		if (build_body(&ctx) < 0)
++			goto out_err;
++	} while (ctx.changes > 0 && --tries > 0);
++
++	if (WARN_ONCE(ctx.changes > 0, "JIT offsets failed to converge"))
++		goto out_err;
++
++	build_epilogue(&ctx, MIPS_R_RA);
++
++	/* Now we know the size of the structure to make */
++	image_size = sizeof(u32) * ctx.jit_index;
++	header = bpf_jit_binary_alloc(image_size, &image_ptr,
++				      sizeof(u32), jit_fill_hole);
++	/*
++	 * If we are not able to allocate memory for the structure,
++	 * we must fall back to the interpreter.
++	 */
++	if (header == NULL)
++		goto out_err;
++
++	/* Actual pass to generate final JIT code */
++	ctx.target = (u32 *)image_ptr;
++	ctx.jit_index = 0;
++
++	/*
++	 * If building the JITed code fails somehow,
++	 * we fall back to the interpreter.
++	 */
++	build_prologue(&ctx);
++	if (build_body(&ctx) < 0)
++		goto out_err;
++	build_epilogue(&ctx, MIPS_R_RA);
++
++	/* Populate line info meta data */
++	set_convert_flag(&ctx, false);
++	bpf_prog_fill_jited_linfo(prog, &ctx.descriptors[1]);
++
++	/* Set as read-only exec and flush instruction cache */
++	bpf_jit_binary_lock_ro(header);
++	flush_icache_range((unsigned long)header,
++			   (unsigned long)&ctx.target[ctx.jit_index]);
++
++	if (bpf_jit_enable > 1)
++		bpf_jit_dump(prog->len, image_size, 2, ctx.target);
++
++	prog->bpf_func = (void *)ctx.target;
++	prog->jited = 1;
++	prog->jited_len = image_size;
++
++out:
++	if (tmp_blinded)
++		bpf_jit_prog_release_other(prog, prog == orig_prog ?
++					   tmp : orig_prog);
++	kfree(ctx.descriptors);
++	return prog;
++
++out_err:
++	prog = orig_prog;
++	if (header)
++		bpf_jit_binary_free(header);
++	goto out;
++}
+--- /dev/null
++++ b/arch/mips/net/bpf_jit_comp.h
+@@ -0,0 +1,211 @@
++/* SPDX-License-Identifier: GPL-2.0-only */
++/*
++ * Just-In-Time compiler for eBPF bytecode on 32-bit and 64-bit MIPS.
++ *
++ * Copyright (c) 2021 Anyfi Networks AB.
++ * Author: Johan Almbladh <[email protected]>
++ *
++ * Based on code and ideas from
++ * Copyright (c) 2017 Cavium, Inc.
++ * Copyright (c) 2017 Shubham Bansal <[email protected]>
++ * Copyright (c) 2011 Mircea Gherzan <[email protected]>
++ */
++
++#ifndef _BPF_JIT_COMP_H
++#define _BPF_JIT_COMP_H
++
++/* MIPS registers */
++#define MIPS_R_ZERO	0   /* Const zero */
++#define MIPS_R_AT	1   /* Asm temp   */
++#define MIPS_R_V0	2   /* Result     */
++#define MIPS_R_V1	3   /* Result     */
++#define MIPS_R_A0	4   /* Argument   */
++#define MIPS_R_A1	5   /* Argument   */
++#define MIPS_R_A2	6   /* Argument   */
++#define MIPS_R_A3	7   /* Argument   */
++#define MIPS_R_A4	8   /* Arg (n64)  */
++#define MIPS_R_A5	9   /* Arg (n64)  */
++#define MIPS_R_A6	10  /* Arg (n64)  */
++#define MIPS_R_A7	11  /* Arg (n64)  */
++#define MIPS_R_T0	8   /* Temp (o32) */
++#define MIPS_R_T1	9   /* Temp (o32) */
++#define MIPS_R_T2	10  /* Temp (o32) */
++#define MIPS_R_T3	11  /* Temp (o32) */
++#define MIPS_R_T4	12  /* Temporary  */
++#define MIPS_R_T5	13  /* Temporary  */
++#define MIPS_R_T6	14  /* Temporary  */
++#define MIPS_R_T7	15  /* Temporary  */
++#define MIPS_R_S0	16  /* Saved      */
++#define MIPS_R_S1	17  /* Saved      */
++#define MIPS_R_S2	18  /* Saved      */
++#define MIPS_R_S3	19  /* Saved      */
++#define MIPS_R_S4	20  /* Saved      */
++#define MIPS_R_S5	21  /* Saved      */
++#define MIPS_R_S6	22  /* Saved      */
++#define MIPS_R_S7	23  /* Saved      */
++#define MIPS_R_T8	24  /* Temporary  */
++#define MIPS_R_T9	25  /* Temporary  */
++/*      MIPS_R_K0	26     Reserved   */
++/*      MIPS_R_K1	27     Reserved   */
++#define MIPS_R_GP	28  /* Global ptr */
++#define MIPS_R_SP	29  /* Stack ptr  */
++#define MIPS_R_FP	30  /* Frame ptr  */
++#define MIPS_R_RA	31  /* Return     */
++
++/*
++ * Jump address mask for immediate jumps. The four most significant bits
++ * must be equal to PC.
++ */
++#define MIPS_JMP_MASK	0x0fffffffUL
++
++/* Maximum number of iterations in offset table computation */
++#define JIT_MAX_ITERATIONS	8
++
++/*
++ * Jump pseudo-instructions used internally
++ * for branch conversion and branch optimization.
++ */
++#define JIT_JNSET	0xe0
++#define JIT_JNOP	0xf0
++
++/* Descriptor flag for PC-relative branch conversion */
++#define JIT_DESC_CONVERT	BIT(31)
++
++/* JIT context for an eBPF program */
++struct jit_context {
++	struct bpf_prog *program;     /* The eBPF program being JITed        */
++	u32 *descriptors;             /* eBPF to JITed CPU insn descriptors  */
++	u32 *target;                  /* JITed code buffer                   */
++	u32 bpf_index;                /* Index of current BPF program insn   */
++	u32 jit_index;                /* Index of current JIT target insn    */
++	u32 changes;                  /* Number of PC-relative branch conv   */
++	u32 accessed;                 /* Bit mask of read eBPF registers     */
++	u32 clobbered;                /* Bit mask of modified CPU registers  */
++	u32 stack_size;               /* Total allocated stack size in bytes */
++	u32 saved_size;               /* Size of callee-saved registers      */
++	u32 stack_used;               /* Stack size used for function calls  */
++};
++
++/* Emit the instruction if the JIT memory space has been allocated */
++#define emit(ctx, func, ...)					\
++do {								\
++	if ((ctx)->target != NULL) {				\
++		u32 *p = &(ctx)->target[ctx->jit_index];	\
++		uasm_i_##func(&p, ##__VA_ARGS__);		\
++	}							\
++	(ctx)->jit_index++;					\
++} while (0)
++
++/*
++ * Mark a BPF register as accessed, it needs to be
++ * initialized by the program if expected, e.g. FP.
++ */
++static inline void access_reg(struct jit_context *ctx, u8 reg)
++{
++	ctx->accessed |= BIT(reg);
++}
++
++/*
++ * Mark a CPU register as clobbered, it needs to be
++ * saved/restored by the program if callee-saved.
++ */
++static inline void clobber_reg(struct jit_context *ctx, u8 reg)
++{
++	ctx->clobbered |= BIT(reg);
++}
++
++/*
++ * Push registers on the stack, starting at a given depth from the stack
++ * pointer and increasing. The next depth to be written is returned.
++ */
++int push_regs(struct jit_context *ctx, u32 mask, u32 excl, int depth);
++
++/*
++ * Pop registers from the stack, starting at a given depth from the stack
++ * pointer and increasing. The next depth to be read is returned.
++ */
++int pop_regs(struct jit_context *ctx, u32 mask, u32 excl, int depth);
++
++/* Compute the 28-bit jump target address from a BPF program location */
++int get_target(struct jit_context *ctx, u32 loc);
++
++/* Compute the PC-relative offset to relative BPF program offset */
++int get_offset(const struct jit_context *ctx, int off);
++
++/* dst = imm (32-bit) */
++void emit_mov_i(struct jit_context *ctx, u8 dst, s32 imm);
++
++/* dst = src (32-bit) */
++void emit_mov_r(struct jit_context *ctx, u8 dst, u8 src);
++
++/* Validate ALU/ALU64 immediate range */
++bool valid_alu_i(u8 op, s32 imm);
++
++/* Rewrite ALU/ALU64 immediate operation */
++bool rewrite_alu_i(u8 op, s32 imm, u8 *alu, s32 *val);
++
++/* ALU immediate operation (32-bit) */
++void emit_alu_i(struct jit_context *ctx, u8 dst, s32 imm, u8 op);
++
++/* ALU register operation (32-bit) */
++void emit_alu_r(struct jit_context *ctx, u8 dst, u8 src, u8 op);
++
++/* Atomic read-modify-write (32-bit) */
++void emit_atomic_r(struct jit_context *ctx, u8 dst, u8 src, s16 off, u8 code);
++
++/* Atomic compare-and-exchange (32-bit) */
++void emit_cmpxchg_r(struct jit_context *ctx, u8 dst, u8 src, u8 res, s16 off);
++
++/* Swap bytes and truncate a register word or half word */
++void emit_bswap_r(struct jit_context *ctx, u8 dst, u32 width);
++
++/* Validate JMP/JMP32 immediate range */
++bool valid_jmp_i(u8 op, s32 imm);
++
++/* Prepare a PC-relative jump operation with immediate conditional */
++void setup_jmp_i(struct jit_context *ctx, s32 imm, u8 width,
++		 u8 bpf_op, s16 bpf_off, u8 *jit_op, s32 *jit_off);
++
++/* Prepare a PC-relative jump operation with register conditional */
++void setup_jmp_r(struct jit_context *ctx, bool same_reg,
++		 u8 bpf_op, s16 bpf_off, u8 *jit_op, s32 *jit_off);
++
++/* Finish a PC-relative jump operation */
++int finish_jmp(struct jit_context *ctx, u8 jit_op, s16 bpf_off);
++
++/* Conditional JMP/JMP32 immediate */
++void emit_jmp_i(struct jit_context *ctx, u8 dst, s32 imm, s32 off, u8 op);
++
++/* Conditional JMP/JMP32 register */
++void emit_jmp_r(struct jit_context *ctx, u8 dst, u8 src, s32 off, u8 op);
++
++/* Jump always */
++int emit_ja(struct jit_context *ctx, s16 off);
++
++/* Jump to epilogue */
++int emit_exit(struct jit_context *ctx);
++
++/*
++ * Build program prologue to set up the stack and registers.
++ * This function is implemented separately for 32-bit and 64-bit JITs.
++ */
++void build_prologue(struct jit_context *ctx);
++
++/*
++ * Build the program epilogue to restore the stack and registers.
++ * This function is implemented separately for 32-bit and 64-bit JITs.
++ */
++void build_epilogue(struct jit_context *ctx, int dest_reg);
++
++/*
++ * Convert an eBPF instruction to a native instruction, i.e.
++ * JITs an eBPF instruction.
++ * Returns :
++ *	0  - Successfully JITed an 8-byte eBPF instruction
++ *	>0 - Successfully JITed a 16-byte eBPF instruction
++ *	<0 - Failed to JIT.
++ * This function is implemented separately for 32-bit and 64-bit JITs.
++ */
++int build_insn(const struct bpf_insn *insn, struct jit_context *ctx);
++
++#endif /* _BPF_JIT_COMP_H */
+--- /dev/null
++++ b/arch/mips/net/bpf_jit_comp32.c
+@@ -0,0 +1,1741 @@
++// SPDX-License-Identifier: GPL-2.0-only
++/*
++ * Just-In-Time compiler for eBPF bytecode on MIPS.
++ * Implementation of JIT functions for 32-bit CPUs.
++ *
++ * Copyright (c) 2021 Anyfi Networks AB.
++ * Author: Johan Almbladh <[email protected]>
++ *
++ * Based on code and ideas from
++ * Copyright (c) 2017 Cavium, Inc.
++ * Copyright (c) 2017 Shubham Bansal <[email protected]>
++ * Copyright (c) 2011 Mircea Gherzan <[email protected]>
++ */
++
++#include <linux/math64.h>
++#include <linux/errno.h>
++#include <linux/filter.h>
++#include <linux/bpf.h>
++#include <asm/cpu-features.h>
++#include <asm/isa-rev.h>
++#include <asm/uasm.h>
++
++#include "bpf_jit_comp.h"
++
++/* MIPS a4-a7 are not available in the o32 ABI */
++#undef MIPS_R_A4
++#undef MIPS_R_A5
++#undef MIPS_R_A6
++#undef MIPS_R_A7
++
++/* Stack is 8-byte aligned in o32 ABI */
++#define MIPS_STACK_ALIGNMENT 8
++
++/*
++ * The top 16 bytes of a stack frame are reserved for the callee in the O32 ABI.
++ * This corresponds to stack space for register arguments a0-a3.
++ */
++#define JIT_RESERVED_STACK 16
++
++/* Temporary 64-bit register used by JIT */
++#define JIT_REG_TMP MAX_BPF_JIT_REG
++
++/*
++ * Number of prologue bytes to skip when doing a tail call.
++ * Tail call count (TCC) initialization (8 bytes) always, plus
++ * R0-to-v0 assignment (4 bytes) if big endian.
++ */
++#ifdef __BIG_ENDIAN
++#define JIT_TCALL_SKIP 12
++#else
++#define JIT_TCALL_SKIP 8
++#endif
++
++/* CPU registers holding the callee return value */
++#define JIT_RETURN_REGS	  \
++	(BIT(MIPS_R_V0) | \
++	 BIT(MIPS_R_V1))
++
++/* CPU registers arguments passed to callee directly */
++#define JIT_ARG_REGS      \
++	(BIT(MIPS_R_A0) | \
++	 BIT(MIPS_R_A1) | \
++	 BIT(MIPS_R_A2) | \
++	 BIT(MIPS_R_A3))
++
++/* CPU register arguments passed to callee on stack */
++#define JIT_STACK_REGS    \
++	(BIT(MIPS_R_T0) | \
++	 BIT(MIPS_R_T1) | \
++	 BIT(MIPS_R_T2) | \
++	 BIT(MIPS_R_T3) | \
++	 BIT(MIPS_R_T4) | \
++	 BIT(MIPS_R_T5))
++
++/* Caller-saved CPU registers */
++#define JIT_CALLER_REGS    \
++	(JIT_RETURN_REGS | \
++	 JIT_ARG_REGS    | \
++	 JIT_STACK_REGS)
++
++/* Callee-saved CPU registers */
++#define JIT_CALLEE_REGS   \
++	(BIT(MIPS_R_S0) | \
++	 BIT(MIPS_R_S1) | \
++	 BIT(MIPS_R_S2) | \
++	 BIT(MIPS_R_S3) | \
++	 BIT(MIPS_R_S4) | \
++	 BIT(MIPS_R_S5) | \
++	 BIT(MIPS_R_S6) | \
++	 BIT(MIPS_R_S7) | \
++	 BIT(MIPS_R_GP) | \
++	 BIT(MIPS_R_FP) | \
++	 BIT(MIPS_R_RA))
++
++/*
++ * Mapping of 64-bit eBPF registers to 32-bit native MIPS registers.
++ *
++ * 1) Native register pairs are ordered according to CPU endianness, following
++ *    the MIPS convention for passing 64-bit arguments and return values.
++ * 2) The eBPF return value, arguments and callee-saved registers are mapped
++ *    to their native MIPS equivalents.
++ * 3) Since the 32 highest bits in the eBPF FP register are always zero,
++ *    only one general-purpose register is actually needed for the mapping.
++ *    We use the fp register for this purpose, and map the highest bits to
++ *    the MIPS register r0 (zero).
++ * 4) We use the MIPS gp and at registers as internal temporary registers
++ *    for constant blinding. The gp register is callee-saved.
++ * 5) One 64-bit temporary register is mapped for use when sign-extending
++ *    immediate operands. MIPS registers t6-t9 are available to the JIT
++ *    as temporaries when implementing complex 64-bit operations.
++ *
++ * With this scheme all eBPF registers are being mapped to native MIPS
++ * registers without having to use any stack scratch space. The direct
++ * register mapping (2) simplifies the handling of function calls.
++ */
++static const u8 bpf2mips32[][2] = {
++	/* Return value from in-kernel function, and exit value from eBPF */
++	[BPF_REG_0] = {MIPS_R_V1, MIPS_R_V0},
++	/* Arguments from eBPF program to in-kernel function */
++	[BPF_REG_1] = {MIPS_R_A1, MIPS_R_A0},
++	[BPF_REG_2] = {MIPS_R_A3, MIPS_R_A2},
++	/* Remaining arguments, to be passed on the stack per O32 ABI */
++	[BPF_REG_3] = {MIPS_R_T1, MIPS_R_T0},
++	[BPF_REG_4] = {MIPS_R_T3, MIPS_R_T2},
++	[BPF_REG_5] = {MIPS_R_T5, MIPS_R_T4},
++	/* Callee-saved registers that in-kernel function will preserve */
++	[BPF_REG_6] = {MIPS_R_S1, MIPS_R_S0},
++	[BPF_REG_7] = {MIPS_R_S3, MIPS_R_S2},
++	[BPF_REG_8] = {MIPS_R_S5, MIPS_R_S4},
++	[BPF_REG_9] = {MIPS_R_S7, MIPS_R_S6},
++	/* Read-only frame pointer to access the eBPF stack */
++#ifdef __BIG_ENDIAN
++	[BPF_REG_FP] = {MIPS_R_FP, MIPS_R_ZERO},
++#else
++	[BPF_REG_FP] = {MIPS_R_ZERO, MIPS_R_FP},
++#endif
++	/* Temporary register for blinding constants */
++	[BPF_REG_AX] = {MIPS_R_GP, MIPS_R_AT},
++	/* Temporary register for internal JIT use */
++	[JIT_REG_TMP] = {MIPS_R_T7, MIPS_R_T6},
++};
++
++/* Get low CPU register for a 64-bit eBPF register mapping */
++static inline u8 lo(const u8 reg[])
++{
++#ifdef __BIG_ENDIAN
++	return reg[0];
++#else
++	return reg[1];
++#endif
++}
++
++/* Get high CPU register for a 64-bit eBPF register mapping */
++static inline u8 hi(const u8 reg[])
++{
++#ifdef __BIG_ENDIAN
++	return reg[1];
++#else
++	return reg[0];
++#endif
++}
++
++/*
++ * Mark a 64-bit CPU register pair as clobbered, it needs to be
++ * saved/restored by the program if callee-saved.
++ */
++static void clobber_reg64(struct jit_context *ctx, const u8 reg[])
++{
++	clobber_reg(ctx, reg[0]);
++	clobber_reg(ctx, reg[1]);
++}
++
++/* dst = imm (sign-extended) */
++static void emit_mov_se_i64(struct jit_context *ctx, const u8 dst[], s32 imm)
++{
++	emit_mov_i(ctx, lo(dst), imm);
++	if (imm < 0)
++		emit(ctx, addiu, hi(dst), MIPS_R_ZERO, -1);
++	else
++		emit(ctx, move, hi(dst), MIPS_R_ZERO);
++	clobber_reg64(ctx, dst);
++}
++
++/* Zero extension, if verifier does not do it for us  */
++static void emit_zext_ver(struct jit_context *ctx, const u8 dst[])
++{
++	if (!ctx->program->aux->verifier_zext) {
++		emit(ctx, move, hi(dst), MIPS_R_ZERO);
++		clobber_reg(ctx, hi(dst));
++	}
++}
++
++/* Load delay slot, if ISA mandates it */
++static void emit_load_delay(struct jit_context *ctx)
++{
++	if (!cpu_has_mips_2_3_4_5_r)
++		emit(ctx, nop);
++}
++
++/* ALU immediate operation (64-bit) */
++static void emit_alu_i64(struct jit_context *ctx,
++			 const u8 dst[], s32 imm, u8 op)
++{
++	u8 src = MIPS_R_T6;
++
++	/*
++	 * ADD/SUB with all but the max negative imm can be handled by
++	 * inverting the operation and the imm value, saving one insn.
++	 */
++	if (imm > S32_MIN && imm < 0)
++		switch (op) {
++		case BPF_ADD:
++			op = BPF_SUB;
++			imm = -imm;
++			break;
++		case BPF_SUB:
++			op = BPF_ADD;
++			imm = -imm;
++			break;
++		}
++
++	/* Move immediate to temporary register */
++	emit_mov_i(ctx, src, imm);
++
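++	/*
++	 * The eBPF immediate is sign-extended to 64 bits, so its implicit
++	 * high word is 0 for imm >= 0 and ~0 for imm < 0. The low word sits
++	 * in the temporary register; each case below applies it and then
++	 * folds the implicit high word into hi(dst).
++	 */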
++	switch (op) {
++	/* dst = dst + imm */
++	case BPF_ADD:
++		emit(ctx, addu, lo(dst), lo(dst), src);
++		emit(ctx, sltu, MIPS_R_T9, lo(dst), src);
++		emit(ctx, addu, hi(dst), hi(dst), MIPS_R_T9);
++		if (imm < 0)
++			emit(ctx, addiu, hi(dst), hi(dst), -1);
++		break;
++	/* dst = dst - imm */
++	case BPF_SUB:
++		emit(ctx, sltu, MIPS_R_T9, lo(dst), src);
++		emit(ctx, subu, lo(dst), lo(dst), src);
++		emit(ctx, subu, hi(dst), hi(dst), MIPS_R_T9);
++		if (imm < 0)
++			emit(ctx, addiu, hi(dst), hi(dst), 1);
++		break;
++	/* dst = dst | imm */
++	case BPF_OR:
++		emit(ctx, or, lo(dst), lo(dst), src);
++		if (imm < 0)
++			emit(ctx, addiu, hi(dst), MIPS_R_ZERO, -1);
++		break;
++	/* dst = dst & imm */
++	case BPF_AND:
++		emit(ctx, and, lo(dst), lo(dst), src);
++		if (imm >= 0)
++			emit(ctx, move, hi(dst), MIPS_R_ZERO);
++		break;
++	/* dst = dst ^ imm */
++	case BPF_XOR:
++		emit(ctx, xor, lo(dst), lo(dst), src);
++		if (imm < 0) {
++			emit(ctx, subu, hi(dst), MIPS_R_ZERO, hi(dst));
++			emit(ctx, addiu, hi(dst), hi(dst), -1);
++		}
++		break;
++	}
++	clobber_reg64(ctx, dst);
++}
++
++/* ALU register operation (64-bit) */
++static void emit_alu_r64(struct jit_context *ctx,
++			 const u8 dst[], const u8 src[], u8 op)
++{
++	switch (BPF_OP(op)) {
++	/* dst = dst + src */
++	case BPF_ADD:
++		if (src == dst) {
++			emit(ctx, srl, MIPS_R_T9, lo(dst), 31);
++			emit(ctx, addu, lo(dst), lo(dst), lo(dst));
++		} else {
++			emit(ctx, addu, lo(dst), lo(dst), lo(src));
++			emit(ctx, sltu, MIPS_R_T9, lo(dst), lo(src));
++		}
++		emit(ctx, addu, hi(dst), hi(dst), hi(src));
++		emit(ctx, addu, hi(dst), hi(dst), MIPS_R_T9);
++		break;
++	/* dst = dst - src */
++	case BPF_SUB:
++		emit(ctx, sltu, MIPS_R_T9, lo(dst), lo(src));
++		emit(ctx, subu, lo(dst), lo(dst), lo(src));
++		emit(ctx, subu, hi(dst), hi(dst), hi(src));
++		emit(ctx, subu, hi(dst), hi(dst), MIPS_R_T9);
++		break;
++	/* dst = dst | src */
++	case BPF_OR:
++		emit(ctx, or, lo(dst), lo(dst), lo(src));
++		emit(ctx, or, hi(dst), hi(dst), hi(src));
++		break;
++	/* dst = dst & src */
++	case BPF_AND:
++		emit(ctx, and, lo(dst), lo(dst), lo(src));
++		emit(ctx, and, hi(dst), hi(dst), hi(src));
++		break;
++	/* dst = dst ^ src */
++	case BPF_XOR:
++		emit(ctx, xor, lo(dst), lo(dst), lo(src));
++		emit(ctx, xor, hi(dst), hi(dst), hi(src));
++		break;
++	}
++	clobber_reg64(ctx, dst);
++}
++
++/* ALU invert (64-bit) */
++static void emit_neg_i64(struct jit_context *ctx, const u8 dst[])
++{
++	emit(ctx, sltu, MIPS_R_T9, MIPS_R_ZERO, lo(dst));
++	emit(ctx, subu, lo(dst), MIPS_R_ZERO, lo(dst));
++	emit(ctx, subu, hi(dst), MIPS_R_ZERO, hi(dst));
++	emit(ctx, subu, hi(dst), hi(dst), MIPS_R_T9);
++
++	clobber_reg64(ctx, dst);
++}
++
++/* ALU shift immediate (64-bit) */
++static void emit_shift_i64(struct jit_context *ctx,
++			   const u8 dst[], u32 imm, u8 op)
++{
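++	/*
++	 * Shifts of less than 32 bits move the bits that cross the word
++	 * boundary through T9 with a complementary shift before they are
++	 * OR-ed into the other half. Shifts of 32 bits or more reduce to
++	 * a single shift of the opposite half, with zero or sign fill.
++	 */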
++	switch (BPF_OP(op)) {
++	/* dst = dst << imm */
++	case BPF_LSH:
++		if (imm < 32) {
++			emit(ctx, srl, MIPS_R_T9, lo(dst), 32 - imm);
++			emit(ctx, sll, lo(dst), lo(dst), imm);
++			emit(ctx, sll, hi(dst), hi(dst), imm);
++			emit(ctx, or, hi(dst), hi(dst), MIPS_R_T9);
++		} else {
++			emit(ctx, sll, hi(dst), lo(dst), imm - 32);
++			emit(ctx, move, lo(dst), MIPS_R_ZERO);
++		}
++		break;
++	/* dst = dst >> imm */
++	case BPF_RSH:
++		if (imm < 32) {
++			emit(ctx, sll, MIPS_R_T9, hi(dst), 32 - imm);
++			emit(ctx, srl, lo(dst), lo(dst), imm);
++			emit(ctx, srl, hi(dst), hi(dst), imm);
++			emit(ctx, or, lo(dst), lo(dst), MIPS_R_T9);
++		} else {
++			emit(ctx, srl, lo(dst), hi(dst), imm - 32);
++			emit(ctx, move, hi(dst), MIPS_R_ZERO);
++		}
++		break;
++	/* dst = dst >> imm (arithmetic) */
++	case BPF_ARSH:
++		if (imm < 32) {
++			emit(ctx, sll, MIPS_R_T9, hi(dst), 32 - imm);
++			emit(ctx, srl, lo(dst), lo(dst), imm);
++			emit(ctx, sra, hi(dst), hi(dst), imm);
++			emit(ctx, or, lo(dst), lo(dst), MIPS_R_T9);
++		} else {
++			emit(ctx, sra, lo(dst), hi(dst), imm - 32);
++			emit(ctx, sra, hi(dst), hi(dst), 31);
++		}
++		break;
++	}
++	clobber_reg64(ctx, dst);
++}
++
++/* ALU shift register (64-bit) */
++static void emit_shift_r64(struct jit_context *ctx,
++			   const u8 dst[], u8 src, u8 op)
++{
++	u8 t1 = MIPS_R_T8;
++	u8 t2 = MIPS_R_T9;
++
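++	/*
++	 * Dispatch on bit 5 of the shift amount: the beqz skips the
++	 * "shift >= 32" sequence when (src & 32) is zero. t2 = ~src is
++	 * used by the "shift < 32" sequences to form the complementary
++	 * shift that extracts the bits crossing the word boundary.
++	 */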
++	emit(ctx, andi, t1, src, 32);              /* t1 = src & 32          */
++	emit(ctx, beqz, t1, 16);                   /* PC += 16 if t1 == 0    */
++	emit(ctx, nor, t2, src, MIPS_R_ZERO);      /* t2 = ~src (delay slot) */
++
++	switch (BPF_OP(op)) {
++	/* dst = dst << src */
++	case BPF_LSH:
++		/* Next: shift >= 32 */
++		emit(ctx, sllv, hi(dst), lo(dst), src);    /* dh = dl << src */
++		emit(ctx, move, lo(dst), MIPS_R_ZERO);     /* dl = 0         */
++		emit(ctx, b, 20);                          /* PC += 20       */
++		/* +16: shift < 32 */
++		emit(ctx, srl, t1, lo(dst), 1);            /* t1 = dl >> 1   */
++		emit(ctx, srlv, t1, t1, t2);               /* t1 = t1 >> t2  */
++		emit(ctx, sllv, lo(dst), lo(dst), src);    /* dl = dl << src */
++		emit(ctx, sllv, hi(dst), hi(dst), src);    /* dh = dh << src */
++		emit(ctx, or, hi(dst), hi(dst), t1);       /* dh = dh | t1   */
++		break;
++	/* dst = dst >> src */
++	case BPF_RSH:
++		/* Next: shift >= 32 */
++		emit(ctx, srlv, lo(dst), hi(dst), src);    /* dl = dh >> src */
++		emit(ctx, move, hi(dst), MIPS_R_ZERO);     /* dh = 0         */
++		emit(ctx, b, 20);                          /* PC += 20       */
++		/* +16: shift < 32 */
++		emit(ctx, sll, t1, hi(dst), 1);            /* t1 = dh << 1   */
++		emit(ctx, sllv, t1, t1, t2);               /* t1 = t1 << t2  */
++		emit(ctx, srlv, lo(dst), lo(dst), src);    /* dl = dl >> src */
++		emit(ctx, srlv, hi(dst), hi(dst), src);    /* dh = dh >> src */
++		emit(ctx, or, lo(dst), lo(dst), t1);       /* dl = dl | t1   */
++		break;
++	/* dst = dst >> src (arithmetic) */
++	case BPF_ARSH:
++		/* Next: shift >= 32 */
++		emit(ctx, srav, lo(dst), hi(dst), src);   /* dl = dh >>a src */
++		emit(ctx, sra, hi(dst), hi(dst), 31);     /* dh = dh >>a 31  */
++		emit(ctx, b, 20);                         /* PC += 20        */
++		/* +16: shift < 32 */
++		emit(ctx, sll, t1, hi(dst), 1);           /* t1 = dh << 1    */
++		emit(ctx, sllv, t1, t1, t2);              /* t1 = t1 << t2   */
++		emit(ctx, srlv, lo(dst), lo(dst), src);   /* dl = dl >> src  */
++		emit(ctx, srav, hi(dst), hi(dst), src);   /* dh = dh >>a src */
++		emit(ctx, or, lo(dst), lo(dst), t1);      /* dl = dl | t1    */
++		break;
++	}
++
++	/* +20: Done */
++	clobber_reg64(ctx, dst);
++}
++
++/* ALU mul immediate (64x32-bit) */
++static void emit_mul_i64(struct jit_context *ctx, const u8 dst[], s32 imm)
++{
++	u8 src = MIPS_R_T6;
++	u8 tmp = MIPS_R_T9;
++
++	switch (imm) {
++	/* dst = dst * 1 is a no-op */
++	case 1:
++		break;
++	/* dst = dst * -1 */
++	case -1:
++		emit_neg_i64(ctx, dst);
++		break;
++	case 0:
++		emit_mov_r(ctx, lo(dst), MIPS_R_ZERO);
++		emit_mov_r(ctx, hi(dst), MIPS_R_ZERO);
++		break;
++	/* Full 64x32 multiply */
++	default:
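++		/*
++		 * 64x32 multiply with the sign-extended immediate:
++		 * the new hi(dst) is hi(dst) * imm plus the high word of
++		 * the unsigned product lo(dst) * imm; when imm < 0, the
++		 * 0xffffffff high word of the sign-extended immediate
++		 * also contributes -lo(dst) (mod 2^32).
++		 */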
++		/* hi(dst) = hi(dst) * src(imm) */
++		emit_mov_i(ctx, src, imm);
++		if (cpu_has_mips32r1 || cpu_has_mips32r6) {
++			emit(ctx, mul, hi(dst), hi(dst), src);
++		} else {
++			emit(ctx, multu, hi(dst), src);
++			emit(ctx, mflo, hi(dst));
++		}
++
++		/* hi(dst) = hi(dst) - lo(dst) */
++		if (imm < 0)
++			emit(ctx, subu, hi(dst), hi(dst), lo(dst));
++
++		/* tmp = lo(dst) * src(imm) >> 32 */
++		/* lo(dst) = lo(dst) * src(imm) */
++		if (cpu_has_mips32r6) {
++			emit(ctx, muhu, tmp, lo(dst), src);
++			emit(ctx, mulu, lo(dst), lo(dst), src);
++		} else {
++			emit(ctx, multu, lo(dst), src);
++			emit(ctx, mflo, lo(dst));
++			emit(ctx, mfhi, tmp);
++		}
++
++		/* hi(dst) += tmp */
++		emit(ctx, addu, hi(dst), hi(dst), tmp);
++		clobber_reg64(ctx, dst);
++		break;
++	}
++}
++
++/* ALU mul register (64x64-bit) */
++static void emit_mul_r64(struct jit_context *ctx,
++			 const u8 dst[], const u8 src[])
++{
++	u8 acc = MIPS_R_T8;
++	u8 tmp = MIPS_R_T9;
++
++	/* acc = hi(dst) * lo(src) */
++	if (cpu_has_mips32r1 || cpu_has_mips32r6) {
++		emit(ctx, mul, acc, hi(dst), lo(src));
++	} else {
++		emit(ctx, multu, hi(dst), lo(src));
++		emit(ctx, mflo, acc);
++	}
++
++	/* tmp = lo(dst) * hi(src) */
++	if (cpu_has_mips32r1 || cpu_has_mips32r6) {
++		emit(ctx, mul, tmp, lo(dst), hi(src));
++	} else {
++		emit(ctx, multu, lo(dst), hi(src));
++		emit(ctx, mflo, tmp);
++	}
++
++	/* acc += tmp */
++	emit(ctx, addu, acc, acc, tmp);
++
++	/* tmp = lo(dst) * lo(src) >> 32 */
++	/* lo(dst) = lo(dst) * lo(src) */
++	if (cpu_has_mips32r6) {
++		emit(ctx, muhu, tmp, lo(dst), lo(src));
++		emit(ctx, mulu, lo(dst), lo(dst), lo(src));
++	} else {
++		emit(ctx, multu, lo(dst), lo(src));
++		emit(ctx, mflo, lo(dst));
++		emit(ctx, mfhi, tmp);
++	}
++
++	/* hi(dst) = acc + tmp */
++	emit(ctx, addu, hi(dst), acc, tmp);
++	clobber_reg64(ctx, dst);
++}
++
++/* Helper function for 64-bit modulo */
++static u64 jit_mod64(u64 a, u64 b)
++{
++	u64 rem;
++
++	div64_u64_rem(a, b, &rem);
++	return rem;
++}
++
++/* ALU div/mod register (64-bit) */
++static void emit_divmod_r64(struct jit_context *ctx,
++			    const u8 dst[], const u8 src[], u8 op)
++{
++	const u8 *r0 = bpf2mips32[BPF_REG_0]; /* Mapped to v0-v1 */
++	const u8 *r1 = bpf2mips32[BPF_REG_1]; /* Mapped to a0-a1 */
++	const u8 *r2 = bpf2mips32[BPF_REG_2]; /* Mapped to a2-a3 */
++	int exclude, k;
++	u32 addr = 0;
++
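++	/*
++	 * 64-bit division is delegated to a helper: both operands are moved
++	 * into the o32 argument registers a0-a3, div64_u64() or jit_mod64()
++	 * is called through t9, and the 64-bit result is taken from v0-v1.
++	 */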
++	/* Push caller-saved registers on stack */
++	push_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
++		  0, JIT_RESERVED_STACK);
++
++	/* Put 64-bit arguments 1 and 2 in registers a0-a3 */
++	for (k = 0; k < 2; k++) {
++		emit(ctx, move, MIPS_R_T9, src[k]);
++		emit(ctx, move, r1[k], dst[k]);
++		emit(ctx, move, r2[k], MIPS_R_T9);
++	}
++
++	/* Emit function call */
++	switch (BPF_OP(op)) {
++	/* dst = dst / src */
++	case BPF_DIV:
++		addr = (u32)&div64_u64;
++		break;
++	/* dst = dst % src */
++	case BPF_MOD:
++		addr = (u32)&jit_mod64;
++		break;
++	}
++	emit_mov_i(ctx, MIPS_R_T9, addr);
++	emit(ctx, jalr, MIPS_R_RA, MIPS_R_T9);
++	emit(ctx, nop); /* Delay slot */
++
++	/* Store the 64-bit result in dst */
++	emit(ctx, move, dst[0], r0[0]);
++	emit(ctx, move, dst[1], r0[1]);
++
++	/* Restore caller-saved registers, excluding the computed result */
++	exclude = BIT(lo(dst)) | BIT(hi(dst));
++	pop_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
++		 exclude, JIT_RESERVED_STACK);
++	emit_load_delay(ctx);
++
++	clobber_reg64(ctx, dst);
++	clobber_reg(ctx, MIPS_R_V0);
++	clobber_reg(ctx, MIPS_R_V1);
++	clobber_reg(ctx, MIPS_R_RA);
++}
++
++/* Swap bytes in a register word */
++static void emit_swap8_r(struct jit_context *ctx, u8 dst, u8 src, u8 mask)
++{
++	u8 tmp = MIPS_R_T9;
++
++	emit(ctx, and, tmp, src, mask); /* tmp = src & 0x00ff00ff */
++	emit(ctx, sll, tmp, tmp, 8);    /* tmp = tmp << 8         */
++	emit(ctx, srl, dst, src, 8);    /* dst = src >> 8         */
++	emit(ctx, and, dst, dst, mask); /* dst = dst & 0x00ff00ff */
++	emit(ctx, or,  dst, dst, tmp);  /* dst = dst | tmp        */
++}
++
++/* Swap half words in a register word */
++static void emit_swap16_r(struct jit_context *ctx, u8 dst, u8 src)
++{
++	u8 tmp = MIPS_R_T9;
++
++	emit(ctx, sll, tmp, src, 16);  /* tmp = src << 16 */
++	emit(ctx, srl, dst, src, 16);  /* dst = src >> 16 */
++	emit(ctx, or,  dst, dst, tmp); /* dst = dst | tmp */
++}
++
++/* Swap bytes and truncate a register double word, word or half word */
++static void emit_bswap_r64(struct jit_context *ctx, const u8 dst[], u32 width)
++{
++	u8 tmp = MIPS_R_T8;
++
++	switch (width) {
++	/* Swap bytes in a double word */
++	case 64:
++		if (cpu_has_mips32r2 || cpu_has_mips32r6) {
++			emit(ctx, rotr, tmp, hi(dst), 16);
++			emit(ctx, rotr, hi(dst), lo(dst), 16);
++			emit(ctx, wsbh, lo(dst), tmp);
++			emit(ctx, wsbh, hi(dst), hi(dst));
++		} else {
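++			/*
++			 * Generic byte swap: exchange the two words while
++			 * swapping the half-words within each, then swap
++			 * the bytes within each half-word using the
++			 * 0x00ff00ff mask.
++			 */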
++			emit_swap16_r(ctx, tmp, lo(dst));
++			emit_swap16_r(ctx, lo(dst), hi(dst));
++			emit(ctx, move, hi(dst), tmp);
++
++			emit(ctx, lui, tmp, 0xff);      /* tmp = 0x00ff0000 */
++			emit(ctx, ori, tmp, tmp, 0xff); /* tmp = 0x00ff00ff */
++			emit_swap8_r(ctx, lo(dst), lo(dst), tmp);
++			emit_swap8_r(ctx, hi(dst), hi(dst), tmp);
++		}
++		break;
++	/* Swap bytes in a word */
++	/* Swap bytes in a half word */
++	case 32:
++	case 16:
++		emit_bswap_r(ctx, lo(dst), width);
++		emit(ctx, move, hi(dst), MIPS_R_ZERO);
++		break;
++	}
++	clobber_reg64(ctx, dst);
++}
++
++/* Truncate a register double word, word or half word */
++static void emit_trunc_r64(struct jit_context *ctx, const u8 dst[], u32 width)
++{
++	switch (width) {
++	case 64:
++		break;
++	/* Zero-extend a word */
++	case 32:
++		emit(ctx, move, hi(dst), MIPS_R_ZERO);
++		clobber_reg(ctx, hi(dst));
++		break;
++	/* Zero-extend a half word */
++	case 16:
++		emit(ctx, move, hi(dst), MIPS_R_ZERO);
++		emit(ctx, andi, lo(dst), lo(dst), 0xffff);
++		clobber_reg64(ctx, dst);
++		break;
++	}
++}
++
++/* Load operation: dst = *(size*)(src + off) */
++static void emit_ldx(struct jit_context *ctx,
++		     const u8 dst[], u8 src, s16 off, u8 size)
++{
++	switch (size) {
++	/* Load a byte */
++	case BPF_B:
++		emit(ctx, lbu, lo(dst), off, src);
++		emit(ctx, move, hi(dst), MIPS_R_ZERO);
++		break;
++	/* Load a half word */
++	case BPF_H:
++		emit(ctx, lhu, lo(dst), off, src);
++		emit(ctx, move, hi(dst), MIPS_R_ZERO);
++		break;
++	/* Load a word */
++	case BPF_W:
++		emit(ctx, lw, lo(dst), off, src);
++		emit(ctx, move, hi(dst), MIPS_R_ZERO);
++		break;
++	/* Load a double word */
++	case BPF_DW:
++		if (dst[1] == src) {
++			emit(ctx, lw, dst[0], off + 4, src);
++			emit(ctx, lw, dst[1], off, src);
++		} else {
++			emit(ctx, lw, dst[1], off, src);
++			emit(ctx, lw, dst[0], off + 4, src);
++		}
++		emit_load_delay(ctx);
++		break;
++	}
++	clobber_reg64(ctx, dst);
++}
++
++/* Store operation: *(size *)(dst + off) = src */
++static void emit_stx(struct jit_context *ctx,
++		     const u8 dst, const u8 src[], s16 off, u8 size)
++{
++	switch (size) {
++	/* Store a byte */
++	case BPF_B:
++		emit(ctx, sb, lo(src), off, dst);
++		break;
++	/* Store a half word */
++	case BPF_H:
++		emit(ctx, sh, lo(src), off, dst);
++		break;
++	/* Store a word */
++	case BPF_W:
++		emit(ctx, sw, lo(src), off, dst);
++		break;
++	/* Store a double word */
++	case BPF_DW:
++		emit(ctx, sw, src[1], off, dst);
++		emit(ctx, sw, src[0], off + 4, dst);
++		break;
++	}
++}
++
++/* Atomic read-modify-write (32-bit, non-ll/sc fallback) */
++static void emit_atomic_r32(struct jit_context *ctx,
++			    u8 dst, u8 src, s16 off, u8 code)
++{
++	u32 exclude = 0;
++	u32 addr = 0;
++
++	/* Push caller-saved registers on stack */
++	push_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
++		  0, JIT_RESERVED_STACK);
++	/*
++	 * Argument 1: dst+off if xchg, otherwise src, passed in register a0
++	 * Argument 2: src if xchg, otherwise dst+off, passed in register a1
++	 */
++	emit(ctx, move, MIPS_R_T9, dst);
++	emit(ctx, move, MIPS_R_A0, src);
++	emit(ctx, addiu, MIPS_R_A1, MIPS_R_T9, off);
++
++	/* Emit function call */
++	switch (code) {
++	case BPF_ADD:
++		addr = (u32)&atomic_add;
++		break;
++	case BPF_SUB:
++		addr = (u32)&atomic_sub;
++		break;
++	case BPF_OR:
++		addr = (u32)&atomic_or;
++		break;
++	case BPF_AND:
++		addr = (u32)&atomic_and;
++		break;
++	case BPF_XOR:
++		addr = (u32)&atomic_xor;
++		break;
++	}
++	emit_mov_i(ctx, MIPS_R_T9, addr);
++	emit(ctx, jalr, MIPS_R_RA, MIPS_R_T9);
++	emit(ctx, nop); /* Delay slot */
++
++	/* Restore caller-saved registers, except any fetched value */
++	pop_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
++		 exclude, JIT_RESERVED_STACK);
++	emit_load_delay(ctx);
++	clobber_reg(ctx, MIPS_R_RA);
++}
++
++/* Atomic read-modify-write (64-bit) */
++static void emit_atomic_r64(struct jit_context *ctx,
++			    u8 dst, const u8 src[], s16 off, u8 code)
++{
++	const u8 *r1 = bpf2mips32[BPF_REG_1]; /* Mapped to a0-a1 */
++	u32 exclude = 0;
++	u32 addr = 0;
++
++	/* Push caller-saved registers on stack */
++	push_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
++		  0, JIT_RESERVED_STACK);
++	/*
++	 * Argument 1: 64-bit src, passed in registers a0-a1
++	 * Argument 2: 32-bit dst+off, passed in register a2
++	 */
++	emit(ctx, move, MIPS_R_T9, dst);
++	emit(ctx, move, r1[0], src[0]);
++	emit(ctx, move, r1[1], src[1]);
++	emit(ctx, addiu, MIPS_R_A2, MIPS_R_T9, off);
++
++	/* Emit function call */
++	switch (code) {
++	case BPF_ADD:
++		addr = (u32)&atomic64_add;
++		break;
++	case BPF_SUB:
++		addr = (u32)&atomic64_sub;
++		break;
++	case BPF_OR:
++		addr = (u32)&atomic64_or;
++		break;
++	case BPF_AND:
++		addr = (u32)&atomic64_and;
++		break;
++	case BPF_XOR:
++		addr = (u32)&atomic64_xor;
++		break;
++	}
++	emit_mov_i(ctx, MIPS_R_T9, addr);
++	emit(ctx, jalr, MIPS_R_RA, MIPS_R_T9);
++	emit(ctx, nop); /* Delay slot */
++
++	/* Restore caller-saved registers, except any fetched value */
++	pop_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
++		 exclude, JIT_RESERVED_STACK);
++	emit_load_delay(ctx);
++	clobber_reg(ctx, MIPS_R_RA);
++}
++
++/*
++ * Conditional movz or an emulated equivalent.
++ * Note that the rs register may be modified.
++ */
++static void emit_movz_r(struct jit_context *ctx, u8 rd, u8 rs, u8 rt)
++{
++	if (cpu_has_mips_2) {
++		emit(ctx, movz, rd, rs, rt);           /* rd = rt ? rd : rs  */
++	} else if (cpu_has_mips32r6) {
++		if (rs != MIPS_R_ZERO)
++			emit(ctx, seleqz, rs, rs, rt); /* rs = 0 if rt != 0  */
++		emit(ctx, selnez, rd, rd, rt);         /* rd = 0 if rt == 0  */
++		if (rs != MIPS_R_ZERO)
++			emit(ctx, or, rd, rd, rs);     /* rd = rd | rs       */
++	} else {
++		emit(ctx, bnez, rt, 8);                /* PC += 8 if rt != 0 */
++		emit(ctx, nop);                        /* +0: delay slot     */
++		emit(ctx, or, rd, rs, MIPS_R_ZERO);    /* +4: rd = rs        */
++	}
++	clobber_reg(ctx, rd);
++	clobber_reg(ctx, rs);
++}
++
++/*
++ * Conditional movn or an emulated equivalent.
++ * Note that the rs register may be modified.
++ */
++static void emit_movn_r(struct jit_context *ctx, u8 rd, u8 rs, u8 rt)
++{
++	if (cpu_has_mips_2) {
++		emit(ctx, movn, rd, rs, rt);           /* rd = rt ? rs : rd  */
++	} else if (cpu_has_mips32r6) {
++		if (rs != MIPS_R_ZERO)
++			emit(ctx, selnez, rs, rs, rt); /* rs = 0 if rt == 0  */
++		emit(ctx, seleqz, rd, rd, rt);         /* rd = 0 if rt != 0  */
++		if (rs != MIPS_R_ZERO)
++			emit(ctx, or, rd, rd, rs);     /* rd = rd | rs       */
++	} else {
++		emit(ctx, beqz, rt, 8);                /* PC += 8 if rt == 0 */
++		emit(ctx, nop);                        /* +0: delay slot     */
++		emit(ctx, or, rd, rs, MIPS_R_ZERO);    /* +4: rd = rs        */
++	}
++	clobber_reg(ctx, rd);
++	clobber_reg(ctx, rs);
++}
++
++/* Emulation of 64-bit sltiu rd, rs, imm, where imm may be S32_MAX + 1 */
++static void emit_sltiu_r64(struct jit_context *ctx, u8 rd,
++			   const u8 rs[], s64 imm)
++{
++	u8 tmp = MIPS_R_T9;
++
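++	/*
++	 * Unsigned compare against the sign-extended 64-bit immediate.
++	 * For imm < 0 the implicit high word is ~0, so rs < imm iff
++	 * hi(rs) < ~0 or lo(rs) < lo(imm). For imm >= 0 the high word is
++	 * 0, so the low-word comparison only counts when hi(rs) is zero.
++	 */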
++	if (imm < 0) {
++		emit_mov_i(ctx, rd, imm);                 /* rd = imm        */
++		emit(ctx, sltu, rd, lo(rs), rd);          /* rd = rsl < rd   */
++		emit(ctx, sltiu, tmp, hi(rs), -1);        /* tmp = rsh < ~0U */
++		emit(ctx, or, rd, rd, tmp);               /* rd = rd | tmp   */
++	} else { /* imm >= 0 */
++		if (imm > 0x7fff) {
++			emit_mov_i(ctx, rd, (s32)imm);     /* rd = imm       */
++			emit(ctx, sltu, rd, lo(rs), rd);   /* rd = rsl < rd  */
++		} else {
++			emit(ctx, sltiu, rd, lo(rs), imm); /* rd = rsl < imm */
++		}
++		emit_movn_r(ctx, rd, MIPS_R_ZERO, hi(rs)); /* rd = 0 if rsh  */
++	}
++}
++
++/* Emulation of 64-bit sltu rd, rs, rt */
++static void emit_sltu_r64(struct jit_context *ctx, u8 rd,
++			  const u8 rs[], const u8 rt[])
++{
++	u8 tmp = MIPS_R_T9;
++
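++	/*
++	 * The result is the high-word comparison, except when the high
++	 * words are equal, in which case the low-word comparison decides.
++	 */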
++	emit(ctx, sltu, rd, lo(rs), lo(rt));           /* rd = rsl < rtl     */
++	emit(ctx, subu, tmp, hi(rs), hi(rt));          /* tmp = rsh - rth    */
++	emit_movn_r(ctx, rd, MIPS_R_ZERO, tmp);        /* rd = 0 if tmp != 0 */
++	emit(ctx, sltu, tmp, hi(rs), hi(rt));          /* tmp = rsh < rth    */
++	emit(ctx, or, rd, rd, tmp);                    /* rd = rd | tmp      */
++}
++
++/* Emulation of 64-bit slti rd, rs, imm, where imm may be S32_MAX + 1 */
++static void emit_slti_r64(struct jit_context *ctx, u8 rd,
++			  const u8 rs[], s64 imm)
++{
++	u8 t1 = MIPS_R_T8;
++	u8 t2 = MIPS_R_T9;
++	u8 cmp;
++
++	/*
++	 * if ((rs < 0) ^ (imm < 0)) t1 = imm >u rsl
++	 * else                      t1 = rsl <u imm
++	 */
++	emit_mov_i(ctx, rd, (s32)imm);
++	emit(ctx, sltu, t1, lo(rs), rd);               /* t1 = rsl <u imm   */
++	emit(ctx, sltu, t2, rd, lo(rs));               /* t2 = imm <u rsl   */
++	emit(ctx, srl, rd, hi(rs), 31);                /* rd = rsh >> 31    */
++	if (imm < 0)
++		emit_movz_r(ctx, t1, t2, rd);          /* t1 = rd ? t1 : t2 */
++	else
++		emit_movn_r(ctx, t1, t2, rd);          /* t1 = rd ? t2 : t1 */
++	/*
++	 * if ((imm < 0 && rsh != 0xffffffff) ||
++	 *     (imm >= 0 && rsh != 0))
++	 *      t1 = 0
++	 */
++	if (imm < 0) {
++		emit(ctx, addiu, rd, hi(rs), 1);       /* rd = rsh + 1 */
++		cmp = rd;
++	} else { /* imm >= 0 */
++		cmp = hi(rs);
++	}
++	emit_movn_r(ctx, t1, MIPS_R_ZERO, cmp);        /* t1 = 0 if cmp != 0 */
++
++	/*
++	 * if (imm < 0) rd = rsh < -1
++	 * else         rd = rsh != 0
++	 * rd = rd | t1
++	 */
++	emit(ctx, slti, rd, hi(rs), imm < 0 ? -1 : 0); /* rd = rsh < hi(imm) */
++	emit(ctx, or, rd, rd, t1);                     /* rd = rd | t1       */
++}
++
++/* Emulation of 64-bit slt rd, rs, rt */
++static void emit_slt_r64(struct jit_context *ctx, u8 rd,
++			 const u8 rs[], const u8 rt[])
++{
++	u8 t1 = MIPS_R_T7;
++	u8 t2 = MIPS_R_T8;
++	u8 t3 = MIPS_R_T9;
++
++	/*
++	 * if ((rs < 0) ^ (rt < 0)) t1 = rtl <u rsl
++	 * else                     t1 = rsl <u rtl
++	 * if (rsh == rth)          t1 = 0
++	 */
++	emit(ctx, sltu, t1, lo(rs), lo(rt));           /* t1 = rsl <u rtl   */
++	emit(ctx, sltu, t2, lo(rt), lo(rs));           /* t2 = rtl <u rsl   */
++	emit(ctx, xor, t3, hi(rs), hi(rt));            /* t3 = rsh ^ rth    */
++	emit(ctx, srl, rd, t3, 31);                    /* rd = t3 >> 31     */
++	emit_movn_r(ctx, t1, t2, rd);                  /* t1 = rd ? t2 : t1 */
++	emit_movn_r(ctx, t1, MIPS_R_ZERO, t3);         /* t1 = 0 if t3 != 0 */
++
++	/* rd = (rsh < rth) | t1 */
++	emit(ctx, slt, rd, hi(rs), hi(rt));            /* rd = rsh <s rth   */
++	emit(ctx, or, rd, rd, t1);                     /* rd = rd | t1      */
++}
++
++/* Jump immediate (64-bit) */
++static void emit_jmp_i64(struct jit_context *ctx,
++			 const u8 dst[], s32 imm, s32 off, u8 op)
++{
++	u8 tmp = MIPS_R_T6;
++
++	switch (op) {
++	/* No-op, used internally for branch optimization */
++	case JIT_JNOP:
++		break;
++	/* PC += off if dst == imm */
++	/* PC += off if dst != imm */
++	case BPF_JEQ:
++	case BPF_JNE:
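++		/*
++		 * Equality test against the sign-extended immediate: tmp is
++		 * made zero iff the low words match, then the high-word test
++		 * is OR-ed in. The implicit high word of the immediate is 0
++		 * or ~0, so the high words match iff hi(dst) == 0 (imm >= 0)
++		 * or hi(dst) + 1 == 0 (imm < 0).
++		 */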
++		if (imm >= -0x7fff && imm <= 0x8000) {
++			emit(ctx, addiu, tmp, lo(dst), -imm);
++		} else if ((u32)imm <= 0xffff) {
++			emit(ctx, xori, tmp, lo(dst), imm);
++		} else {       /* Register fallback */
++			emit_mov_i(ctx, tmp, imm);
++			emit(ctx, xor, tmp, lo(dst), tmp);
++		}
++		if (imm < 0) { /* Compare sign extension */
++			emit(ctx, addu, MIPS_R_T9, hi(dst), 1);
++			emit(ctx, or, tmp, tmp, MIPS_R_T9);
++		} else {       /* Compare zero extension */
++			emit(ctx, or, tmp, tmp, hi(dst));
++		}
++		if (op == BPF_JEQ)
++			emit(ctx, beqz, tmp, off);
++		else   /* BPF_JNE */
++			emit(ctx, bnez, tmp, off);
++		break;
++	/* PC += off if dst & imm */
++	/* PC += off if (dst & imm) == 0 (not in BPF, used for long jumps) */
++	case BPF_JSET:
++	case JIT_JNSET:
++		if ((u32)imm <= 0xffff) {
++			emit(ctx, andi, tmp, lo(dst), imm);
++		} else {     /* Register fallback */
++			emit_mov_i(ctx, tmp, imm);
++			emit(ctx, and, tmp, lo(dst), tmp);
++		}
++		if (imm < 0) /* Sign-extension pulls in high word */
++			emit(ctx, or, tmp, tmp, hi(dst));
++		if (op == BPF_JSET)
++			emit(ctx, bnez, tmp, off);
++		else   /* JIT_JNSET */
++			emit(ctx, beqz, tmp, off);
++		break;
++	/* PC += off if dst > imm */
++	case BPF_JGT:
++		emit_sltiu_r64(ctx, tmp, dst, (s64)imm + 1);
++		emit(ctx, beqz, tmp, off);
++		break;
++	/* PC += off if dst >= imm */
++	case BPF_JGE:
++		emit_sltiu_r64(ctx, tmp, dst, imm);
++		emit(ctx, beqz, tmp, off);
++		break;
++	/* PC += off if dst < imm */
++	case BPF_JLT:
++		emit_sltiu_r64(ctx, tmp, dst, imm);
++		emit(ctx, bnez, tmp, off);
++		break;
++	/* PC += off if dst <= imm */
++	case BPF_JLE:
++		emit_sltiu_r64(ctx, tmp, dst, (s64)imm + 1);
++		emit(ctx, bnez, tmp, off);
++		break;
++	/* PC += off if dst > imm (signed) */
++	case BPF_JSGT:
++		emit_slti_r64(ctx, tmp, dst, (s64)imm + 1);
++		emit(ctx, beqz, tmp, off);
++		break;
++	/* PC += off if dst >= imm (signed) */
++	case BPF_JSGE:
++		emit_slti_r64(ctx, tmp, dst, imm);
++		emit(ctx, beqz, tmp, off);
++		break;
++	/* PC += off if dst < imm (signed) */
++	case BPF_JSLT:
++		emit_slti_r64(ctx, tmp, dst, imm);
++		emit(ctx, bnez, tmp, off);
++		break;
++	/* PC += off if dst <= imm (signed) */
++	case BPF_JSLE:
++		emit_slti_r64(ctx, tmp, dst, (s64)imm + 1);
++		emit(ctx, bnez, tmp, off);
++		break;
++	}
++}
++
++/* Jump register (64-bit) */
++static void emit_jmp_r64(struct jit_context *ctx,
++			 const u8 dst[], const u8 src[], s32 off, u8 op)
++{
++	u8 t1 = MIPS_R_T6;
++	u8 t2 = MIPS_R_T7;
++
++	switch (op) {
++	/* No-op, used internally for branch optimization */
++	case JIT_JNOP:
++		break;
++	/* PC += off if dst == src */
++	/* PC += off if dst != src */
++	case BPF_JEQ:
++	case BPF_JNE:
++		emit(ctx, subu, t1, lo(dst), lo(src));
++		emit(ctx, subu, t2, hi(dst), hi(src));
++		emit(ctx, or, t1, t1, t2);
++		if (op == BPF_JEQ)
++			emit(ctx, beqz, t1, off);
++		else   /* BPF_JNE */
++			emit(ctx, bnez, t1, off);
++		break;
++	/* PC += off if dst & src */
++	/* PC += off if (dst & src) == 0 (not in BPF, used for long jumps) */
++	case BPF_JSET:
++	case JIT_JNSET:
++		emit(ctx, and, t1, lo(dst), lo(src));
++		emit(ctx, and, t2, hi(dst), hi(src));
++		emit(ctx, or, t1, t1, t2);
++		if (op == BPF_JSET)
++			emit(ctx, bnez, t1, off);
++		else   /* JIT_JNSET */
++			emit(ctx, beqz, t1, off);
++		break;
++	/* PC += off if dst > src */
++	case BPF_JGT:
++		emit_sltu_r64(ctx, t1, src, dst);
++		emit(ctx, bnez, t1, off);
++		break;
++	/* PC += off if dst >= src */
++	case BPF_JGE:
++		emit_sltu_r64(ctx, t1, dst, src);
++		emit(ctx, beqz, t1, off);
++		break;
++	/* PC += off if dst < src */
++	case BPF_JLT:
++		emit_sltu_r64(ctx, t1, dst, src);
++		emit(ctx, bnez, t1, off);
++		break;
++	/* PC += off if dst <= src */
++	case BPF_JLE:
++		emit_sltu_r64(ctx, t1, src, dst);
++		emit(ctx, beqz, t1, off);
++		break;
++	/* PC += off if dst > src (signed) */
++	case BPF_JSGT:
++		emit_slt_r64(ctx, t1, src, dst);
++		emit(ctx, bnez, t1, off);
++		break;
++	/* PC += off if dst >= src (signed) */
++	case BPF_JSGE:
++		emit_slt_r64(ctx, t1, dst, src);
++		emit(ctx, beqz, t1, off);
++		break;
++	/* PC += off if dst < src (signed) */
++	case BPF_JSLT:
++		emit_slt_r64(ctx, t1, dst, src);
++		emit(ctx, bnez, t1, off);
++		break;
++	/* PC += off if dst <= src (signed) */
++	case BPF_JSLE:
++		emit_slt_r64(ctx, t1, src, dst);
++		emit(ctx, beqz, t1, off);
++		break;
++	}
++}
++
++/* Function call */
++static int emit_call(struct jit_context *ctx, const struct bpf_insn *insn)
++{
++	bool fixed;
++	u64 addr;
++
++	/* Decode the call address */
++	if (bpf_jit_get_func_addr(ctx->program, insn, false,
++				  &addr, &fixed) < 0)
++		return -1;
++	if (!fixed)
++		return -1;
++
++	/* Push stack arguments */
++	push_regs(ctx, JIT_STACK_REGS, 0, JIT_RESERVED_STACK);
++
++	/* Emit function call */
++	emit_mov_i(ctx, MIPS_R_T9, addr);
++	emit(ctx, jalr, MIPS_R_RA, MIPS_R_T9);
++	emit(ctx, nop); /* Delay slot */
++
++	clobber_reg(ctx, MIPS_R_RA);
++	clobber_reg(ctx, MIPS_R_V0);
++	clobber_reg(ctx, MIPS_R_V1);
++	return 0;
++}
++
++/* Function tail call */
++static int emit_tail_call(struct jit_context *ctx)
++{
++	u8 ary = lo(bpf2mips32[BPF_REG_2]);
++	u8 ind = lo(bpf2mips32[BPF_REG_3]);
++	u8 t1 = MIPS_R_T8;
++	u8 t2 = MIPS_R_T9;
++	int off;
++
++	/*
++	 * Tail call:
++	 * eBPF R1   - function argument (context ptr), passed in a0-a1
++	 * eBPF R2   - ptr to object with array of function entry points
++	 * eBPF R3   - array index of function to be called
++	 * stack[sz] - remaining tail call count, initialized in prologue
++	 */
++
++	/* if (ind >= ary->map.max_entries) goto out */
++	off = offsetof(struct bpf_array, map.max_entries);
++	if (off > 0x7fff)
++		return -1;
++	emit(ctx, lw, t1, off, ary);             /* t1 = ary->map.max_entries*/
++	emit_load_delay(ctx);                    /* Load delay slot          */
++	emit(ctx, sltu, t1, ind, t1);            /* t1 = ind < t1            */
++	emit(ctx, beqz, t1, get_offset(ctx, 1)); /* PC += off(1) if t1 == 0  */
++						 /* (next insn delay slot)   */
++	/* if (TCC-- <= 0) goto out */
++	emit(ctx, lw, t2, ctx->stack_size, MIPS_R_SP);  /* t2 = *(SP + size) */
++	emit_load_delay(ctx);                     /* Load delay slot         */
++	emit(ctx, blez, t2, get_offset(ctx, 1));  /* PC += off(1) if t2 <= 0 */
++	emit(ctx, addiu, t2, t2, -1);             /* t2-- (delay slot)       */
++	emit(ctx, sw, t2, ctx->stack_size, MIPS_R_SP);  /* *(SP + size) = t2 */
++
++	/* prog = ary->ptrs[ind] */
++	off = offsetof(struct bpf_array, ptrs);
++	if (off > 0x7fff)
++		return -1;
++	emit(ctx, sll, t1, ind, 2);               /* t1 = ind << 2           */
++	emit(ctx, addu, t1, t1, ary);             /* t1 += ary               */
++	emit(ctx, lw, t2, off, t1);               /* t2 = *(t1 + off)        */
++	emit_load_delay(ctx);                     /* Load delay slot         */
++
++	/* if (prog == 0) goto out */
++	emit(ctx, beqz, t2, get_offset(ctx, 1));  /* PC += off(1) if t2 == 0 */
++	emit(ctx, nop);                           /* Delay slot              */
++
++	/* func = prog->bpf_func + skip (prologue skip offset) */
++	off = offsetof(struct bpf_prog, bpf_func);
++	if (off > 0x7fff)
++		return -1;
++	emit(ctx, lw, t1, off, t2);                /* t1 = *(t2 + off)       */
++	emit_load_delay(ctx);                      /* Load delay slot        */
++	emit(ctx, addiu, t1, t1, JIT_TCALL_SKIP);  /* t1 += skip (8 or 12)   */
++
++	/* goto func */
++	build_epilogue(ctx, t1);
++	return 0;
++}
++
++/*
++ * Stack frame layout for a JITed program (stack grows down).
++ *
++ * Higher address  : Caller's stack frame       :
++ *                 :----------------------------:
++ *                 : 64-bit eBPF args r3-r5     :
++ *                 :----------------------------:
++ *                 : Reserved / tail call count :
++ *                 +============================+  <--- MIPS sp before call
++ *                 | Callee-saved registers,    |
++ *                 | including RA and FP        |
++ *                 +----------------------------+  <--- eBPF FP (MIPS zero,fp)
++ *                 | Local eBPF variables       |
++ *                 | allocated by program       |
++ *                 +----------------------------+
++ *                 | Reserved for caller-saved  |
++ *                 | registers                  |
++ *                 +----------------------------+
++ *                 | Reserved for 64-bit eBPF   |
++ *                 | args r3-r5 & args passed   |
++ *                 | on stack in kernel calls   |
++ * Lower address   +============================+  <--- MIPS sp
++ */
++
++/* Build program prologue to set up the stack and registers */
++void build_prologue(struct jit_context *ctx)
++{
++	const u8 *r1 = bpf2mips32[BPF_REG_1];
++	const u8 *fp = bpf2mips32[BPF_REG_FP];
++	int stack, saved, locals, reserved;
++
++	/*
++	 * The first two instructions initialize TCC in the reserved (for us)
++	 * 16-byte area in the parent's stack frame. On a tail call, the
++	 * calling function jumps into the prologue after these instructions.
++	 */
++	emit(ctx, ori, MIPS_R_T9, MIPS_R_ZERO,
++	     min(MAX_TAIL_CALL_CNT + 1, 0xffff));
++	emit(ctx, sw, MIPS_R_T9, 0, MIPS_R_SP);
++
++	/*
++	 * Register eBPF R1 contains the 32-bit context pointer argument.
++	 * A 32-bit argument is always passed in MIPS register a0, regardless
++	 * of CPU endianness. Initialize R1 accordingly and zero-extend.
++	 */
++#ifdef __BIG_ENDIAN
++	emit(ctx, move, lo(r1), MIPS_R_A0);
++#endif
++
++	/* === Entry-point for tail calls === */
++
++	/* Zero-extend the 32-bit argument */
++	emit(ctx, move, hi(r1), MIPS_R_ZERO);
++
++	/* If the eBPF frame pointer was accessed it must be saved */
++	if (ctx->accessed & BIT(BPF_REG_FP))
++		clobber_reg64(ctx, fp);
++
++	/* Compute the stack space needed for callee-saved registers */
++	saved = hweight32(ctx->clobbered & JIT_CALLEE_REGS) * sizeof(u32);
++	saved = ALIGN(saved, MIPS_STACK_ALIGNMENT);
++
++	/* Stack space used by eBPF program local data */
++	locals = ALIGN(ctx->program->aux->stack_depth, MIPS_STACK_ALIGNMENT);
++
++	/*
++	 * If we are emitting function calls, reserve extra stack space for
++	 * caller-saved registers and function arguments passed on the stack.
++	 * The required space is computed automatically during resource
++	 * usage discovery (pass 1).
++	 */
++	reserved = ctx->stack_used;
++
++	/* Allocate the stack frame */
++	stack = ALIGN(saved + locals + reserved, MIPS_STACK_ALIGNMENT);
++	emit(ctx, addiu, MIPS_R_SP, MIPS_R_SP, -stack);
++
++	/* Store callee-saved registers on stack */
++	push_regs(ctx, ctx->clobbered & JIT_CALLEE_REGS, 0, stack - saved);
++
++	/* Initialize the eBPF frame pointer if accessed */
++	if (ctx->accessed & BIT(BPF_REG_FP))
++		emit(ctx, addiu, lo(fp), MIPS_R_SP, stack - saved);
++
++	ctx->saved_size = saved;
++	ctx->stack_size = stack;
++}
++
++/* Build the program epilogue to restore the stack and registers */
++void build_epilogue(struct jit_context *ctx, int dest_reg)
++{
++	/* Restore callee-saved registers from stack */
++	pop_regs(ctx, ctx->clobbered & JIT_CALLEE_REGS, 0,
++		 ctx->stack_size - ctx->saved_size);
++	/*
++	 * A 32-bit return value is always passed in MIPS register v0,
++	 * but on big-endian targets the low part of R0 is mapped to v1.
++	 */
++#ifdef __BIG_ENDIAN
++	emit(ctx, move, MIPS_R_V0, MIPS_R_V1);
++#endif
++
++	/* Jump to the return address and adjust the stack pointer */
++	emit(ctx, jr, dest_reg);
++	emit(ctx, addiu, MIPS_R_SP, MIPS_R_SP, ctx->stack_size);
++}
++
++/* Build one eBPF instruction */
++int build_insn(const struct bpf_insn *insn, struct jit_context *ctx)
++{
++	const u8 *dst = bpf2mips32[insn->dst_reg];
++	const u8 *src = bpf2mips32[insn->src_reg];
++	const u8 *tmp = bpf2mips32[JIT_REG_TMP];
++	u8 code = insn->code;
++	s16 off = insn->off;
++	s32 imm = insn->imm;
++	s32 val, rel;
++	u8 alu, jmp;
++
++	switch (code) {
++	/* ALU operations */
++	/* dst = imm */
++	case BPF_ALU | BPF_MOV | BPF_K:
++		emit_mov_i(ctx, lo(dst), imm);
++		emit_zext_ver(ctx, dst);
++		break;
++	/* dst = src */
++	case BPF_ALU | BPF_MOV | BPF_X:
++		if (imm == 1) {
++			/* Special mov32 for zext */
++			emit_mov_i(ctx, hi(dst), 0);
++		} else {
++			emit_mov_r(ctx, lo(dst), lo(src));
++			emit_zext_ver(ctx, dst);
++		}
++		break;
++	/* dst = -dst */
++	case BPF_ALU | BPF_NEG:
++		emit_alu_i(ctx, lo(dst), 0, BPF_NEG);
++		emit_zext_ver(ctx, dst);
++		break;
++	/* dst = dst & imm */
++	/* dst = dst | imm */
++	/* dst = dst ^ imm */
++	/* dst = dst << imm */
++	/* dst = dst >> imm */
++	/* dst = dst >> imm (arithmetic) */
++	/* dst = dst + imm */
++	/* dst = dst - imm */
++	/* dst = dst * imm */
++	/* dst = dst / imm */
++	/* dst = dst % imm */
++	case BPF_ALU | BPF_OR | BPF_K:
++	case BPF_ALU | BPF_AND | BPF_K:
++	case BPF_ALU | BPF_XOR | BPF_K:
++	case BPF_ALU | BPF_LSH | BPF_K:
++	case BPF_ALU | BPF_RSH | BPF_K:
++	case BPF_ALU | BPF_ARSH | BPF_K:
++	case BPF_ALU | BPF_ADD | BPF_K:
++	case BPF_ALU | BPF_SUB | BPF_K:
++	case BPF_ALU | BPF_MUL | BPF_K:
++	case BPF_ALU | BPF_DIV | BPF_K:
++	case BPF_ALU | BPF_MOD | BPF_K:
++		if (!valid_alu_i(BPF_OP(code), imm)) {
++			emit_mov_i(ctx, MIPS_R_T6, imm);
++			emit_alu_r(ctx, lo(dst), MIPS_R_T6, BPF_OP(code));
++		} else if (rewrite_alu_i(BPF_OP(code), imm, &alu, &val)) {
++			emit_alu_i(ctx, lo(dst), val, alu);
++		}
++		emit_zext_ver(ctx, dst);
++		break;
++	/* dst = dst & src */
++	/* dst = dst | src */
++	/* dst = dst ^ src */
++	/* dst = dst << src */
++	/* dst = dst >> src */
++	/* dst = dst >> src (arithmetic) */
++	/* dst = dst + src */
++	/* dst = dst - src */
++	/* dst = dst * src */
++	/* dst = dst / src */
++	/* dst = dst % src */
++	case BPF_ALU | BPF_AND | BPF_X:
++	case BPF_ALU | BPF_OR | BPF_X:
++	case BPF_ALU | BPF_XOR | BPF_X:
++	case BPF_ALU | BPF_LSH | BPF_X:
++	case BPF_ALU | BPF_RSH | BPF_X:
++	case BPF_ALU | BPF_ARSH | BPF_X:
++	case BPF_ALU | BPF_ADD | BPF_X:
++	case BPF_ALU | BPF_SUB | BPF_X:
++	case BPF_ALU | BPF_MUL | BPF_X:
++	case BPF_ALU | BPF_DIV | BPF_X:
++	case BPF_ALU | BPF_MOD | BPF_X:
++		emit_alu_r(ctx, lo(dst), lo(src), BPF_OP(code));
++		emit_zext_ver(ctx, dst);
++		break;
++	/* dst = imm (64-bit) */
++	case BPF_ALU64 | BPF_MOV | BPF_K:
++		emit_mov_se_i64(ctx, dst, imm);
++		break;
++	/* dst = src (64-bit) */
++	case BPF_ALU64 | BPF_MOV | BPF_X:
++		emit_mov_r(ctx, lo(dst), lo(src));
++		emit_mov_r(ctx, hi(dst), hi(src));
++		break;
++	/* dst = -dst (64-bit) */
++	case BPF_ALU64 | BPF_NEG:
++		emit_neg_i64(ctx, dst);
++		break;
++	/* dst = dst & imm (64-bit) */
++	case BPF_ALU64 | BPF_AND | BPF_K:
++		emit_alu_i64(ctx, dst, imm, BPF_OP(code));
++		break;
++	/* dst = dst | imm (64-bit) */
++	/* dst = dst ^ imm (64-bit) */
++	/* dst = dst + imm (64-bit) */
++	/* dst = dst - imm (64-bit) */
++	case BPF_ALU64 | BPF_OR | BPF_K:
++	case BPF_ALU64 | BPF_XOR | BPF_K:
++	case BPF_ALU64 | BPF_ADD | BPF_K:
++	case BPF_ALU64 | BPF_SUB | BPF_K:
++		if (imm)
++			emit_alu_i64(ctx, dst, imm, BPF_OP(code));
++		break;
++	/* dst = dst << imm (64-bit) */
++	/* dst = dst >> imm (64-bit) */
++	/* dst = dst >> imm (64-bit, arithmetic) */
++	case BPF_ALU64 | BPF_LSH | BPF_K:
++	case BPF_ALU64 | BPF_RSH | BPF_K:
++	case BPF_ALU64 | BPF_ARSH | BPF_K:
++		if (imm)
++			emit_shift_i64(ctx, dst, imm, BPF_OP(code));
++		break;
++	/* dst = dst * imm (64-bit) */
++	case BPF_ALU64 | BPF_MUL | BPF_K:
++		emit_mul_i64(ctx, dst, imm);
++		break;
++	/* dst = dst / imm (64-bit) */
++	/* dst = dst % imm (64-bit) */
++	case BPF_ALU64 | BPF_DIV | BPF_K:
++	case BPF_ALU64 | BPF_MOD | BPF_K:
++		/*
++		 * Sign-extend the immediate value into a temporary register,
++		 * and then do the operation on this register.
++		 */
++		emit_mov_se_i64(ctx, tmp, imm);
++		emit_divmod_r64(ctx, dst, tmp, BPF_OP(code));
++		break;
++	/* dst = dst & src (64-bit) */
++	/* dst = dst | src (64-bit) */
++	/* dst = dst ^ src (64-bit) */
++	/* dst = dst + src (64-bit) */
++	/* dst = dst - src (64-bit) */
++	case BPF_ALU64 | BPF_AND | BPF_X:
++	case BPF_ALU64 | BPF_OR | BPF_X:
++	case BPF_ALU64 | BPF_XOR | BPF_X:
++	case BPF_ALU64 | BPF_ADD | BPF_X:
++	case BPF_ALU64 | BPF_SUB | BPF_X:
++		emit_alu_r64(ctx, dst, src, BPF_OP(code));
++		break;
++	/* dst = dst << src (64-bit) */
++	/* dst = dst >> src (64-bit) */
++	/* dst = dst >> src (64-bit, arithmetic) */
++	case BPF_ALU64 | BPF_LSH | BPF_X:
++	case BPF_ALU64 | BPF_RSH | BPF_X:
++	case BPF_ALU64 | BPF_ARSH | BPF_X:
++		emit_shift_r64(ctx, dst, lo(src), BPF_OP(code));
++		break;
++	/* dst = dst * src (64-bit) */
++	case BPF_ALU64 | BPF_MUL | BPF_X:
++		emit_mul_r64(ctx, dst, src);
++		break;
++	/* dst = dst / src (64-bit) */
++	/* dst = dst % src (64-bit) */
++	case BPF_ALU64 | BPF_DIV | BPF_X:
++	case BPF_ALU64 | BPF_MOD | BPF_X:
++		emit_divmod_r64(ctx, dst, src, BPF_OP(code));
++		break;
++	/* dst = htole(dst) */
++	/* dst = htobe(dst) */
++	case BPF_ALU | BPF_END | BPF_FROM_LE:
++	case BPF_ALU | BPF_END | BPF_FROM_BE:
++		if (BPF_SRC(code) ==
++#ifdef __BIG_ENDIAN
++		    BPF_FROM_LE
++#else
++		    BPF_FROM_BE
++#endif
++		    )
++			emit_bswap_r64(ctx, dst, imm);
++		else
++			emit_trunc_r64(ctx, dst, imm);
++		break;
++	/* dst = imm64 */
++	case BPF_LD | BPF_IMM | BPF_DW:
++		emit_mov_i(ctx, lo(dst), imm);
++		emit_mov_i(ctx, hi(dst), insn[1].imm);
++		return 1;
++	/* LDX: dst = *(size *)(src + off) */
++	case BPF_LDX | BPF_MEM | BPF_W:
++	case BPF_LDX | BPF_MEM | BPF_H:
++	case BPF_LDX | BPF_MEM | BPF_B:
++	case BPF_LDX | BPF_MEM | BPF_DW:
++		emit_ldx(ctx, dst, lo(src), off, BPF_SIZE(code));
++		break;
++	/* ST: *(size *)(dst + off) = imm */
++	case BPF_ST | BPF_MEM | BPF_W:
++	case BPF_ST | BPF_MEM | BPF_H:
++	case BPF_ST | BPF_MEM | BPF_B:
++	case BPF_ST | BPF_MEM | BPF_DW:
++		switch (BPF_SIZE(code)) {
++		case BPF_DW:
++			/* Sign-extend immediate value into temporary reg */
++			emit_mov_se_i64(ctx, tmp, imm);
++			break;
++		case BPF_W:
++		case BPF_H:
++		case BPF_B:
++			emit_mov_i(ctx, lo(tmp), imm);
++			break;
++		}
++		emit_stx(ctx, lo(dst), tmp, off, BPF_SIZE(code));
++		break;
++	/* STX: *(size *)(dst + off) = src */
++	case BPF_STX | BPF_MEM | BPF_W:
++	case BPF_STX | BPF_MEM | BPF_H:
++	case BPF_STX | BPF_MEM | BPF_B:
++	case BPF_STX | BPF_MEM | BPF_DW:
++		emit_stx(ctx, lo(dst), src, off, BPF_SIZE(code));
++		break;
++	/* Speculation barrier */
++	case BPF_ST | BPF_NOSPEC:
++		break;
++	/* Atomics */
++	case BPF_STX | BPF_XADD | BPF_W:
++		switch (imm) {
++		case BPF_ADD:
++		case BPF_AND:
++		case BPF_OR:
++		case BPF_XOR:
++			if (cpu_has_llsc)
++				emit_atomic_r(ctx, lo(dst), lo(src), off, imm);
++			else /* Non-ll/sc fallback */
++				emit_atomic_r32(ctx, lo(dst), lo(src),
++						off, imm);
++			break;
++		default:
++			goto notyet;
++		}
++		break;
++	/* Atomics (64-bit) */
++	case BPF_STX | BPF_XADD | BPF_DW:
++		switch (imm) {
++		case BPF_ADD:
++		case BPF_AND:
++		case BPF_OR:
++		case BPF_XOR:
++			emit_atomic_r64(ctx, lo(dst), src, off, imm);
++			break;
++		default:
++			goto notyet;
++		}
++		break;
++	/* PC += off if dst == src */
++	/* PC += off if dst != src */
++	/* PC += off if dst & src */
++	/* PC += off if dst > src */
++	/* PC += off if dst >= src */
++	/* PC += off if dst < src */
++	/* PC += off if dst <= src */
++	/* PC += off if dst > src (signed) */
++	/* PC += off if dst >= src (signed) */
++	/* PC += off if dst < src (signed) */
++	/* PC += off if dst <= src (signed) */
++	case BPF_JMP32 | BPF_JEQ | BPF_X:
++	case BPF_JMP32 | BPF_JNE | BPF_X:
++	case BPF_JMP32 | BPF_JSET | BPF_X:
++	case BPF_JMP32 | BPF_JGT | BPF_X:
++	case BPF_JMP32 | BPF_JGE | BPF_X:
++	case BPF_JMP32 | BPF_JLT | BPF_X:
++	case BPF_JMP32 | BPF_JLE | BPF_X:
++	case BPF_JMP32 | BPF_JSGT | BPF_X:
++	case BPF_JMP32 | BPF_JSGE | BPF_X:
++	case BPF_JMP32 | BPF_JSLT | BPF_X:
++	case BPF_JMP32 | BPF_JSLE | BPF_X:
++		if (off == 0)
++			break;
++		setup_jmp_r(ctx, dst == src, BPF_OP(code), off, &jmp, &rel);
++		emit_jmp_r(ctx, lo(dst), lo(src), rel, jmp);
++		if (finish_jmp(ctx, jmp, off) < 0)
++			goto toofar;
++		break;
++	/* PC += off if dst == imm */
++	/* PC += off if dst != imm */
++	/* PC += off if dst & imm */
++	/* PC += off if dst > imm */
++	/* PC += off if dst >= imm */
++	/* PC += off if dst < imm */
++	/* PC += off if dst <= imm */
++	/* PC += off if dst > imm (signed) */
++	/* PC += off if dst >= imm (signed) */
++	/* PC += off if dst < imm (signed) */
++	/* PC += off if dst <= imm (signed) */
++	case BPF_JMP32 | BPF_JEQ | BPF_K:
++	case BPF_JMP32 | BPF_JNE | BPF_K:
++	case BPF_JMP32 | BPF_JSET | BPF_K:
++	case BPF_JMP32 | BPF_JGT | BPF_K:
++	case BPF_JMP32 | BPF_JGE | BPF_K:
++	case BPF_JMP32 | BPF_JLT | BPF_K:
++	case BPF_JMP32 | BPF_JLE | BPF_K:
++	case BPF_JMP32 | BPF_JSGT | BPF_K:
++	case BPF_JMP32 | BPF_JSGE | BPF_K:
++	case BPF_JMP32 | BPF_JSLT | BPF_K:
++	case BPF_JMP32 | BPF_JSLE | BPF_K:
++		if (off == 0)
++			break;
++		setup_jmp_i(ctx, imm, 32, BPF_OP(code), off, &jmp, &rel);
++		if (valid_jmp_i(jmp, imm)) {
++			emit_jmp_i(ctx, lo(dst), imm, rel, jmp);
++		} else {
++			/* Move large immediate to register */
++			emit_mov_i(ctx, MIPS_R_T6, imm);
++			emit_jmp_r(ctx, lo(dst), MIPS_R_T6, rel, jmp);
++		}
++		if (finish_jmp(ctx, jmp, off) < 0)
++			goto toofar;
++		break;
++	/* PC += off if dst == src */
++	/* PC += off if dst != src */
++	/* PC += off if dst & src */
++	/* PC += off if dst > src */
++	/* PC += off if dst >= src */
++	/* PC += off if dst < src */
++	/* PC += off if dst <= src */
++	/* PC += off if dst > src (signed) */
++	/* PC += off if dst >= src (signed) */
++	/* PC += off if dst < src (signed) */
++	/* PC += off if dst <= src (signed) */
++	case BPF_JMP | BPF_JEQ | BPF_X:
++	case BPF_JMP | BPF_JNE | BPF_X:
++	case BPF_JMP | BPF_JSET | BPF_X:
++	case BPF_JMP | BPF_JGT | BPF_X:
++	case BPF_JMP | BPF_JGE | BPF_X:
++	case BPF_JMP | BPF_JLT | BPF_X:
++	case BPF_JMP | BPF_JLE | BPF_X:
++	case BPF_JMP | BPF_JSGT | BPF_X:
++	case BPF_JMP | BPF_JSGE | BPF_X:
++	case BPF_JMP | BPF_JSLT | BPF_X:
++	case BPF_JMP | BPF_JSLE | BPF_X:
++		if (off == 0)
++			break;
++		setup_jmp_r(ctx, dst == src, BPF_OP(code), off, &jmp, &rel);
++		emit_jmp_r64(ctx, dst, src, rel, jmp);
++		if (finish_jmp(ctx, jmp, off) < 0)
++			goto toofar;
++		break;
++	/* PC += off if dst == imm */
++	/* PC += off if dst != imm */
++	/* PC += off if dst & imm */
++	/* PC += off if dst > imm */
++	/* PC += off if dst >= imm */
++	/* PC += off if dst < imm */
++	/* PC += off if dst <= imm */
++	/* PC += off if dst > imm (signed) */
++	/* PC += off if dst >= imm (signed) */
++	/* PC += off if dst < imm (signed) */
++	/* PC += off if dst <= imm (signed) */
++	case BPF_JMP | BPF_JEQ | BPF_K:
++	case BPF_JMP | BPF_JNE | BPF_K:
++	case BPF_JMP | BPF_JSET | BPF_K:
++	case BPF_JMP | BPF_JGT | BPF_K:
++	case BPF_JMP | BPF_JGE | BPF_K:
++	case BPF_JMP | BPF_JLT | BPF_K:
++	case BPF_JMP | BPF_JLE | BPF_K:
++	case BPF_JMP | BPF_JSGT | BPF_K:
++	case BPF_JMP | BPF_JSGE | BPF_K:
++	case BPF_JMP | BPF_JSLT | BPF_K:
++	case BPF_JMP | BPF_JSLE | BPF_K:
++		if (off == 0)
++			break;
++		setup_jmp_i(ctx, imm, 64, BPF_OP(code), off, &jmp, &rel);
++		emit_jmp_i64(ctx, dst, imm, rel, jmp);
++		if (finish_jmp(ctx, jmp, off) < 0)
++			goto toofar;
++		break;
++	/* PC += off */
++	case BPF_JMP | BPF_JA:
++		if (off == 0)
++			break;
++		if (emit_ja(ctx, off) < 0)
++			goto toofar;
++		break;
++	/* Tail call */
++	case BPF_JMP | BPF_TAIL_CALL:
++		if (emit_tail_call(ctx) < 0)
++			goto invalid;
++		break;
++	/* Function call */
++	case BPF_JMP | BPF_CALL:
++		if (emit_call(ctx, insn) < 0)
++			goto invalid;
++		break;
++	/* Function return */
++	case BPF_JMP | BPF_EXIT:
++		/*
++		 * Optimization: when last instruction is EXIT
++		 * simply continue to epilogue.
++		 */
++		if (ctx->bpf_index == ctx->program->len - 1)
++			break;
++		if (emit_exit(ctx) < 0)
++			goto toofar;
++		break;
++
++	default:
++invalid:
++		pr_err_once("unknown opcode %02x\n", code);
++		return -EINVAL;
++notyet:
++		pr_info_once("*** NOT YET: opcode %02x ***\n", code);
++		return -EFAULT;
++toofar:
++		pr_info_once("*** TOO FAR: jump at %u opcode %02x ***\n",
++			     ctx->bpf_index, code);
++		return -E2BIG;
++	}
++	return 0;
++}

+ 1005 - 0
target/linux/generic/backport-6.1/050-v5.16-03-mips-bpf-Add-new-eBPF-JIT-for-64-bit-MIPS.patch

@@ -0,0 +1,1005 @@
+From: Johan Almbladh <[email protected]>
+Date: Tue, 5 Oct 2021 18:54:05 +0200
+Subject: [PATCH] mips: bpf: Add new eBPF JIT for 64-bit MIPS
+
+This is an implementation of an eBPF JIT for 64-bit MIPS III-V and
+MIPS64r1-r6. It uses the same framework introduced by the 32-bit JIT.
+
+Signed-off-by: Johan Almbladh <[email protected]>
+---
+ create mode 100644 arch/mips/net/bpf_jit_comp64.c
+
+--- /dev/null
++++ b/arch/mips/net/bpf_jit_comp64.c
+@@ -0,0 +1,991 @@
++// SPDX-License-Identifier: GPL-2.0-only
++/*
++ * Just-In-Time compiler for eBPF bytecode on MIPS.
++ * Implementation of JIT functions for 64-bit CPUs.
++ *
++ * Copyright (c) 2021 Anyfi Networks AB.
++ * Author: Johan Almbladh <[email protected]>
++ *
++ * Based on code and ideas from
++ * Copyright (c) 2017 Cavium, Inc.
++ * Copyright (c) 2017 Shubham Bansal <[email protected]>
++ * Copyright (c) 2011 Mircea Gherzan <[email protected]>
++ */
++
++#include <linux/errno.h>
++#include <linux/filter.h>
++#include <linux/bpf.h>
++#include <asm/cpu-features.h>
++#include <asm/isa-rev.h>
++#include <asm/uasm.h>
++
++#include "bpf_jit_comp.h"
++
++/* MIPS t0-t3 are not available in the n64 ABI */
++#undef MIPS_R_T0
++#undef MIPS_R_T1
++#undef MIPS_R_T2
++#undef MIPS_R_T3
++
++/* Stack is 16-byte aligned in n64 ABI */
++#define MIPS_STACK_ALIGNMENT 16
++
++/* Extra 64-bit eBPF registers used by JIT */
++#define JIT_REG_TC (MAX_BPF_JIT_REG + 0)
++#define JIT_REG_ZX (MAX_BPF_JIT_REG + 1)
++
++/* Number of prologue bytes to skip when doing a tail call */
++#define JIT_TCALL_SKIP 4
++
++/* Callee-saved CPU registers that the JIT must preserve */
++#define JIT_CALLEE_REGS   \
++	(BIT(MIPS_R_S0) | \
++	 BIT(MIPS_R_S1) | \
++	 BIT(MIPS_R_S2) | \
++	 BIT(MIPS_R_S3) | \
++	 BIT(MIPS_R_S4) | \
++	 BIT(MIPS_R_S5) | \
++	 BIT(MIPS_R_S6) | \
++	 BIT(MIPS_R_S7) | \
++	 BIT(MIPS_R_GP) | \
++	 BIT(MIPS_R_FP) | \
++	 BIT(MIPS_R_RA))
++
++/* Caller-saved CPU registers available for JIT use */
++#define JIT_CALLER_REGS	  \
++	(BIT(MIPS_R_A5) | \
++	 BIT(MIPS_R_A6) | \
++	 BIT(MIPS_R_A7))
++
++/*
++ * Mapping of 64-bit eBPF registers to 64-bit native MIPS registers.
++ * MIPS registers t4 - t7 may be used by the JIT as temporary registers.
++ * MIPS registers t8 - t9 are reserved for single-register common functions.
++ */
++static const u8 bpf2mips64[] = {
++	/* Return value from in-kernel function, and exit value from eBPF */
++	[BPF_REG_0] = MIPS_R_V0,
++	/* Arguments from eBPF program to in-kernel function */
++	[BPF_REG_1] = MIPS_R_A0,
++	[BPF_REG_2] = MIPS_R_A1,
++	[BPF_REG_3] = MIPS_R_A2,
++	[BPF_REG_4] = MIPS_R_A3,
++	[BPF_REG_5] = MIPS_R_A4,
++	/* Callee-saved registers that in-kernel function will preserve */
++	[BPF_REG_6] = MIPS_R_S0,
++	[BPF_REG_7] = MIPS_R_S1,
++	[BPF_REG_8] = MIPS_R_S2,
++	[BPF_REG_9] = MIPS_R_S3,
++	/* Read-only frame pointer to access the eBPF stack */
++	[BPF_REG_FP] = MIPS_R_FP,
++	/* Temporary register for blinding constants */
++	[BPF_REG_AX] = MIPS_R_AT,
++	/* Tail call count register, caller-saved */
++	[JIT_REG_TC] = MIPS_R_A5,
++	/* Constant for register zero-extension */
++	[JIT_REG_ZX] = MIPS_R_V1,
++};
++
++/*
++ * MIPS 32-bit operations on 64-bit registers generate a sign-extended
++ * result. However, the eBPF ISA mandates zero-extension, so we rely on the
++ * verifier to add that for us (emit_zext_ver). In addition, ALU arithmetic
++ * operations, right shift and byte swap require properly sign-extended
++ * operands or the result is unpredictable. We emit explicit sign-extensions
++ * in those cases.
++ */
++
++/* Sign extension */
++static void emit_sext(struct jit_context *ctx, u8 dst, u8 src)
++{
++	emit(ctx, sll, dst, src, 0);
++	clobber_reg(ctx, dst);
++}
++
++/* Zero extension */
++static void emit_zext(struct jit_context *ctx, u8 dst)
++{
++	if (cpu_has_mips64r2 || cpu_has_mips64r6) {
++		emit(ctx, dinsu, dst, MIPS_R_ZERO, 32, 32);
++	} else {
++		emit(ctx, and, dst, dst, bpf2mips64[JIT_REG_ZX]);
++		access_reg(ctx, JIT_REG_ZX); /* We need the ZX register */
++	}
++	clobber_reg(ctx, dst);
++}
++
++/* Zero extension, if verifier does not do it for us  */
++static void emit_zext_ver(struct jit_context *ctx, u8 dst)
++{
++	if (!ctx->program->aux->verifier_zext)
++		emit_zext(ctx, dst);
++}
++
++/* dst = imm (64-bit) */
++static void emit_mov_i64(struct jit_context *ctx, u8 dst, u64 imm64)
++{
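++	/*
++	 * Three strategies: immediates that fit a signed 16-bit value use
++	 * a single daddiu; immediates that fit a sign-extended 32-bit
++	 * value use lui/ori; anything else is assembled 16 bits at a time
++	 * with dsll and ori, omitting the ori for all-zero half-words.
++	 */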
++	if (imm64 >= 0xffffffffffff8000ULL || imm64 < 0x8000ULL) {
++		emit(ctx, daddiu, dst, MIPS_R_ZERO, (s16)imm64);
++	} else if (imm64 >= 0xffffffff80000000ULL ||
++		   (imm64 < 0x80000000 && imm64 > 0xffff)) {
++		emit(ctx, lui, dst, (s16)(imm64 >> 16));
++		emit(ctx, ori, dst, dst, (u16)imm64 & 0xffff);
++	} else {
++		u8 acc = MIPS_R_ZERO;
++		int k;
++
++		for (k = 0; k < 4; k++) {
++			u16 half = imm64 >> (48 - 16 * k);
++
++			if (acc == dst)
++				emit(ctx, dsll, dst, dst, 16);
++
++			if (half) {
++				emit(ctx, ori, dst, acc, half);
++				acc = dst;
++			}
++		}
++	}
++	clobber_reg(ctx, dst);
++}
++
++/* ALU immediate operation (64-bit) */
++static void emit_alu_i64(struct jit_context *ctx, u8 dst, s32 imm, u8 op)
++{
++	switch (BPF_OP(op)) {
++	/* dst = dst | imm */
++	case BPF_OR:
++		emit(ctx, ori, dst, dst, (u16)imm);
++		break;
++	/* dst = dst ^ imm */
++	case BPF_XOR:
++		emit(ctx, xori, dst, dst, (u16)imm);
++		break;
++	/* dst = -dst */
++	case BPF_NEG:
++		emit(ctx, dsubu, dst, MIPS_R_ZERO, dst);
++		break;
++	/* dst = dst << imm */
++	case BPF_LSH:
++		emit(ctx, dsll_safe, dst, dst, imm);
++		break;
++	/* dst = dst >> imm */
++	case BPF_RSH:
++		emit(ctx, dsrl_safe, dst, dst, imm);
++		break;
++	/* dst = dst >> imm (arithmetic) */
++	case BPF_ARSH:
++		emit(ctx, dsra_safe, dst, dst, imm);
++		break;
++	/* dst = dst + imm */
++	case BPF_ADD:
++		emit(ctx, daddiu, dst, dst, imm);
++		break;
++	/* dst = dst - imm */
++	case BPF_SUB:
++		emit(ctx, daddiu, dst, dst, -imm);
++		break;
++	default:
++		/* Width-generic operations */
++		emit_alu_i(ctx, dst, imm, op);
++	}
++	clobber_reg(ctx, dst);
++}
++
++/* ALU register operation (64-bit) */
++static void emit_alu_r64(struct jit_context *ctx, u8 dst, u8 src, u8 op)
++{
++	switch (BPF_OP(op)) {
++	/* dst = dst << src */
++	case BPF_LSH:
++		emit(ctx, dsllv, dst, dst, src);
++		break;
++	/* dst = dst >> src */
++	case BPF_RSH:
++		emit(ctx, dsrlv, dst, dst, src);
++		break;
++	/* dst = dst >> src (arithmetic) */
++	case BPF_ARSH:
++		emit(ctx, dsrav, dst, dst, src);
++		break;
++	/* dst = dst + src */
++	case BPF_ADD:
++		emit(ctx, daddu, dst, dst, src);
++		break;
++	/* dst = dst - src */
++	case BPF_SUB:
++		emit(ctx, dsubu, dst, dst, src);
++		break;
++	/* dst = dst * src */
++	case BPF_MUL:
++		if (cpu_has_mips64r6) {
++			emit(ctx, dmulu, dst, dst, src);
++		} else {
++			emit(ctx, dmultu, dst, src);
++			emit(ctx, mflo, dst);
++		}
++		break;
++	/* dst = dst / src */
++	case BPF_DIV:
++		if (cpu_has_mips64r6) {
++			emit(ctx, ddivu_r6, dst, dst, src);
++		} else {
++			emit(ctx, ddivu, dst, src);
++			emit(ctx, mflo, dst);
++		}
++		break;
++	/* dst = dst % src */
++	case BPF_MOD:
++		if (cpu_has_mips64r6) {
++			emit(ctx, dmodu, dst, dst, src);
++		} else {
++			emit(ctx, ddivu, dst, src);
++			emit(ctx, mfhi, dst);
++		}
++		break;
++	default:
++		/* Width-generic operations */
++		emit_alu_r(ctx, dst, src, op);
++	}
++	clobber_reg(ctx, dst);
++}
++
++/* Swap sub words in a register double word */
++static void emit_swap_r64(struct jit_context *ctx, u8 dst, u8 mask, u32 bits)
++{
++	u8 tmp = MIPS_R_T9;
++
++	emit(ctx, and, tmp, dst, mask);  /* tmp = dst & mask  */
++	emit(ctx, dsll, tmp, tmp, bits); /* tmp = tmp << bits */
++	emit(ctx, dsrl, dst, dst, bits); /* dst = dst >> bits */
++	emit(ctx, and, dst, dst, mask);  /* dst = dst & mask  */
++	emit(ctx, or, dst, dst, tmp);    /* dst = dst | tmp   */
++}
++
++/* Swap bytes and truncate a register double word, word or half word */
++static void emit_bswap_r64(struct jit_context *ctx, u8 dst, u32 width)
++{
++	switch (width) {
++	/* Swap bytes in a double word */
++	case 64:
++		if (cpu_has_mips64r2 || cpu_has_mips64r6) {
++			emit(ctx, dsbh, dst, dst);
++			emit(ctx, dshd, dst, dst);
++		} else {
++			u8 t1 = MIPS_R_T6;
++			u8 t2 = MIPS_R_T7;
++
++			emit(ctx, dsll32, t2, dst, 0);  /* t2 = dst << 32    */
++			emit(ctx, dsrl32, dst, dst, 0); /* dst = dst >> 32   */
++			emit(ctx, or, dst, dst, t2);    /* dst = dst | t2    */
++
++			emit(ctx, ori, t2, MIPS_R_ZERO, 0xffff);
++			emit(ctx, dsll32, t1, t2, 0);   /* t1 = t2 << 32     */
++			emit(ctx, or, t1, t1, t2);      /* t1 = t1 | t2      */
++			emit_swap_r64(ctx, dst, t1, 16);/* dst = swap16(dst) */
++
++			emit(ctx, lui, t2, 0xff);       /* t2 = 0x00ff0000   */
++			emit(ctx, ori, t2, t2, 0xff);   /* t2 = t2 | 0x00ff  */
++			emit(ctx, dsll32, t1, t2, 0);   /* t1 = t2 << 32     */
++			emit(ctx, or, t1, t1, t2);      /* t1 = t1 | t2      */
++			emit_swap_r64(ctx, dst, t1, 8); /* dst = swap8(dst)  */
++		}
++		break;
++	/* Swap bytes in a half word */
++	/* Swap bytes in a word */
++	case 32:
++	case 16:
++		emit_sext(ctx, dst, dst);
++		emit_bswap_r(ctx, dst, width);
++		if (cpu_has_mips64r2 || cpu_has_mips64r6)
++			emit_zext(ctx, dst);
++		break;
++	}
++	clobber_reg(ctx, dst);
++}
++
++/* Truncate a register double word, word or half word */
++static void emit_trunc_r64(struct jit_context *ctx, u8 dst, u32 width)
++{
++	switch (width) {
++	case 64:
++		break;
++	/* Zero-extend a word */
++	case 32:
++		emit_zext(ctx, dst);
++		break;
++	/* Zero-extend a half word */
++	case 16:
++		emit(ctx, andi, dst, dst, 0xffff);
++		break;
++	}
++	clobber_reg(ctx, dst);
++}
++
++/* Load operation: dst = *(size*)(src + off) */
++static void emit_ldx(struct jit_context *ctx, u8 dst, u8 src, s16 off, u8 size)
++{
++	switch (size) {
++	/* Load a byte */
++	case BPF_B:
++		emit(ctx, lbu, dst, off, src);
++		break;
++	/* Load a half word */
++	case BPF_H:
++		emit(ctx, lhu, dst, off, src);
++		break;
++	/* Load a word */
++	case BPF_W:
++		emit(ctx, lwu, dst, off, src);
++		break;
++	/* Load a double word */
++	case BPF_DW:
++		emit(ctx, ld, dst, off, src);
++		break;
++	}
++	clobber_reg(ctx, dst);
++}
++
++/* Store operation: *(size *)(dst + off) = src */
++static void emit_stx(struct jit_context *ctx, u8 dst, u8 src, s16 off, u8 size)
++{
++	switch (size) {
++	/* Store a byte */
++	case BPF_B:
++		emit(ctx, sb, src, off, dst);
++		break;
++	/* Store a half word */
++	case BPF_H:
++		emit(ctx, sh, src, off, dst);
++		break;
++	/* Store a word */
++	case BPF_W:
++		emit(ctx, sw, src, off, dst);
++		break;
++	/* Store a double word */
++	case BPF_DW:
++		emit(ctx, sd, src, off, dst);
++		break;
++	}
++}
++
++/* Atomic read-modify-write */
++static void emit_atomic_r64(struct jit_context *ctx,
++			    u8 dst, u8 src, s16 off, u8 code)
++{
++	u8 t1 = MIPS_R_T6;
++	u8 t2 = MIPS_R_T7;
++
++	emit(ctx, lld, t1, off, dst);
++	switch (code) {
++	case BPF_ADD:
++		emit(ctx, daddu, t2, t1, src);
++		break;
++	case BPF_AND:
++		emit(ctx, and, t2, t1, src);
++		break;
++	case BPF_OR:
++		emit(ctx, or, t2, t1, src);
++		break;
++	case BPF_XOR:
++		emit(ctx, xor, t2, t1, src);
++		break;
++	}
++	emit(ctx, scd, t2, off, dst);
++	emit(ctx, beqz, t2, -16);
++	emit(ctx, nop); /* Delay slot */
++}
++
++/* Function call */
++static int emit_call(struct jit_context *ctx, const struct bpf_insn *insn)
++{
++	u8 zx = bpf2mips64[JIT_REG_ZX];
++	u8 tmp = MIPS_R_T6;
++	bool fixed;
++	u64 addr;
++
++	/* Decode the call address */
++	if (bpf_jit_get_func_addr(ctx->program, insn, false,
++				  &addr, &fixed) < 0)
++		return -1;
++	if (!fixed)
++		return -1;
++
++	/* Push caller-saved registers on stack */
++	push_regs(ctx, ctx->clobbered & JIT_CALLER_REGS, 0, 0);
++
++	/* Emit function call */
++	emit_mov_i64(ctx, tmp, addr);
++	emit(ctx, jalr, MIPS_R_RA, tmp);
++	emit(ctx, nop); /* Delay slot */
++
++	/* Restore caller-saved registers */
++	pop_regs(ctx, ctx->clobbered & JIT_CALLER_REGS, 0, 0);
++
++	/* Re-initialize the JIT zero-extension register if accessed */
++	if (ctx->accessed & BIT(JIT_REG_ZX)) {
++		emit(ctx, daddiu, zx, MIPS_R_ZERO, -1);
++		emit(ctx, dsrl32, zx, zx, 0);
++	}
++
++	clobber_reg(ctx, MIPS_R_RA);
++	clobber_reg(ctx, MIPS_R_V0);
++	clobber_reg(ctx, MIPS_R_V1);
++	return 0;
++}
++
++/* Function tail call */
++static int emit_tail_call(struct jit_context *ctx)
++{
++	u8 ary = bpf2mips64[BPF_REG_2];
++	u8 ind = bpf2mips64[BPF_REG_3];
++	u8 tcc = bpf2mips64[JIT_REG_TC];
++	u8 tmp = MIPS_R_T6;
++	int off;
++
++	/*
++	 * Tail call:
++	 * eBPF R1 - function argument (context ptr), passed in a0-a1
++	 * eBPF R2 - ptr to object with array of function entry points
++	 * eBPF R3 - array index of function to be called
++	 */
++
++	/* if (ind >= ary->map.max_entries) goto out */
++	off = offsetof(struct bpf_array, map.max_entries);
++	if (off > 0x7fff)
++		return -1;
++	emit(ctx, lwu, tmp, off, ary);            /* tmp = ary->map.max_entrs*/
++	emit(ctx, sltu, tmp, ind, tmp);           /* tmp = ind < t1          */
++	emit(ctx, beqz, tmp, get_offset(ctx, 1)); /* PC += off(1) if tmp == 0*/
++
++	/* if (--TCC < 0) goto out */
++	emit(ctx, daddiu, tcc, tcc, -1);          /* tcc-- (delay slot)      */
++	emit(ctx, bltz, tcc, get_offset(ctx, 1)); /* PC += off(1) if tcc < 0 */
++						  /* (next insn delay slot)  */
++	/* prog = ary->ptrs[ind] */
++	off = offsetof(struct bpf_array, ptrs);
++	if (off > 0x7fff)
++		return -1;
++	emit(ctx, dsll, tmp, ind, 3);             /* tmp = ind << 3          */
++	emit(ctx, daddu, tmp, tmp, ary);          /* tmp += ary              */
++	emit(ctx, ld, tmp, off, tmp);             /* tmp = *(tmp + off)      */
++
++	/* if (prog == 0) goto out */
++	emit(ctx, beqz, tmp, get_offset(ctx, 1)); /* PC += off(1) if tmp == 0*/
++	emit(ctx, nop);                           /* Delay slot              */
++
++	/* func = prog->bpf_func + 8 (prologue skip offset) */
++	off = offsetof(struct bpf_prog, bpf_func);
++	if (off > 0x7fff)
++		return -1;
++	emit(ctx, ld, tmp, off, tmp);                /* tmp = *(tmp + off)   */
++	emit(ctx, daddiu, tmp, tmp, JIT_TCALL_SKIP); /* tmp += skip (4)      */
++
++	/* goto func */
++	build_epilogue(ctx, tmp);
++	access_reg(ctx, JIT_REG_TC);
++	return 0;
++}
++
++/*
++ * Stack frame layout for a JITed program (stack grows down).
++ *
++ * Higher address  : Previous stack frame      :
++ *                 +===========================+  <--- MIPS sp before call
++ *                 | Callee-saved registers,   |
++ *                 | including RA and FP       |
++ *                 +---------------------------+  <--- eBPF FP (MIPS fp)
++ *                 | Local eBPF variables      |
++ *                 | allocated by program      |
++ *                 +---------------------------+
++ *                 | Reserved for caller-saved |
++ *                 | registers                 |
++ * Lower address   +===========================+  <--- MIPS sp
++ */
++
++/* Build program prologue to set up the stack and registers */
++void build_prologue(struct jit_context *ctx)
++{
++	u8 fp = bpf2mips64[BPF_REG_FP];
++	u8 tc = bpf2mips64[JIT_REG_TC];
++	u8 zx = bpf2mips64[JIT_REG_ZX];
++	int stack, saved, locals, reserved;
++
++	/*
++	 * The first instruction initializes the tail call count register.
++	 * On a tail call, the calling function jumps into the prologue
++	 * after this instruction.
++	 */
++	emit(ctx, addiu, tc, MIPS_R_ZERO, min(MAX_TAIL_CALL_CNT + 1, 0xffff));
++
++	/* === Entry-point for tail calls === */
++
++	/*
++	 * If the eBPF frame pointer and tail call count registers were
++	 * accessed they must be preserved. Mark them as clobbered here
++	 * to save and restore them on the stack as needed.
++	 */
++	if (ctx->accessed & BIT(BPF_REG_FP))
++		clobber_reg(ctx, fp);
++	if (ctx->accessed & BIT(JIT_REG_TC))
++		clobber_reg(ctx, tc);
++	if (ctx->accessed & BIT(JIT_REG_ZX))
++		clobber_reg(ctx, zx);
++
++	/* Compute the stack space needed for callee-saved registers */
++	saved = hweight32(ctx->clobbered & JIT_CALLEE_REGS) * sizeof(u64);
++	saved = ALIGN(saved, MIPS_STACK_ALIGNMENT);
++
++	/* Stack space used by eBPF program local data */
++	locals = ALIGN(ctx->program->aux->stack_depth, MIPS_STACK_ALIGNMENT);
++
++	/*
++	 * If we are emitting function calls, reserve extra stack space for
++	 * caller-saved registers needed by the JIT. The required space is
++	 * computed automatically during resource usage discovery (pass 1).
++	 */
++	reserved = ctx->stack_used;
++
++	/* Allocate the stack frame */
++	stack = ALIGN(saved + locals + reserved, MIPS_STACK_ALIGNMENT);
++	if (stack)
++		emit(ctx, daddiu, MIPS_R_SP, MIPS_R_SP, -stack);
++
++	/* Store callee-saved registers on stack */
++	push_regs(ctx, ctx->clobbered & JIT_CALLEE_REGS, 0, stack - saved);
++
++	/* Initialize the eBPF frame pointer if accessed */
++	if (ctx->accessed & BIT(BPF_REG_FP))
++		emit(ctx, daddiu, fp, MIPS_R_SP, stack - saved);
++
++	/* Initialize the eBPF JIT zero-extension register if accessed */
++	if (ctx->accessed & BIT(JIT_REG_ZX)) {
++		emit(ctx, daddiu, zx, MIPS_R_ZERO, -1);
++		emit(ctx, dsrl32, zx, zx, 0);
++	}
++
++	ctx->saved_size = saved;
++	ctx->stack_size = stack;
++}
++
++/* Build the program epilogue to restore the stack and registers */
++void build_epilogue(struct jit_context *ctx, int dest_reg)
++{
++	/* Restore callee-saved registers from stack */
++	pop_regs(ctx, ctx->clobbered & JIT_CALLEE_REGS, 0,
++		 ctx->stack_size - ctx->saved_size);
++
++	/* Release the stack frame */
++	if (ctx->stack_size)
++		emit(ctx, daddiu, MIPS_R_SP, MIPS_R_SP, ctx->stack_size);
++
++	/* Jump to return address and sign-extend the 32-bit return value */
++	emit(ctx, jr, dest_reg);
++	emit(ctx, sll, MIPS_R_V0, MIPS_R_V0, 0); /* Delay slot */
++}
++
++/* Build one eBPF instruction */
++int build_insn(const struct bpf_insn *insn, struct jit_context *ctx)
++{
++	u8 dst = bpf2mips64[insn->dst_reg];
++	u8 src = bpf2mips64[insn->src_reg];
++	u8 code = insn->code;
++	s16 off = insn->off;
++	s32 imm = insn->imm;
++	s32 val, rel;
++	u8 alu, jmp;
++
++	switch (code) {
++	/* ALU operations */
++	/* dst = imm */
++	case BPF_ALU | BPF_MOV | BPF_K:
++		emit_mov_i(ctx, dst, imm);
++		emit_zext_ver(ctx, dst);
++		break;
++	/* dst = src */
++	case BPF_ALU | BPF_MOV | BPF_X:
++		if (imm == 1) {
++			/* Special mov32 for zext */
++			emit_zext(ctx, dst);
++		} else {
++			emit_mov_r(ctx, dst, src);
++			emit_zext_ver(ctx, dst);
++		}
++		break;
++	/* dst = -dst */
++	case BPF_ALU | BPF_NEG:
++		emit_sext(ctx, dst, dst);
++		emit_alu_i(ctx, dst, 0, BPF_NEG);
++		emit_zext_ver(ctx, dst);
++		break;
++	/* dst = dst & imm */
++	/* dst = dst | imm */
++	/* dst = dst ^ imm */
++	/* dst = dst << imm */
++	case BPF_ALU | BPF_OR | BPF_K:
++	case BPF_ALU | BPF_AND | BPF_K:
++	case BPF_ALU | BPF_XOR | BPF_K:
++	case BPF_ALU | BPF_LSH | BPF_K:
++		if (!valid_alu_i(BPF_OP(code), imm)) {
++			emit_mov_i(ctx, MIPS_R_T4, imm);
++			emit_alu_r(ctx, dst, MIPS_R_T4, BPF_OP(code));
++		} else if (rewrite_alu_i(BPF_OP(code), imm, &alu, &val)) {
++			emit_alu_i(ctx, dst, val, alu);
++		}
++		emit_zext_ver(ctx, dst);
++		break;
++	/* dst = dst >> imm */
++	/* dst = dst >> imm (arithmetic) */
++	/* dst = dst + imm */
++	/* dst = dst - imm */
++	/* dst = dst * imm */
++	/* dst = dst / imm */
++	/* dst = dst % imm */
++	case BPF_ALU | BPF_RSH | BPF_K:
++	case BPF_ALU | BPF_ARSH | BPF_K:
++	case BPF_ALU | BPF_ADD | BPF_K:
++	case BPF_ALU | BPF_SUB | BPF_K:
++	case BPF_ALU | BPF_MUL | BPF_K:
++	case BPF_ALU | BPF_DIV | BPF_K:
++	case BPF_ALU | BPF_MOD | BPF_K:
++		if (!valid_alu_i(BPF_OP(code), imm)) {
++			emit_sext(ctx, dst, dst);
++			emit_mov_i(ctx, MIPS_R_T4, imm);
++			emit_alu_r(ctx, dst, MIPS_R_T4, BPF_OP(code));
++		} else if (rewrite_alu_i(BPF_OP(code), imm, &alu, &val)) {
++			emit_sext(ctx, dst, dst);
++			emit_alu_i(ctx, dst, val, alu);
++		}
++		emit_zext_ver(ctx, dst);
++		break;
++	/* dst = dst & src */
++	/* dst = dst | src */
++	/* dst = dst ^ src */
++	/* dst = dst << src */
++	case BPF_ALU | BPF_AND | BPF_X:
++	case BPF_ALU | BPF_OR | BPF_X:
++	case BPF_ALU | BPF_XOR | BPF_X:
++	case BPF_ALU | BPF_LSH | BPF_X:
++		emit_alu_r(ctx, dst, src, BPF_OP(code));
++		emit_zext_ver(ctx, dst);
++		break;
++	/* dst = dst >> src */
++	/* dst = dst >> src (arithmetic) */
++	/* dst = dst + src */
++	/* dst = dst - src */
++	/* dst = dst * src */
++	/* dst = dst / src */
++	/* dst = dst % src */
++	case BPF_ALU | BPF_RSH | BPF_X:
++	case BPF_ALU | BPF_ARSH | BPF_X:
++	case BPF_ALU | BPF_ADD | BPF_X:
++	case BPF_ALU | BPF_SUB | BPF_X:
++	case BPF_ALU | BPF_MUL | BPF_X:
++	case BPF_ALU | BPF_DIV | BPF_X:
++	case BPF_ALU | BPF_MOD | BPF_X:
++		emit_sext(ctx, dst, dst);
++		emit_sext(ctx, MIPS_R_T4, src);
++		emit_alu_r(ctx, dst, MIPS_R_T4, BPF_OP(code));
++		emit_zext_ver(ctx, dst);
++		break;
++	/* dst = imm (64-bit) */
++	case BPF_ALU64 | BPF_MOV | BPF_K:
++		emit_mov_i(ctx, dst, imm);
++		break;
++	/* dst = src (64-bit) */
++	case BPF_ALU64 | BPF_MOV | BPF_X:
++		emit_mov_r(ctx, dst, src);
++		break;
++	/* dst = -dst (64-bit) */
++	case BPF_ALU64 | BPF_NEG:
++		emit_alu_i64(ctx, dst, 0, BPF_NEG);
++		break;
++	/* dst = dst & imm (64-bit) */
++	/* dst = dst | imm (64-bit) */
++	/* dst = dst ^ imm (64-bit) */
++	/* dst = dst << imm (64-bit) */
++	/* dst = dst >> imm (64-bit) */
++	/* dst = dst >> imm (64-bit, arithmetic) */
++	/* dst = dst + imm (64-bit) */
++	/* dst = dst - imm (64-bit) */
++	/* dst = dst * imm (64-bit) */
++	/* dst = dst / imm (64-bit) */
++	/* dst = dst % imm (64-bit) */
++	case BPF_ALU64 | BPF_AND | BPF_K:
++	case BPF_ALU64 | BPF_OR | BPF_K:
++	case BPF_ALU64 | BPF_XOR | BPF_K:
++	case BPF_ALU64 | BPF_LSH | BPF_K:
++	case BPF_ALU64 | BPF_RSH | BPF_K:
++	case BPF_ALU64 | BPF_ARSH | BPF_K:
++	case BPF_ALU64 | BPF_ADD | BPF_K:
++	case BPF_ALU64 | BPF_SUB | BPF_K:
++	case BPF_ALU64 | BPF_MUL | BPF_K:
++	case BPF_ALU64 | BPF_DIV | BPF_K:
++	case BPF_ALU64 | BPF_MOD | BPF_K:
++		if (!valid_alu_i(BPF_OP(code), imm)) {
++			emit_mov_i(ctx, MIPS_R_T4, imm);
++			emit_alu_r64(ctx, dst, MIPS_R_T4, BPF_OP(code));
++		} else if (rewrite_alu_i(BPF_OP(code), imm, &alu, &val)) {
++			emit_alu_i64(ctx, dst, val, alu);
++		}
++		break;
++	/* dst = dst & src (64-bit) */
++	/* dst = dst | src (64-bit) */
++	/* dst = dst ^ src (64-bit) */
++	/* dst = dst << src (64-bit) */
++	/* dst = dst >> src (64-bit) */
++	/* dst = dst >> src (64-bit, arithmetic) */
++	/* dst = dst + src (64-bit) */
++	/* dst = dst - src (64-bit) */
++	/* dst = dst * src (64-bit) */
++	/* dst = dst / src (64-bit) */
++	/* dst = dst % src (64-bit) */
++	case BPF_ALU64 | BPF_AND | BPF_X:
++	case BPF_ALU64 | BPF_OR | BPF_X:
++	case BPF_ALU64 | BPF_XOR | BPF_X:
++	case BPF_ALU64 | BPF_LSH | BPF_X:
++	case BPF_ALU64 | BPF_RSH | BPF_X:
++	case BPF_ALU64 | BPF_ARSH | BPF_X:
++	case BPF_ALU64 | BPF_ADD | BPF_X:
++	case BPF_ALU64 | BPF_SUB | BPF_X:
++	case BPF_ALU64 | BPF_MUL | BPF_X:
++	case BPF_ALU64 | BPF_DIV | BPF_X:
++	case BPF_ALU64 | BPF_MOD | BPF_X:
++		emit_alu_r64(ctx, dst, src, BPF_OP(code));
++		break;
++	/* dst = htole(dst) */
++	/* dst = htobe(dst) */
++	case BPF_ALU | BPF_END | BPF_FROM_LE:
++	case BPF_ALU | BPF_END | BPF_FROM_BE:
++		if (BPF_SRC(code) ==
++#ifdef __BIG_ENDIAN
++		    BPF_FROM_LE
++#else
++		    BPF_FROM_BE
++#endif
++		    )
++			emit_bswap_r64(ctx, dst, imm);
++		else
++			emit_trunc_r64(ctx, dst, imm);
++		break;
++	/* dst = imm64 */
++	case BPF_LD | BPF_IMM | BPF_DW:
++		emit_mov_i64(ctx, dst, (u32)imm | ((u64)insn[1].imm << 32));
++		return 1;
++	/* LDX: dst = *(size *)(src + off) */
++	case BPF_LDX | BPF_MEM | BPF_W:
++	case BPF_LDX | BPF_MEM | BPF_H:
++	case BPF_LDX | BPF_MEM | BPF_B:
++	case BPF_LDX | BPF_MEM | BPF_DW:
++		emit_ldx(ctx, dst, src, off, BPF_SIZE(code));
++		break;
++	/* ST: *(size *)(dst + off) = imm */
++	case BPF_ST | BPF_MEM | BPF_W:
++	case BPF_ST | BPF_MEM | BPF_H:
++	case BPF_ST | BPF_MEM | BPF_B:
++	case BPF_ST | BPF_MEM | BPF_DW:
++		emit_mov_i(ctx, MIPS_R_T4, imm);
++		emit_stx(ctx, dst, MIPS_R_T4, off, BPF_SIZE(code));
++		break;
++	/* STX: *(size *)(dst + off) = src */
++	case BPF_STX | BPF_MEM | BPF_W:
++	case BPF_STX | BPF_MEM | BPF_H:
++	case BPF_STX | BPF_MEM | BPF_B:
++	case BPF_STX | BPF_MEM | BPF_DW:
++		emit_stx(ctx, dst, src, off, BPF_SIZE(code));
++		break;
++	/* Speculation barrier */
++	case BPF_ST | BPF_NOSPEC:
++		break;
++	/* Atomics */
++	case BPF_STX | BPF_XADD | BPF_W:
++	case BPF_STX | BPF_XADD | BPF_DW:
++		switch (imm) {
++		case BPF_ADD:
++		case BPF_AND:
++		case BPF_OR:
++		case BPF_XOR:
++			if (BPF_SIZE(code) == BPF_DW) {
++				emit_atomic_r64(ctx, dst, src, off, imm);
++			} else { /* 32-bit, no fetch */
++				emit_sext(ctx, MIPS_R_T4, src);
++				emit_atomic_r(ctx, dst, MIPS_R_T4, off, imm);
++			}
++			break;
++		default:
++			goto notyet;
++		}
++		break;
++	/* PC += off if dst == src */
++	/* PC += off if dst != src */
++	/* PC += off if dst & src */
++	/* PC += off if dst > src */
++	/* PC += off if dst >= src */
++	/* PC += off if dst < src */
++	/* PC += off if dst <= src */
++	/* PC += off if dst > src (signed) */
++	/* PC += off if dst >= src (signed) */
++	/* PC += off if dst < src (signed) */
++	/* PC += off if dst <= src (signed) */
++	case BPF_JMP32 | BPF_JEQ | BPF_X:
++	case BPF_JMP32 | BPF_JNE | BPF_X:
++	case BPF_JMP32 | BPF_JSET | BPF_X:
++	case BPF_JMP32 | BPF_JGT | BPF_X:
++	case BPF_JMP32 | BPF_JGE | BPF_X:
++	case BPF_JMP32 | BPF_JLT | BPF_X:
++	case BPF_JMP32 | BPF_JLE | BPF_X:
++	case BPF_JMP32 | BPF_JSGT | BPF_X:
++	case BPF_JMP32 | BPF_JSGE | BPF_X:
++	case BPF_JMP32 | BPF_JSLT | BPF_X:
++	case BPF_JMP32 | BPF_JSLE | BPF_X:
++		if (off == 0)
++			break;
++		setup_jmp_r(ctx, dst == src, BPF_OP(code), off, &jmp, &rel);
++		emit_sext(ctx, MIPS_R_T4, dst); /* Sign-extended dst */
++		emit_sext(ctx, MIPS_R_T5, src); /* Sign-extended src */
++		emit_jmp_r(ctx, MIPS_R_T4, MIPS_R_T5, rel, jmp);
++		if (finish_jmp(ctx, jmp, off) < 0)
++			goto toofar;
++		break;
++	/* PC += off if dst == imm */
++	/* PC += off if dst != imm */
++	/* PC += off if dst & imm */
++	/* PC += off if dst > imm */
++	/* PC += off if dst >= imm */
++	/* PC += off if dst < imm */
++	/* PC += off if dst <= imm */
++	/* PC += off if dst > imm (signed) */
++	/* PC += off if dst >= imm (signed) */
++	/* PC += off if dst < imm (signed) */
++	/* PC += off if dst <= imm (signed) */
++	case BPF_JMP32 | BPF_JEQ | BPF_K:
++	case BPF_JMP32 | BPF_JNE | BPF_K:
++	case BPF_JMP32 | BPF_JSET | BPF_K:
++	case BPF_JMP32 | BPF_JGT | BPF_K:
++	case BPF_JMP32 | BPF_JGE | BPF_K:
++	case BPF_JMP32 | BPF_JLT | BPF_K:
++	case BPF_JMP32 | BPF_JLE | BPF_K:
++	case BPF_JMP32 | BPF_JSGT | BPF_K:
++	case BPF_JMP32 | BPF_JSGE | BPF_K:
++	case BPF_JMP32 | BPF_JSLT | BPF_K:
++	case BPF_JMP32 | BPF_JSLE | BPF_K:
++		if (off == 0)
++			break;
++		setup_jmp_i(ctx, imm, 32, BPF_OP(code), off, &jmp, &rel);
++		emit_sext(ctx, MIPS_R_T4, dst); /* Sign-extended dst */
++		if (valid_jmp_i(jmp, imm)) {
++			emit_jmp_i(ctx, MIPS_R_T4, imm, rel, jmp);
++		} else {
++			/* Move large immediate to register, sign-extended */
++			emit_mov_i(ctx, MIPS_R_T5, imm);
++			emit_jmp_r(ctx, MIPS_R_T4, MIPS_R_T5, rel, jmp);
++		}
++		if (finish_jmp(ctx, jmp, off) < 0)
++			goto toofar;
++		break;
++	/* PC += off if dst == src */
++	/* PC += off if dst != src */
++	/* PC += off if dst & src */
++	/* PC += off if dst > src */
++	/* PC += off if dst >= src */
++	/* PC += off if dst < src */
++	/* PC += off if dst <= src */
++	/* PC += off if dst > src (signed) */
++	/* PC += off if dst >= src (signed) */
++	/* PC += off if dst < src (signed) */
++	/* PC += off if dst <= src (signed) */
++	case BPF_JMP | BPF_JEQ | BPF_X:
++	case BPF_JMP | BPF_JNE | BPF_X:
++	case BPF_JMP | BPF_JSET | BPF_X:
++	case BPF_JMP | BPF_JGT | BPF_X:
++	case BPF_JMP | BPF_JGE | BPF_X:
++	case BPF_JMP | BPF_JLT | BPF_X:
++	case BPF_JMP | BPF_JLE | BPF_X:
++	case BPF_JMP | BPF_JSGT | BPF_X:
++	case BPF_JMP | BPF_JSGE | BPF_X:
++	case BPF_JMP | BPF_JSLT | BPF_X:
++	case BPF_JMP | BPF_JSLE | BPF_X:
++		if (off == 0)
++			break;
++		setup_jmp_r(ctx, dst == src, BPF_OP(code), off, &jmp, &rel);
++		emit_jmp_r(ctx, dst, src, rel, jmp);
++		if (finish_jmp(ctx, jmp, off) < 0)
++			goto toofar;
++		break;
++	/* PC += off if dst == imm */
++	/* PC += off if dst != imm */
++	/* PC += off if dst & imm */
++	/* PC += off if dst > imm */
++	/* PC += off if dst >= imm */
++	/* PC += off if dst < imm */
++	/* PC += off if dst <= imm */
++	/* PC += off if dst > imm (signed) */
++	/* PC += off if dst >= imm (signed) */
++	/* PC += off if dst < imm (signed) */
++	/* PC += off if dst <= imm (signed) */
++	case BPF_JMP | BPF_JEQ | BPF_K:
++	case BPF_JMP | BPF_JNE | BPF_K:
++	case BPF_JMP | BPF_JSET | BPF_K:
++	case BPF_JMP | BPF_JGT | BPF_K:
++	case BPF_JMP | BPF_JGE | BPF_K:
++	case BPF_JMP | BPF_JLT | BPF_K:
++	case BPF_JMP | BPF_JLE | BPF_K:
++	case BPF_JMP | BPF_JSGT | BPF_K:
++	case BPF_JMP | BPF_JSGE | BPF_K:
++	case BPF_JMP | BPF_JSLT | BPF_K:
++	case BPF_JMP | BPF_JSLE | BPF_K:
++		if (off == 0)
++			break;
++		setup_jmp_i(ctx, imm, 64, BPF_OP(code), off, &jmp, &rel);
++		if (valid_jmp_i(jmp, imm)) {
++			emit_jmp_i(ctx, dst, imm, rel, jmp);
++		} else {
++			/* Move large immediate to register */
++			emit_mov_i(ctx, MIPS_R_T4, imm);
++			emit_jmp_r(ctx, dst, MIPS_R_T4, rel, jmp);
++		}
++		if (finish_jmp(ctx, jmp, off) < 0)
++			goto toofar;
++		break;
++	/* PC += off */
++	case BPF_JMP | BPF_JA:
++		if (off == 0)
++			break;
++		if (emit_ja(ctx, off) < 0)
++			goto toofar;
++		break;
++	/* Tail call */
++	case BPF_JMP | BPF_TAIL_CALL:
++		if (emit_tail_call(ctx) < 0)
++			goto invalid;
++		break;
++	/* Function call */
++	case BPF_JMP | BPF_CALL:
++		if (emit_call(ctx, insn) < 0)
++			goto invalid;
++		break;
++	/* Function return */
++	case BPF_JMP | BPF_EXIT:
++		/*
++		 * Optimization: when last instruction is EXIT
++		 * simply continue to epilogue.
++		 */
++		if (ctx->bpf_index == ctx->program->len - 1)
++			break;
++		if (emit_exit(ctx) < 0)
++			goto toofar;
++		break;
++
++	default:
++invalid:
++		pr_err_once("unknown opcode %02x\n", code);
++		return -EINVAL;
++notyet:
++		pr_info_once("*** NOT YET: opcode %02x ***\n", code);
++		return -EFAULT;
++toofar:
++		pr_info_once("*** TOO FAR: jump at %u opcode %02x ***\n",
++			     ctx->bpf_index, code);
++		return -E2BIG;
++	}
++	return 0;
++}
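
As a worked illustration of the immediate-load strategy in emit_mov_i64() above (hand-derived from its three branches rather than taken from actual JIT output; "dst" stands for whatever destination register the mapping picked), three sample constants would expand roughly as follows:

	/* imm64 = 0xfffffffffffffff0 -- fits a sign-extended 16-bit value */
	daddiu  dst, zero, -16

	/* imm64 = 0x0000000012345678 -- fits a sign-extended 32-bit value */
	lui     dst, 0x1234
	ori     dst, dst, 0x5678

	/* imm64 = 0x123456789abcdef0 -- general case, built 16 bits at a time */
	ori     dst, zero, 0x1234
	dsll    dst, dst, 16
	ori     dst, dst, 0x5678
	dsll    dst, dst, 16
	ori     dst, dst, 0x9abc
	dsll    dst, dst, 16
	ori     dst, dst, 0xdef0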

+ 120 - 0
target/linux/generic/backport-6.1/050-v5.16-04-mips-bpf-Add-JIT-workarounds-for-CPU-errata.patch

@@ -0,0 +1,120 @@
+From: Johan Almbladh <[email protected]>
+Date: Tue, 5 Oct 2021 18:54:06 +0200
+Subject: [PATCH] mips: bpf: Add JIT workarounds for CPU errata
+
+This patch adds workarounds for the following CPU errata to the MIPS
+eBPF JIT, if enabled in the kernel configuration.
+
+  - R10000 ll/sc weak ordering
+  - Loongson-3 ll/sc weak ordering
+  - Loongson-2F jump hang
+
+The Loongson-2F nop errata is implemented in uasm, which the JIT uses,
+so no additional mitigations are needed for that.
+
+Signed-off-by: Johan Almbladh <[email protected]>
+Reviewed-by: Jiaxun Yang <[email protected]>
+---
+
+--- a/arch/mips/net/bpf_jit_comp.c
++++ b/arch/mips/net/bpf_jit_comp.c
+@@ -404,6 +404,7 @@ void emit_alu_r(struct jit_context *ctx,
+ /* Atomic read-modify-write (32-bit) */
+ void emit_atomic_r(struct jit_context *ctx, u8 dst, u8 src, s16 off, u8 code)
+ {
++	LLSC_sync(ctx);
+ 	emit(ctx, ll, MIPS_R_T9, off, dst);
+ 	switch (code) {
+ 	case BPF_ADD:
+@@ -420,18 +421,19 @@ void emit_atomic_r(struct jit_context *c
+ 		break;
+ 	}
+ 	emit(ctx, sc, MIPS_R_T8, off, dst);
+-	emit(ctx, beqz, MIPS_R_T8, -16);
++	emit(ctx, LLSC_beqz, MIPS_R_T8, -16 - LLSC_offset);
+ 	emit(ctx, nop); /* Delay slot */
+ }
+ 
+ /* Atomic compare-and-exchange (32-bit) */
+ void emit_cmpxchg_r(struct jit_context *ctx, u8 dst, u8 src, u8 res, s16 off)
+ {
++	LLSC_sync(ctx);
+ 	emit(ctx, ll, MIPS_R_T9, off, dst);
+ 	emit(ctx, bne, MIPS_R_T9, res, 12);
+ 	emit(ctx, move, MIPS_R_T8, src);     /* Delay slot */
+ 	emit(ctx, sc, MIPS_R_T8, off, dst);
+-	emit(ctx, beqz, MIPS_R_T8, -20);
++	emit(ctx, LLSC_beqz, MIPS_R_T8, -20 - LLSC_offset);
+ 	emit(ctx, move, res, MIPS_R_T9);     /* Delay slot */
+ 	clobber_reg(ctx, res);
+ }
+--- a/arch/mips/net/bpf_jit_comp.h
++++ b/arch/mips/net/bpf_jit_comp.h
+@@ -87,7 +87,7 @@ struct jit_context {
+ };
+ 
+ /* Emit the instruction if the JIT memory space has been allocated */
+-#define emit(ctx, func, ...)					\
++#define __emit(ctx, func, ...)					\
+ do {								\
+ 	if ((ctx)->target != NULL) {				\
+ 		u32 *p = &(ctx)->target[ctx->jit_index];	\
+@@ -95,6 +95,30 @@ do {								\
+ 	}							\
+ 	(ctx)->jit_index++;					\
+ } while (0)
++#define emit(...) __emit(__VA_ARGS__)
++
++/* Workaround for R10000 ll/sc errata */
++#ifdef CONFIG_WAR_R10000
++#define LLSC_beqz	beqzl
++#else
++#define LLSC_beqz	beqz
++#endif
++
++/* Workaround for Loongson-3 ll/sc errata */
++#ifdef CONFIG_CPU_LOONGSON3_WORKAROUNDS
++#define LLSC_sync(ctx)	emit(ctx, sync, 0)
++#define LLSC_offset	4
++#else
++#define LLSC_sync(ctx)
++#define LLSC_offset	0
++#endif
++
++/* Workaround for Loongson-2F jump errata */
++#ifdef CONFIG_CPU_JUMP_WORKAROUNDS
++#define JALR_MASK	0xffffffffcfffffffULL
++#else
++#define JALR_MASK	(~0ULL)
++#endif
+ 
+ /*
+  * Mark a BPF register as accessed, it needs to be
+--- a/arch/mips/net/bpf_jit_comp64.c
++++ b/arch/mips/net/bpf_jit_comp64.c
+@@ -375,6 +375,7 @@ static void emit_atomic_r64(struct jit_c
+ 	u8 t1 = MIPS_R_T6;
+ 	u8 t2 = MIPS_R_T7;
+ 
++	LLSC_sync(ctx);
+ 	emit(ctx, lld, t1, off, dst);
+ 	switch (code) {
+ 	case BPF_ADD:
+@@ -391,7 +392,7 @@ static void emit_atomic_r64(struct jit_c
+ 		break;
+ 	}
+ 	emit(ctx, scd, t2, off, dst);
+-	emit(ctx, beqz, t2, -16);
++	emit(ctx, LLSC_beqz, t2, -16 - LLSC_offset);
+ 	emit(ctx, nop); /* Delay slot */
+ }
+ 
+@@ -414,7 +415,7 @@ static int emit_call(struct jit_context
+ 	push_regs(ctx, ctx->clobbered & JIT_CALLER_REGS, 0, 0);
+ 
+ 	/* Emit function call */
+-	emit_mov_i64(ctx, tmp, addr);
++	emit_mov_i64(ctx, tmp, addr & JALR_MASK);
+ 	emit(ctx, jalr, MIPS_R_RA, tmp);
+ 	emit(ctx, nop); /* Delay slot */
+ 

+ 61 - 0
target/linux/generic/backport-6.1/050-v5.16-05-mips-bpf-Enable-eBPF-JITs.patch

@@ -0,0 +1,61 @@
+From: Johan Almbladh <[email protected]>
+Date: Tue, 5 Oct 2021 18:54:07 +0200
+Subject: [PATCH] mips: bpf: Enable eBPF JITs
+
+This patch enables the new eBPF JITs for 32-bit and 64-bit MIPS. It also
+disables the old cBPF JIT so that cBPF programs are converted to use the
+new JIT.
+
+Workarounds for R4000 CPU errata are not implemented by the JIT, so the
+JIT is disabled if any of those workarounds are configured.
+
+Signed-off-by: Johan Almbladh <[email protected]>
+---
+
+--- a/MAINTAINERS
++++ b/MAINTAINERS
+@@ -3431,6 +3431,7 @@ S:	Supported
+ F:	arch/arm64/net/
+ 
+ BPF JIT for MIPS (32-BIT AND 64-BIT)
++M:	Johan Almbladh <[email protected]>
+ M:	Paul Burton <[email protected]>
+ L:	[email protected]
+ L:	[email protected]
+--- a/arch/mips/Kconfig
++++ b/arch/mips/Kconfig
+@@ -57,7 +57,6 @@ config MIPS
+ 	select HAVE_ARCH_TRACEHOOK
+ 	select HAVE_ARCH_TRANSPARENT_HUGEPAGE if CPU_SUPPORTS_HUGEPAGES
+ 	select HAVE_ASM_MODVERSIONS
+-	select HAVE_CBPF_JIT if !64BIT && !CPU_MICROMIPS
+ 	select HAVE_CONTEXT_TRACKING
+ 	select HAVE_TIF_NOHZ
+ 	select HAVE_C_RECORDMCOUNT
+@@ -65,7 +64,10 @@ config MIPS
+ 	select HAVE_DEBUG_STACKOVERFLOW
+ 	select HAVE_DMA_CONTIGUOUS
+ 	select HAVE_DYNAMIC_FTRACE
+-	select HAVE_EBPF_JIT if 64BIT && !CPU_MICROMIPS && TARGET_ISA_REV >= 2
++	select HAVE_EBPF_JIT if !CPU_MICROMIPS && \
++				!CPU_DADDI_WORKAROUNDS && \
++				!CPU_R4000_WORKAROUNDS && \
++				!CPU_R4400_WORKAROUNDS
+ 	select HAVE_EXIT_THREAD
+ 	select HAVE_FAST_GUP
+ 	select HAVE_FTRACE_MCOUNT_RECORD
+--- a/arch/mips/net/Makefile
++++ b/arch/mips/net/Makefile
+@@ -2,9 +2,10 @@
+ # MIPS networking code
+ 
+ obj-$(CONFIG_MIPS_CBPF_JIT) += bpf_jit.o bpf_jit_asm.o
++obj-$(CONFIG_MIPS_EBPF_JIT) += bpf_jit_comp.o
+ 
+ ifeq ($(CONFIG_32BIT),y)
+-        obj-$(CONFIG_MIPS_EBPF_JIT) += bpf_jit_comp.o bpf_jit_comp32.o
++        obj-$(CONFIG_MIPS_EBPF_JIT) += bpf_jit_comp32.o
+ else
+-        obj-$(CONFIG_MIPS_EBPF_JIT) += ebpf_jit.o
++        obj-$(CONFIG_MIPS_EBPF_JIT) += bpf_jit_comp64.o
+ endif

+ 387 - 0
target/linux/generic/backport-6.1/050-v5.16-06-mips-bpf-Remove-old-BPF-JIT-implementations.patch

@@ -0,0 +1,387 @@
+From: Johan Almbladh <[email protected]>
+Date: Tue, 5 Oct 2021 18:54:08 +0200
+Subject: [PATCH] mips: bpf: Remove old BPF JIT implementations
+
+This patch removes the old 32-bit cBPF and 64-bit eBPF JIT implementations.
+They are replaced by a new eBPF implementation that supports both 32-bit
+and 64-bit MIPS CPUs.
+
+Signed-off-by: Johan Almbladh <[email protected]>
+---
+ delete mode 100644 arch/mips/net/bpf_jit.c
+ delete mode 100644 arch/mips/net/bpf_jit.h
+ delete mode 100644 arch/mips/net/bpf_jit_asm.S
+ delete mode 100644 arch/mips/net/ebpf_jit.c
+
+--- a/arch/mips/net/bpf_jit.h
++++ /dev/null
+@@ -1,81 +0,0 @@
+-/* SPDX-License-Identifier: GPL-2.0-only */
+-/*
+- * Just-In-Time compiler for BPF filters on MIPS
+- *
+- * Copyright (c) 2014 Imagination Technologies Ltd.
+- * Author: Markos Chandras <[email protected]>
+- */
+-
+-#ifndef BPF_JIT_MIPS_OP_H
+-#define BPF_JIT_MIPS_OP_H
+-
+-/* Registers used by JIT */
+-#define MIPS_R_ZERO	0
+-#define MIPS_R_V0	2
+-#define MIPS_R_A0	4
+-#define MIPS_R_A1	5
+-#define MIPS_R_T4	12
+-#define MIPS_R_T5	13
+-#define MIPS_R_T6	14
+-#define MIPS_R_T7	15
+-#define MIPS_R_S0	16
+-#define MIPS_R_S1	17
+-#define MIPS_R_S2	18
+-#define MIPS_R_S3	19
+-#define MIPS_R_S4	20
+-#define MIPS_R_S5	21
+-#define MIPS_R_S6	22
+-#define MIPS_R_S7	23
+-#define MIPS_R_SP	29
+-#define MIPS_R_RA	31
+-
+-/* Conditional codes */
+-#define MIPS_COND_EQ	0x1
+-#define MIPS_COND_GE	(0x1 << 1)
+-#define MIPS_COND_GT	(0x1 << 2)
+-#define MIPS_COND_NE	(0x1 << 3)
+-#define MIPS_COND_ALL	(0x1 << 4)
+-/* Conditionals on X register or K immediate */
+-#define MIPS_COND_X	(0x1 << 5)
+-#define MIPS_COND_K	(0x1 << 6)
+-
+-#define r_ret	MIPS_R_V0
+-
+-/*
+- * Use 2 scratch registers to avoid pipeline interlocks.
+- * There is no overhead during epilogue and prologue since
+- * any of the $s0-$s6 registers will only be preserved if
+- * they are going to actually be used.
+- */
+-#define r_skb_hl	MIPS_R_S0 /* skb header length */
+-#define r_skb_data	MIPS_R_S1 /* skb actual data */
+-#define r_off		MIPS_R_S2
+-#define r_A		MIPS_R_S3
+-#define r_X		MIPS_R_S4
+-#define r_skb		MIPS_R_S5
+-#define r_M		MIPS_R_S6
+-#define r_skb_len	MIPS_R_S7
+-#define r_s0		MIPS_R_T4 /* scratch reg 1 */
+-#define r_s1		MIPS_R_T5 /* scratch reg 2 */
+-#define r_tmp_imm	MIPS_R_T6 /* No need to preserve this */
+-#define r_tmp		MIPS_R_T7 /* No need to preserve this */
+-#define r_zero		MIPS_R_ZERO
+-#define r_sp		MIPS_R_SP
+-#define r_ra		MIPS_R_RA
+-
+-#ifndef __ASSEMBLY__
+-
+-/* Declare ASM helpers */
+-
+-#define DECLARE_LOAD_FUNC(func) \
+-	extern u8 func(unsigned long *skb, int offset); \
+-	extern u8 func##_negative(unsigned long *skb, int offset); \
+-	extern u8 func##_positive(unsigned long *skb, int offset)
+-
+-DECLARE_LOAD_FUNC(sk_load_word);
+-DECLARE_LOAD_FUNC(sk_load_half);
+-DECLARE_LOAD_FUNC(sk_load_byte);
+-
+-#endif
+-
+-#endif /* BPF_JIT_MIPS_OP_H */
+--- a/arch/mips/net/bpf_jit_asm.S
++++ /dev/null
+@@ -1,285 +0,0 @@
+-/*
+- * bpf_jib_asm.S: Packet/header access helper functions for MIPS/MIPS64 BPF
+- * compiler.
+- *
+- * Copyright (C) 2015 Imagination Technologies Ltd.
+- * Author: Markos Chandras <[email protected]>
+- *
+- * This program is free software; you can redistribute it and/or modify it
+- * under the terms of the GNU General Public License as published by the
+- * Free Software Foundation; version 2 of the License.
+- */
+-
+-#include <asm/asm.h>
+-#include <asm/isa-rev.h>
+-#include <asm/regdef.h>
+-#include "bpf_jit.h"
+-
+-/* ABI
+- *
+- * r_skb_hl	skb header length
+- * r_skb_data	skb data
+- * r_off(a1)	offset register
+- * r_A		BPF register A
+- * r_X		PF register X
+- * r_skb(a0)	*skb
+- * r_M		*scratch memory
+- * r_skb_le	skb length
+- * r_s0		Scratch register 0
+- * r_s1		Scratch register 1
+- *
+- * On entry:
+- * a0: *skb
+- * a1: offset (imm or imm + X)
+- *
+- * All non-BPF-ABI registers are free for use. On return, we only
+- * care about r_ret. The BPF-ABI registers are assumed to remain
+- * unmodified during the entire filter operation.
+- */
+-
+-#define skb	a0
+-#define offset	a1
+-#define SKF_LL_OFF  (-0x200000) /* Can't include linux/filter.h in assembly */
+-
+-	/* We know better :) so prevent assembler reordering etc */
+-	.set 	noreorder
+-
+-#define is_offset_negative(TYPE)				\
+-	/* If offset is negative we have more work to do */	\
+-	slti	t0, offset, 0;					\
+-	bgtz	t0, bpf_slow_path_##TYPE##_neg;			\
+-	/* Be careful what follows in DS. */
+-
+-#define is_offset_in_header(SIZE, TYPE)				\
+-	/* Reading from header? */				\
+-	addiu	$r_s0, $r_skb_hl, -SIZE;			\
+-	slt	t0, $r_s0, offset;				\
+-	bgtz	t0, bpf_slow_path_##TYPE;			\
+-
+-LEAF(sk_load_word)
+-	is_offset_negative(word)
+-FEXPORT(sk_load_word_positive)
+-	is_offset_in_header(4, word)
+-	/* Offset within header boundaries */
+-	PTR_ADDU t1, $r_skb_data, offset
+-	.set	reorder
+-	lw	$r_A, 0(t1)
+-	.set	noreorder
+-#ifdef CONFIG_CPU_LITTLE_ENDIAN
+-# if MIPS_ISA_REV >= 2
+-	wsbh	t0, $r_A
+-	rotr	$r_A, t0, 16
+-# else
+-	sll	t0, $r_A, 24
+-	srl	t1, $r_A, 24
+-	srl	t2, $r_A, 8
+-	or	t0, t0, t1
+-	andi	t2, t2, 0xff00
+-	andi	t1, $r_A, 0xff00
+-	or	t0, t0, t2
+-	sll	t1, t1, 8
+-	or	$r_A, t0, t1
+-# endif
+-#endif
+-	jr	$r_ra
+-	 move	$r_ret, zero
+-	END(sk_load_word)
+-
+-LEAF(sk_load_half)
+-	is_offset_negative(half)
+-FEXPORT(sk_load_half_positive)
+-	is_offset_in_header(2, half)
+-	/* Offset within header boundaries */
+-	PTR_ADDU t1, $r_skb_data, offset
+-	lhu	$r_A, 0(t1)
+-#ifdef CONFIG_CPU_LITTLE_ENDIAN
+-# if MIPS_ISA_REV >= 2
+-	wsbh	$r_A, $r_A
+-# else
+-	sll	t0, $r_A, 8
+-	srl	t1, $r_A, 8
+-	andi	t0, t0, 0xff00
+-	or	$r_A, t0, t1
+-# endif
+-#endif
+-	jr	$r_ra
+-	 move	$r_ret, zero
+-	END(sk_load_half)
+-
+-LEAF(sk_load_byte)
+-	is_offset_negative(byte)
+-FEXPORT(sk_load_byte_positive)
+-	is_offset_in_header(1, byte)
+-	/* Offset within header boundaries */
+-	PTR_ADDU t1, $r_skb_data, offset
+-	lbu	$r_A, 0(t1)
+-	jr	$r_ra
+-	 move	$r_ret, zero
+-	END(sk_load_byte)
+-
+-/*
+- * call skb_copy_bits:
+- * (prototype in linux/skbuff.h)
+- *
+- * int skb_copy_bits(sk_buff *skb, int offset, void *to, int len)
+- *
+- * o32 mandates we leave 4 spaces for argument registers in case
+- * the callee needs to use them. Even though we don't care about
+- * the argument registers ourselves, we need to allocate that space
+- * to remain ABI compliant since the callee may want to use that space.
+- * We also allocate 2 more spaces for $r_ra and our return register (*to).
+- *
+- * n64 is a bit different. The *caller* will allocate the space to preserve
+- * the arguments. So in 64-bit kernels, we allocate the 4-arg space for no
+- * good reason but it does not matter that much really.
+- *
+- * (void *to) is returned in r_s0
+- *
+- */
+-#ifdef CONFIG_CPU_LITTLE_ENDIAN
+-#define DS_OFFSET(SIZE) (4 * SZREG)
+-#else
+-#define DS_OFFSET(SIZE) ((4 * SZREG) + (4 - SIZE))
+-#endif
+-#define bpf_slow_path_common(SIZE)				\
+-	/* Quick check. Are we within reasonable boundaries? */ \
+-	LONG_ADDIU	$r_s1, $r_skb_len, -SIZE;		\
+-	sltu		$r_s0, offset, $r_s1;			\
+-	beqz		$r_s0, fault;				\
+-	/* Load 4th argument in DS */				\
+-	 LONG_ADDIU	a3, zero, SIZE;				\
+-	PTR_ADDIU	$r_sp, $r_sp, -(6 * SZREG);		\
+-	PTR_LA		t0, skb_copy_bits;			\
+-	PTR_S		$r_ra, (5 * SZREG)($r_sp);		\
+-	/* Assign low slot to a2 */				\
+-	PTR_ADDIU	a2, $r_sp, DS_OFFSET(SIZE);		\
+-	jalr		t0;					\
+-	/* Reset our destination slot (DS but it's ok) */	\
+-	 INT_S		zero, (4 * SZREG)($r_sp);		\
+-	/*							\
+-	 * skb_copy_bits returns 0 on success and -EFAULT	\
+-	 * on error. Our data live in a2. Do not bother with	\
+-	 * our data if an error has been returned.		\
+-	 */							\
+-	/* Restore our frame */					\
+-	PTR_L		$r_ra, (5 * SZREG)($r_sp);		\
+-	INT_L		$r_s0, (4 * SZREG)($r_sp);		\
+-	bltz		v0, fault;				\
+-	 PTR_ADDIU	$r_sp, $r_sp, 6 * SZREG;		\
+-	move		$r_ret, zero;				\
+-
+-NESTED(bpf_slow_path_word, (6 * SZREG), $r_sp)
+-	bpf_slow_path_common(4)
+-#ifdef CONFIG_CPU_LITTLE_ENDIAN
+-# if MIPS_ISA_REV >= 2
+-	wsbh	t0, $r_s0
+-	jr	$r_ra
+-	 rotr	$r_A, t0, 16
+-# else
+-	sll	t0, $r_s0, 24
+-	srl	t1, $r_s0, 24
+-	srl	t2, $r_s0, 8
+-	or	t0, t0, t1
+-	andi	t2, t2, 0xff00
+-	andi	t1, $r_s0, 0xff00
+-	or	t0, t0, t2
+-	sll	t1, t1, 8
+-	jr	$r_ra
+-	 or	$r_A, t0, t1
+-# endif
+-#else
+-	jr	$r_ra
+-	 move	$r_A, $r_s0
+-#endif
+-
+-	END(bpf_slow_path_word)
+-
+-NESTED(bpf_slow_path_half, (6 * SZREG), $r_sp)
+-	bpf_slow_path_common(2)
+-#ifdef CONFIG_CPU_LITTLE_ENDIAN
+-# if MIPS_ISA_REV >= 2
+-	jr	$r_ra
+-	 wsbh	$r_A, $r_s0
+-# else
+-	sll	t0, $r_s0, 8
+-	andi	t1, $r_s0, 0xff00
+-	andi	t0, t0, 0xff00
+-	srl	t1, t1, 8
+-	jr	$r_ra
+-	 or	$r_A, t0, t1
+-# endif
+-#else
+-	jr	$r_ra
+-	 move	$r_A, $r_s0
+-#endif
+-
+-	END(bpf_slow_path_half)
+-
+-NESTED(bpf_slow_path_byte, (6 * SZREG), $r_sp)
+-	bpf_slow_path_common(1)
+-	jr	$r_ra
+-	 move	$r_A, $r_s0
+-
+-	END(bpf_slow_path_byte)
+-
+-/*
+- * Negative entry points
+- */
+-	.macro bpf_is_end_of_data
+-	li	t0, SKF_LL_OFF
+-	/* Reading link layer data? */
+-	slt	t1, offset, t0
+-	bgtz	t1, fault
+-	/* Be careful what follows in DS. */
+-	.endm
+-/*
+- * call skb_copy_bits:
+- * (prototype in linux/filter.h)
+- *
+- * void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb,
+- *                                            int k, unsigned int size)
+- *
+- * see above (bpf_slow_path_common) for ABI restrictions
+- */
+-#define bpf_negative_common(SIZE)					\
+-	PTR_ADDIU	$r_sp, $r_sp, -(6 * SZREG);			\
+-	PTR_LA		t0, bpf_internal_load_pointer_neg_helper;	\
+-	PTR_S		$r_ra, (5 * SZREG)($r_sp);			\
+-	jalr		t0;						\
+-	 li		a2, SIZE;					\
+-	PTR_L		$r_ra, (5 * SZREG)($r_sp);			\
+-	/* Check return pointer */					\
+-	beqz		v0, fault;					\
+-	 PTR_ADDIU	$r_sp, $r_sp, 6 * SZREG;			\
+-	/* Preserve our pointer */					\
+-	move		$r_s0, v0;					\
+-	/* Set return value */						\
+-	move		$r_ret, zero;					\
+-
+-bpf_slow_path_word_neg:
+-	bpf_is_end_of_data
+-NESTED(sk_load_word_negative, (6 * SZREG), $r_sp)
+-	bpf_negative_common(4)
+-	jr	$r_ra
+-	 lw	$r_A, 0($r_s0)
+-	END(sk_load_word_negative)
+-
+-bpf_slow_path_half_neg:
+-	bpf_is_end_of_data
+-NESTED(sk_load_half_negative, (6 * SZREG), $r_sp)
+-	bpf_negative_common(2)
+-	jr	$r_ra
+-	 lhu	$r_A, 0($r_s0)
+-	END(sk_load_half_negative)
+-
+-bpf_slow_path_byte_neg:
+-	bpf_is_end_of_data
+-NESTED(sk_load_byte_negative, (6 * SZREG), $r_sp)
+-	bpf_negative_common(1)
+-	jr	$r_ra
+-	 lbu	$r_A, 0($r_s0)
+-	END(sk_load_byte_negative)
+-
+-fault:
+-	jr	$r_ra
+-	 addiu $r_ret, zero, 1

+ 105 - 0
target/linux/generic/backport-6.1/080-v5.17-clk-gate-Add-devm_clk_hw_register_gate.patch

@@ -0,0 +1,105 @@
+From 815f0e738a8d5663a02350e2580706829144a722 Mon Sep 17 00:00:00 2001
+From: Horatiu Vultur <[email protected]>
+Date: Wed, 3 Nov 2021 09:50:59 +0100
+Subject: [PATCH] clk: gate: Add devm_clk_hw_register_gate()
+
+Add devm_clk_hw_register_gate() - devres-managed version of
+clk_hw_register_gate()
+
+Suggested-by: Stephen Boyd <[email protected]>
+Signed-off-by: Horatiu Vultur <[email protected]>
+Acked-by: Nicolas Ferre <[email protected]>
+Signed-off-by: Nicolas Ferre <[email protected]>
+Link: https://lore.kernel.org/r/[email protected]
+---
+ drivers/clk/clk-gate.c       | 35 +++++++++++++++++++++++++++++++++++
+ include/linux/clk-provider.h | 23 +++++++++++++++++++++++
+ 2 files changed, 58 insertions(+)
+
+--- a/drivers/clk/clk-gate.c
++++ b/drivers/clk/clk-gate.c
+@@ -7,6 +7,7 @@
+  */
+ 
+ #include <linux/clk-provider.h>
++#include <linux/device.h>
+ #include <linux/module.h>
+ #include <linux/slab.h>
+ #include <linux/io.h>
+@@ -222,3 +223,37 @@ void clk_hw_unregister_gate(struct clk_h
+ 	kfree(gate);
+ }
+ EXPORT_SYMBOL_GPL(clk_hw_unregister_gate);
++
++static void devm_clk_hw_release_gate(struct device *dev, void *res)
++{
++	clk_hw_unregister_gate(*(struct clk_hw **)res);
++}
++
++struct clk_hw *__devm_clk_hw_register_gate(struct device *dev,
++		struct device_node *np, const char *name,
++		const char *parent_name, const struct clk_hw *parent_hw,
++		const struct clk_parent_data *parent_data,
++		unsigned long flags,
++		void __iomem *reg, u8 bit_idx,
++		u8 clk_gate_flags, spinlock_t *lock)
++{
++	struct clk_hw **ptr, *hw;
++
++	ptr = devres_alloc(devm_clk_hw_release_gate, sizeof(*ptr), GFP_KERNEL);
++	if (!ptr)
++		return ERR_PTR(-ENOMEM);
++
++	hw = __clk_hw_register_gate(dev, np, name, parent_name, parent_hw,
++				    parent_data, flags, reg, bit_idx,
++				    clk_gate_flags, lock);
++
++	if (!IS_ERR(hw)) {
++		*ptr = hw;
++		devres_add(dev, ptr);
++	} else {
++		devres_free(ptr);
++	}
++
++	return hw;
++}
++EXPORT_SYMBOL_GPL(__devm_clk_hw_register_gate);
+--- a/include/linux/clk-provider.h
++++ b/include/linux/clk-provider.h
+@@ -490,6 +490,13 @@ struct clk_hw *__clk_hw_register_gate(st
+ 		unsigned long flags,
+ 		void __iomem *reg, u8 bit_idx,
+ 		u8 clk_gate_flags, spinlock_t *lock);
++struct clk_hw *__devm_clk_hw_register_gate(struct device *dev,
++		struct device_node *np, const char *name,
++		const char *parent_name, const struct clk_hw *parent_hw,
++		const struct clk_parent_data *parent_data,
++		unsigned long flags,
++		void __iomem *reg, u8 bit_idx,
++		u8 clk_gate_flags, spinlock_t *lock);
+ struct clk *clk_register_gate(struct device *dev, const char *name,
+ 		const char *parent_name, unsigned long flags,
+ 		void __iomem *reg, u8 bit_idx,
+@@ -544,6 +551,22 @@ struct clk *clk_register_gate(struct dev
+ 	__clk_hw_register_gate((dev), NULL, (name), NULL, NULL, (parent_data), \
+ 			       (flags), (reg), (bit_idx),		      \
+ 			       (clk_gate_flags), (lock))
++/**
++ * devm_clk_hw_register_gate - register a gate clock with the clock framework
++ * @dev: device that is registering this clock
++ * @name: name of this clock
++ * @parent_name: name of this clock's parent
++ * @flags: framework-specific flags for this clock
++ * @reg: register address to control gating of this clock
++ * @bit_idx: which bit in the register controls gating of this clock
++ * @clk_gate_flags: gate-specific flags for this clock
++ * @lock: shared register lock for this clock
++ */
++#define devm_clk_hw_register_gate(dev, name, parent_name, flags, reg, bit_idx,\
++				  clk_gate_flags, lock)			      \
++	__devm_clk_hw_register_gate((dev), NULL, (name), (parent_name), NULL, \
++			       NULL, (flags), (reg), (bit_idx),		      \
++			       (clk_gate_flags), (lock))
+ void clk_unregister_gate(struct clk *clk);
+ void clk_hw_unregister_gate(struct clk_hw *hw);
+ int clk_gate_is_enabled(struct clk_hw *hw);

+ 52 - 0
target/linux/generic/backport-6.1/081-v5.17-regmap-allow-to-define-reg_update_bits-for-no-bus.patch

@@ -0,0 +1,52 @@
+From 02d6fdecb9c38de19065f6bed8d5214556fd061d Mon Sep 17 00:00:00 2001
+From: Ansuel Smith <[email protected]>
+Date: Thu, 4 Nov 2021 16:00:40 +0100
+Subject: regmap: allow to define reg_update_bits for no bus configuration
+
+Some devices require special handling for reg_update_bits and can't use
+the normal regmap read/write logic. An example is when locking is
+handled by the device and rmw operations require atomic operations.
+Allow declaring a dedicated function in regmap_config for
+reg_update_bits in the no-bus configuration.
+
+Signed-off-by: Ansuel Smith <[email protected]>
+Link: https://lore.kernel.org/r/[email protected]
+Signed-off-by: Mark Brown <[email protected]>
+---
+ drivers/base/regmap/regmap.c | 1 +
+ include/linux/regmap.h       | 7 +++++++
+ 2 files changed, 8 insertions(+)
+
+--- a/drivers/base/regmap/regmap.c
++++ b/drivers/base/regmap/regmap.c
+@@ -877,6 +877,7 @@ struct regmap *__regmap_init(struct devi
+ 	if (!bus) {
+ 		map->reg_read  = config->reg_read;
+ 		map->reg_write = config->reg_write;
++		map->reg_update_bits = config->reg_update_bits;
+ 
+ 		map->defer_caching = false;
+ 		goto skip_format_initialization;
+--- a/include/linux/regmap.h
++++ b/include/linux/regmap.h
+@@ -290,6 +290,11 @@ typedef void (*regmap_unlock)(void *);
+  *		  read operation on a bus such as SPI, I2C, etc. Most of the
+  *		  devices do not need this.
+  * @reg_write:	  Same as above for writing.
++ * @reg_update_bits: Optional callback that if filled will be used to perform
++ *		     all the update_bits(rmw) operations. Should only be provided
++ *		     if the function requires special handling with lock and reg
++ *		     handling and the operation cannot be represented as a simple
++ *		     update_bits operation on a bus such as SPI, I2C, etc.
+  * @fast_io:	  Register IO is fast. Use a spinlock instead of a mutex
+  *	     	  to perform locking. This field is ignored if custom lock/unlock
+  *	     	  functions are used (see fields lock/unlock of struct regmap_config).
+@@ -372,6 +377,8 @@ struct regmap_config {
+ 
+ 	int (*reg_read)(void *context, unsigned int reg, unsigned int *val);
+ 	int (*reg_write)(void *context, unsigned int reg, unsigned int val);
++	int (*reg_update_bits)(void *context, unsigned int reg,
++			       unsigned int mask, unsigned int val);
+ 
+ 	bool fast_io;
+ 

+ 37 - 0
target/linux/generic/backport-6.1/100-v5.18-tty-serial-bcm63xx-use-more-precise-Kconfig-symbol.patch

@@ -0,0 +1,37 @@
+From 0dc0da881b4574d1e04a079ab2ea75da61f5ad2e Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <[email protected]>
+Date: Fri, 11 Mar 2022 10:32:33 +0100
+Subject: [PATCH] tty: serial: bcm63xx: use more precise Kconfig symbol
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Patches lowering SERIAL_BCM63XX dependencies led to a discussion and
+documentation change regarding "depends" usage. Adjust Kconfig entry to
+match current guidelines. Make this symbol available for relevant
+architectures only.
+
+Cc: Geert Uytterhoeven <[email protected]>
+Reviewed-by: Geert Uytterhoeven <[email protected]>
+Acked-by: Florian Fainelli <[email protected]>
+Signed-off-by: Rafał Miłecki <[email protected]>
+Ref: f35a07f92616 ("tty: serial: bcm63xx: lower driver dependencies")
+Ref: 18084e435ff6 ("Documentation/kbuild: Document platform dependency practises")
+Link: https://lore.kernel.org/r/[email protected]
+Signed-off-by: Greg Kroah-Hartman <[email protected]>
+---
+ drivers/tty/serial/Kconfig | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/tty/serial/Kconfig
++++ b/drivers/tty/serial/Kconfig
+@@ -1098,7 +1098,8 @@ config SERIAL_TIMBERDALE
+ config SERIAL_BCM63XX
+ 	tristate "Broadcom BCM63xx/BCM33xx UART support"
+ 	select SERIAL_CORE
+-	depends on COMMON_CLK
++	depends on ARCH_BCM4908 || ARCH_BCM_63XX || BCM63XX || BMIPS_GENERIC || COMPILE_TEST
++	default ARCH_BCM4908 || ARCH_BCM_63XX || BCM63XX || BMIPS_GENERIC
+ 	help
+ 	  This enables the driver for the onchip UART core found on
+ 	  the following chipsets:

+ 49 - 0
target/linux/generic/backport-6.1/200-v5.18-tools-resolve_btfids-Build-with-host-flags.patch

@@ -0,0 +1,49 @@
+From cdbc4e3399ed8cdcf234a85f7a2482b622379e82 Mon Sep 17 00:00:00 2001
+From: Connor O'Brien <[email protected]>
+Date: Wed, 12 Jan 2022 00:25:03 +0000
+Subject: [PATCH] tools/resolve_btfids: Build with host flags
+
+resolve_btfids is built using $(HOSTCC) and $(HOSTLD) but does not
+pick up the corresponding flags. As a result, host-specific settings
+(such as a sysroot specified via HOSTCFLAGS=--sysroot=..., or a linker
+specified via HOSTLDFLAGS=-fuse-ld=...) will not be respected.
+
+Fix this by setting CFLAGS to KBUILD_HOSTCFLAGS and LDFLAGS to
+KBUILD_HOSTLDFLAGS.
+
+Also pass the cflags through to libbpf via EXTRA_CFLAGS to ensure that
+the host libbpf is built with flags consistent with resolve_btfids.
+
+Signed-off-by: Connor O'Brien <[email protected]>
+Signed-off-by: Andrii Nakryiko <[email protected]>
+Acked-by: Song Liu <[email protected]>
+Link: https://lore.kernel.org/bpf/[email protected]
+(cherry picked from commit 0e3a1c902ffb56e9fe4416f0cd382c97b09ecbf6)
+Signed-off-by: Stijn Tintel <[email protected]>
+---
+ tools/bpf/resolve_btfids/Makefile | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/tools/bpf/resolve_btfids/Makefile
++++ b/tools/bpf/resolve_btfids/Makefile
+@@ -23,6 +23,8 @@ CC       = $(HOSTCC)
+ LD       = $(HOSTLD)
+ ARCH     = $(HOSTARCH)
+ RM      ?= rm
++CFLAGS  := $(KBUILD_HOSTCFLAGS)
++LDFLAGS := $(KBUILD_HOSTLDFLAGS)
+ 
+ OUTPUT ?= $(srctree)/tools/bpf/resolve_btfids/
+ 
+@@ -45,9 +47,9 @@ $(SUBCMDOBJ): fixdep FORCE | $(OUTPUT)/l
+ 	$(Q)$(MAKE) -C $(SUBCMD_SRC) OUTPUT=$(abspath $(dir $@))/ $(abspath $@)
+ 
+ $(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(OUTPUT)/libbpf
+-	$(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC)  OUTPUT=$(abspath $(dir $@))/ $(abspath $@)
++	$(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC)  OUTPUT=$(abspath $(dir $@))/ EXTRA_CFLAGS="$(CFLAGS)" $(abspath $@)
+ 
+-CFLAGS := -g \
++CFLAGS += -g \
+           -I$(srctree)/tools/include \
+           -I$(srctree)/tools/include/uapi \
+           -I$(LIBBPF_SRC) \

+ 997 - 0
target/linux/generic/backport-6.1/201-v5.16-scripts-dtc-Update-to-upstream-version-v1.6.1-19-g0a.patch

@@ -0,0 +1,997 @@
+From a77725a9a3c5924e2fd4cd5b3557dd92a8e46f87 Mon Sep 17 00:00:00 2001
+From: Rob Herring <[email protected]>
+Date: Mon, 25 Oct 2021 11:05:45 -0500
+Subject: [PATCH 1/1] scripts/dtc: Update to upstream version
+ v1.6.1-19-g0a3a9d3449c8
+
+This adds the following commits from upstream:
+
+0a3a9d3449c8 checks: Add an interrupt-map check
+8fd24744e361 checks: Ensure '#interrupt-cells' only exists in interrupt providers
+d8d1a9a77863 checks: Drop interrupt provider '#address-cells' check
+52a16fd72824 checks: Make interrupt_provider check dependent on interrupts_extended_is_cell
+37fd700685da treesource: Maintain phandle label/path on output
+e33ce1d6a8c7 flattree: Use '\n', not ';' to separate asm pseudo-ops
+d24cc189dca6 asm: Use assembler macros instead of cpp macros
+ff3a30c115ad asm: Use .asciz and .ascii instead of .string
+5eb5927d81ee fdtdump: fix -Werror=int-to-pointer-cast
+0869f8269161 libfdt: Add ALIGNMENT error string
+69595a167f06 checks: Fix bus-range check
+72d09e2682a4 Makefile: add -Wsign-compare to warning options
+b587787ef388 checks: Fix signedness comparisons warnings
+69bed6c2418f dtc: Wrap phandle validity check
+910221185560 fdtget: Fix signedness comparisons warnings
+d966f08fcd21 tests: Fix signedness comparisons warnings
+ecfb438c07fa dtc: Fix signedness comparisons warnings: pointer diff
+5bec74a6d135 dtc: Fix signedness comparisons warnings: reservednum
+24e7f511fd4a fdtdump: Fix signedness comparisons warnings
+b6910bec1161 Bump version to v1.6.1
+21d61d18f968 Fix CID 1461557
+4c2ef8f4d14c checks: Introduce is_multiple_of()
+e59ca36fb70e Make handling of cpp line information more tolerant
+0c3fd9b6aceb checks: Drop interrupt_cells_is_cell check
+6b3081abc4ac checks: Add check_is_cell() for all phandle+arg properties
+2dffc192a77f yamltree: Remove marker ordering dependency
+61e513439e40 pylibfdt: Rework "avoid unused variable warning" lines
+c8bddd106095 tests: add a positive gpio test case
+ad4abfadb687 checks: replace strstr and strrchr with strends
+09c6a6e88718 dtc.h: add strends for suffix matching
+9bb9b8d0b4a0 checks: tigthen up nr-gpios prop exception
+b07b62ee3342 libfdt: Add FDT alignment check to fdt_check_header()
+a2def5479950 libfdt: Check that the root-node name is empty
+4ca61f84dc21 libfdt: Check that there is only one root node
+34d708249a91 dtc: Remove -O dtbo support
+8e7ff260f755 libfdt: Fix a possible "unchecked return value" warning
+88875268c05c checks: Warn on node-name and property name being the same
+9d2279e7e6ee checks: Change node-name check to match devicetree spec
+f527c867a8c6 util: limit gnu_printf format attribute to gcc >= 4.4.0
+
+Reviewed-by: Frank Rowand <[email protected]>
+Tested-by: Frank Rowand <[email protected]>
+Signed-off-by: Rob Herring <[email protected]>
+---
+ scripts/dtc/checks.c              | 222 ++++++++++++++++++++++--------
+ scripts/dtc/dtc-lexer.l           |   2 +-
+ scripts/dtc/dtc.c                 |   6 +-
+ scripts/dtc/dtc.h                 |  40 +++++-
+ scripts/dtc/flattree.c            |  11 +-
+ scripts/dtc/libfdt/fdt.c          |   4 +
+ scripts/dtc/libfdt/fdt_rw.c       |  18 ++-
+ scripts/dtc/libfdt/fdt_strerror.c |   1 +
+ scripts/dtc/libfdt/libfdt.h       |   7 +
+ scripts/dtc/livetree.c            |   6 +-
+ scripts/dtc/treesource.c          |  48 +++----
+ scripts/dtc/util.h                |   6 +-
+ scripts/dtc/version_gen.h         |   2 +-
+ scripts/dtc/yamltree.c            |  16 ++-
+ 14 files changed, 275 insertions(+), 114 deletions(-)
+
+--- a/scripts/dtc/checks.c
++++ b/scripts/dtc/checks.c
+@@ -143,6 +143,14 @@ static void check_nodes_props(struct che
+ 		check_nodes_props(c, dti, child);
+ }
+ 
++static bool is_multiple_of(int multiple, int divisor)
++{
++	if (divisor == 0)
++		return multiple == 0;
++	else
++		return (multiple % divisor) == 0;
++}
++
+ static bool run_check(struct check *c, struct dt_info *dti)
+ {
+ 	struct node *dt = dti->dt;
+@@ -297,19 +305,20 @@ ERROR(duplicate_property_names, check_du
+ #define LOWERCASE	"abcdefghijklmnopqrstuvwxyz"
+ #define UPPERCASE	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+ #define DIGITS		"0123456789"
+-#define PROPNODECHARS	LOWERCASE UPPERCASE DIGITS ",._+*#?-"
++#define NODECHARS	LOWERCASE UPPERCASE DIGITS ",._+-@"
++#define PROPCHARS	LOWERCASE UPPERCASE DIGITS ",._+*#?-"
+ #define PROPNODECHARSSTRICT	LOWERCASE UPPERCASE DIGITS ",-"
+ 
+ static void check_node_name_chars(struct check *c, struct dt_info *dti,
+ 				  struct node *node)
+ {
+-	int n = strspn(node->name, c->data);
++	size_t n = strspn(node->name, c->data);
+ 
+ 	if (n < strlen(node->name))
+ 		FAIL(c, dti, node, "Bad character '%c' in node name",
+ 		     node->name[n]);
+ }
+-ERROR(node_name_chars, check_node_name_chars, PROPNODECHARS "@");
++ERROR(node_name_chars, check_node_name_chars, NODECHARS);
+ 
+ static void check_node_name_chars_strict(struct check *c, struct dt_info *dti,
+ 					 struct node *node)
+@@ -330,6 +339,20 @@ static void check_node_name_format(struc
+ }
+ ERROR(node_name_format, check_node_name_format, NULL, &node_name_chars);
+ 
++static void check_node_name_vs_property_name(struct check *c,
++					     struct dt_info *dti,
++					     struct node *node)
++{
++	if (!node->parent)
++		return;
++
++	if (get_property(node->parent, node->name)) {
++		FAIL(c, dti, node, "node name and property name conflict");
++	}
++}
++WARNING(node_name_vs_property_name, check_node_name_vs_property_name,
++	NULL, &node_name_chars);
++
+ static void check_unit_address_vs_reg(struct check *c, struct dt_info *dti,
+ 				      struct node *node)
+ {
+@@ -363,14 +386,14 @@ static void check_property_name_chars(st
+ 	struct property *prop;
+ 
+ 	for_each_property(node, prop) {
+-		int n = strspn(prop->name, c->data);
++		size_t n = strspn(prop->name, c->data);
+ 
+ 		if (n < strlen(prop->name))
+ 			FAIL_PROP(c, dti, node, prop, "Bad character '%c' in property name",
+ 				  prop->name[n]);
+ 	}
+ }
+-ERROR(property_name_chars, check_property_name_chars, PROPNODECHARS);
++ERROR(property_name_chars, check_property_name_chars, PROPCHARS);
+ 
+ static void check_property_name_chars_strict(struct check *c,
+ 					     struct dt_info *dti,
+@@ -380,7 +403,7 @@ static void check_property_name_chars_st
+ 
+ 	for_each_property(node, prop) {
+ 		const char *name = prop->name;
+-		int n = strspn(name, c->data);
++		size_t n = strspn(name, c->data);
+ 
+ 		if (n == strlen(prop->name))
+ 			continue;
+@@ -497,7 +520,7 @@ static cell_t check_phandle_prop(struct
+ 
+ 	phandle = propval_cell(prop);
+ 
+-	if ((phandle == 0) || (phandle == -1)) {
++	if (!phandle_is_valid(phandle)) {
+ 		FAIL_PROP(c, dti, node, prop, "bad value (0x%x) in %s property",
+ 		     phandle, prop->name);
+ 		return 0;
+@@ -556,7 +579,7 @@ static void check_name_properties(struct
+ 	if (!prop)
+ 		return; /* No name property, that's fine */
+ 
+-	if ((prop->val.len != node->basenamelen+1)
++	if ((prop->val.len != node->basenamelen + 1U)
+ 	    || (memcmp(prop->val.val, node->name, node->basenamelen) != 0)) {
+ 		FAIL(c, dti, node, "\"name\" property is incorrect (\"%s\" instead"
+ 		     " of base node name)", prop->val.val);
+@@ -657,7 +680,6 @@ ERROR(omit_unused_nodes, fixup_omit_unus
+  */
+ WARNING_IF_NOT_CELL(address_cells_is_cell, "#address-cells");
+ WARNING_IF_NOT_CELL(size_cells_is_cell, "#size-cells");
+-WARNING_IF_NOT_CELL(interrupt_cells_is_cell, "#interrupt-cells");
+ 
+ WARNING_IF_NOT_STRING(device_type_is_string, "device_type");
+ WARNING_IF_NOT_STRING(model_is_string, "model");
+@@ -672,8 +694,7 @@ static void check_names_is_string_list(s
+ 	struct property *prop;
+ 
+ 	for_each_property(node, prop) {
+-		const char *s = strrchr(prop->name, '-');
+-		if (!s || !streq(s, "-names"))
++		if (!strends(prop->name, "-names"))
+ 			continue;
+ 
+ 		c->data = prop->name;
+@@ -753,7 +774,7 @@ static void check_reg_format(struct chec
+ 	size_cells = node_size_cells(node->parent);
+ 	entrylen = (addr_cells + size_cells) * sizeof(cell_t);
+ 
+-	if (!entrylen || (prop->val.len % entrylen) != 0)
++	if (!is_multiple_of(prop->val.len, entrylen))
+ 		FAIL_PROP(c, dti, node, prop, "property has invalid length (%d bytes) "
+ 			  "(#address-cells == %d, #size-cells == %d)",
+ 			  prop->val.len, addr_cells, size_cells);
+@@ -794,7 +815,7 @@ static void check_ranges_format(struct c
+ 				  "#size-cells (%d) differs from %s (%d)",
+ 				  ranges, c_size_cells, node->parent->fullpath,
+ 				  p_size_cells);
+-	} else if ((prop->val.len % entrylen) != 0) {
++	} else if (!is_multiple_of(prop->val.len, entrylen)) {
+ 		FAIL_PROP(c, dti, node, prop, "\"%s\" property has invalid length (%d bytes) "
+ 			  "(parent #address-cells == %d, child #address-cells == %d, "
+ 			  "#size-cells == %d)", ranges, prop->val.len,
+@@ -871,7 +892,7 @@ static void check_pci_device_bus_num(str
+ 	} else {
+ 		cells = (cell_t *)prop->val.val;
+ 		min_bus = fdt32_to_cpu(cells[0]);
+-		max_bus = fdt32_to_cpu(cells[0]);
++		max_bus = fdt32_to_cpu(cells[1]);
+ 	}
+ 	if ((bus_num < min_bus) || (bus_num > max_bus))
+ 		FAIL_PROP(c, dti, node, prop, "PCI bus number %d out of range, expected (%d - %d)",
+@@ -1367,9 +1388,9 @@ static void check_property_phandle_args(
+ 				          const struct provider *provider)
+ {
+ 	struct node *root = dti->dt;
+-	int cell, cellsize = 0;
++	unsigned int cell, cellsize = 0;
+ 
+-	if (prop->val.len % sizeof(cell_t)) {
++	if (!is_multiple_of(prop->val.len, sizeof(cell_t))) {
+ 		FAIL_PROP(c, dti, node, prop,
+ 			  "property size (%d) is invalid, expected multiple of %zu",
+ 			  prop->val.len, sizeof(cell_t));
+@@ -1379,14 +1400,14 @@ static void check_property_phandle_args(
+ 	for (cell = 0; cell < prop->val.len / sizeof(cell_t); cell += cellsize + 1) {
+ 		struct node *provider_node;
+ 		struct property *cellprop;
+-		int phandle;
++		cell_t phandle;
+ 
+ 		phandle = propval_cell_n(prop, cell);
+ 		/*
+ 		 * Some bindings use a cell value 0 or -1 to skip over optional
+ 		 * entries when each index position has a specific definition.
+ 		 */
+-		if (phandle == 0 || phandle == -1) {
++		if (!phandle_is_valid(phandle)) {
+ 			/* Give up if this is an overlay with external references */
+ 			if (dti->dtsflags & DTSF_PLUGIN)
+ 				break;
+@@ -1452,7 +1473,8 @@ static void check_provider_cells_propert
+ }
+ #define WARNING_PROPERTY_PHANDLE_CELLS(nm, propname, cells_name, ...) \
+ 	static struct provider nm##_provider = { (propname), (cells_name), __VA_ARGS__ }; \
+-	WARNING(nm##_property, check_provider_cells_property, &nm##_provider, &phandle_references);
++	WARNING_IF_NOT_CELL(nm##_is_cell, cells_name); \
++	WARNING(nm##_property, check_provider_cells_property, &nm##_provider, &nm##_is_cell, &phandle_references);
+ 
+ WARNING_PROPERTY_PHANDLE_CELLS(clocks, "clocks", "#clock-cells");
+ WARNING_PROPERTY_PHANDLE_CELLS(cooling_device, "cooling-device", "#cooling-cells");
+@@ -1473,24 +1495,17 @@ WARNING_PROPERTY_PHANDLE_CELLS(thermal_s
+ 
+ static bool prop_is_gpio(struct property *prop)
+ {
+-	char *str;
+-
+ 	/*
+ 	 * *-gpios and *-gpio can appear in property names,
+ 	 * so skip over any false matches (only one known ATM)
+ 	 */
+-	if (strstr(prop->name, "nr-gpio"))
++	if (strends(prop->name, ",nr-gpios"))
+ 		return false;
+ 
+-	str = strrchr(prop->name, '-');
+-	if (str)
+-		str++;
+-	else
+-		str = prop->name;
+-	if (!(streq(str, "gpios") || streq(str, "gpio")))
+-		return false;
+-
+-	return true;
++	return strends(prop->name, "-gpios") ||
++		streq(prop->name, "gpios") ||
++		strends(prop->name, "-gpio") ||
++		streq(prop->name, "gpio");
+ }
+ 
+ static void check_gpios_property(struct check *c,
+@@ -1525,13 +1540,10 @@ static void check_deprecated_gpio_proper
+ 	struct property *prop;
+ 
+ 	for_each_property(node, prop) {
+-		char *str;
+-
+ 		if (!prop_is_gpio(prop))
+ 			continue;
+ 
+-		str = strstr(prop->name, "gpio");
+-		if (!streq(str, "gpio"))
++		if (!strends(prop->name, "gpio"))
+ 			continue;
+ 
+ 		FAIL_PROP(c, dti, node, prop,
+@@ -1561,21 +1573,106 @@ static void check_interrupt_provider(str
+ 				     struct node *node)
+ {
+ 	struct property *prop;
++	bool irq_provider = node_is_interrupt_provider(node);
+ 
+-	if (!node_is_interrupt_provider(node))
++	prop = get_property(node, "#interrupt-cells");
++	if (irq_provider && !prop) {
++		FAIL(c, dti, node,
++		     "Missing '#interrupt-cells' in interrupt provider");
+ 		return;
++	}
+ 
+-	prop = get_property(node, "#interrupt-cells");
+-	if (!prop)
++	if (!irq_provider && prop) {
+ 		FAIL(c, dti, node,
+-		     "Missing #interrupt-cells in interrupt provider");
++		     "'#interrupt-cells' found, but node is not an interrupt provider");
++		return;
++	}
++}
++WARNING(interrupt_provider, check_interrupt_provider, NULL, &interrupts_extended_is_cell);
+ 
+-	prop = get_property(node, "#address-cells");
+-	if (!prop)
++static void check_interrupt_map(struct check *c,
++				struct dt_info *dti,
++				struct node *node)
++{
++	struct node *root = dti->dt;
++	struct property *prop, *irq_map_prop;
++	size_t cellsize, cell, map_cells;
++
++	irq_map_prop = get_property(node, "interrupt-map");
++	if (!irq_map_prop)
++		return;
++
++	if (node->addr_cells < 0) {
+ 		FAIL(c, dti, node,
+-		     "Missing #address-cells in interrupt provider");
++		     "Missing '#address-cells' in interrupt-map provider");
++		return;
++	}
++	cellsize = node_addr_cells(node);
++	cellsize += propval_cell(get_property(node, "#interrupt-cells"));
++
++	prop = get_property(node, "interrupt-map-mask");
++	if (prop && (prop->val.len != (cellsize * sizeof(cell_t))))
++		FAIL_PROP(c, dti, node, prop,
++			  "property size (%d) is invalid, expected %zu",
++			  prop->val.len, cellsize * sizeof(cell_t));
++
++	if (!is_multiple_of(irq_map_prop->val.len, sizeof(cell_t))) {
++		FAIL_PROP(c, dti, node, irq_map_prop,
++			  "property size (%d) is invalid, expected multiple of %zu",
++			  irq_map_prop->val.len, sizeof(cell_t));
++		return;
++	}
++
++	map_cells = irq_map_prop->val.len / sizeof(cell_t);
++	for (cell = 0; cell < map_cells; ) {
++		struct node *provider_node;
++		struct property *cellprop;
++		int phandle;
++		size_t parent_cellsize;
++
++		if ((cell + cellsize) >= map_cells) {
++			FAIL_PROP(c, dti, node, irq_map_prop,
++				  "property size (%d) too small, expected > %zu",
++				  irq_map_prop->val.len, (cell + cellsize) * sizeof(cell_t));
++			break;
++		}
++		cell += cellsize;
++
++		phandle = propval_cell_n(irq_map_prop, cell);
++		if (!phandle_is_valid(phandle)) {
++			/* Give up if this is an overlay with external references */
++			if (!(dti->dtsflags & DTSF_PLUGIN))
++				FAIL_PROP(c, dti, node, irq_map_prop,
++					  "Cell %zu is not a phandle(%d)",
++					  cell, phandle);
++			break;
++		}
++
++		provider_node = get_node_by_phandle(root, phandle);
++		if (!provider_node) {
++			FAIL_PROP(c, dti, node, irq_map_prop,
++				  "Could not get phandle(%d) node for (cell %zu)",
++				  phandle, cell);
++			break;
++		}
++
++		cellprop = get_property(provider_node, "#interrupt-cells");
++		if (cellprop) {
++			parent_cellsize = propval_cell(cellprop);
++		} else {
++			FAIL(c, dti, node, "Missing property '#interrupt-cells' in node %s or bad phandle (referred from interrupt-map[%zu])",
++			     provider_node->fullpath, cell);
++			break;
++		}
++
++		cellprop = get_property(provider_node, "#address-cells");
++		if (cellprop)
++			parent_cellsize += propval_cell(cellprop);
++
++		cell += 1 + parent_cellsize;
++	}
+ }
+-WARNING(interrupt_provider, check_interrupt_provider, NULL);
++WARNING(interrupt_map, check_interrupt_map, NULL, &phandle_references, &addr_size_cells, &interrupt_provider);
+ 
+ static void check_interrupts_property(struct check *c,
+ 				      struct dt_info *dti,
+@@ -1584,13 +1681,13 @@ static void check_interrupts_property(st
+ 	struct node *root = dti->dt;
+ 	struct node *irq_node = NULL, *parent = node;
+ 	struct property *irq_prop, *prop = NULL;
+-	int irq_cells, phandle;
++	cell_t irq_cells, phandle;
+ 
+ 	irq_prop = get_property(node, "interrupts");
+ 	if (!irq_prop)
+ 		return;
+ 
+-	if (irq_prop->val.len % sizeof(cell_t))
++	if (!is_multiple_of(irq_prop->val.len, sizeof(cell_t)))
+ 		FAIL_PROP(c, dti, node, irq_prop, "size (%d) is invalid, expected multiple of %zu",
+ 		     irq_prop->val.len, sizeof(cell_t));
+ 
+@@ -1603,7 +1700,7 @@ static void check_interrupts_property(st
+ 		prop = get_property(parent, "interrupt-parent");
+ 		if (prop) {
+ 			phandle = propval_cell(prop);
+-			if ((phandle == 0) || (phandle == -1)) {
++			if (!phandle_is_valid(phandle)) {
+ 				/* Give up if this is an overlay with
+ 				 * external references */
+ 				if (dti->dtsflags & DTSF_PLUGIN)
+@@ -1639,7 +1736,7 @@ static void check_interrupts_property(st
+ 	}
+ 
+ 	irq_cells = propval_cell(prop);
+-	if (irq_prop->val.len % (irq_cells * sizeof(cell_t))) {
++	if (!is_multiple_of(irq_prop->val.len, irq_cells * sizeof(cell_t))) {
+ 		FAIL_PROP(c, dti, node, prop,
+ 			  "size is (%d), expected multiple of %d",
+ 			  irq_prop->val.len, (int)(irq_cells * sizeof(cell_t)));
+@@ -1750,7 +1847,7 @@ WARNING(graph_port, check_graph_port, NU
+ static struct node *get_remote_endpoint(struct check *c, struct dt_info *dti,
+ 					struct node *endpoint)
+ {
+-	int phandle;
++	cell_t phandle;
+ 	struct node *node;
+ 	struct property *prop;
+ 
+@@ -1760,7 +1857,7 @@ static struct node *get_remote_endpoint(
+ 
+ 	phandle = propval_cell(prop);
+ 	/* Give up if this is an overlay with external references */
+-	if (phandle == 0 || phandle == -1)
++	if (!phandle_is_valid(phandle))
+ 		return NULL;
+ 
+ 	node = get_node_by_phandle(dti->dt, phandle);
+@@ -1796,7 +1893,7 @@ WARNING(graph_endpoint, check_graph_endp
+ static struct check *check_table[] = {
+ 	&duplicate_node_names, &duplicate_property_names,
+ 	&node_name_chars, &node_name_format, &property_name_chars,
+-	&name_is_string, &name_properties,
++	&name_is_string, &name_properties, &node_name_vs_property_name,
+ 
+ 	&duplicate_label,
+ 
+@@ -1804,7 +1901,7 @@ static struct check *check_table[] = {
+ 	&phandle_references, &path_references,
+ 	&omit_unused_nodes,
+ 
+-	&address_cells_is_cell, &size_cells_is_cell, &interrupt_cells_is_cell,
++	&address_cells_is_cell, &size_cells_is_cell,
+ 	&device_type_is_string, &model_is_string, &status_is_string,
+ 	&label_is_string,
+ 
+@@ -1839,26 +1936,43 @@ static struct check *check_table[] = {
+ 	&chosen_node_is_root, &chosen_node_bootargs, &chosen_node_stdout_path,
+ 
+ 	&clocks_property,
++	&clocks_is_cell,
+ 	&cooling_device_property,
++	&cooling_device_is_cell,
+ 	&dmas_property,
++	&dmas_is_cell,
+ 	&hwlocks_property,
++	&hwlocks_is_cell,
+ 	&interrupts_extended_property,
++	&interrupts_extended_is_cell,
+ 	&io_channels_property,
++	&io_channels_is_cell,
+ 	&iommus_property,
++	&iommus_is_cell,
+ 	&mboxes_property,
++	&mboxes_is_cell,
+ 	&msi_parent_property,
++	&msi_parent_is_cell,
+ 	&mux_controls_property,
++	&mux_controls_is_cell,
+ 	&phys_property,
++	&phys_is_cell,
+ 	&power_domains_property,
++	&power_domains_is_cell,
+ 	&pwms_property,
++	&pwms_is_cell,
+ 	&resets_property,
++	&resets_is_cell,
+ 	&sound_dai_property,
++	&sound_dai_is_cell,
+ 	&thermal_sensors_property,
++	&thermal_sensors_is_cell,
+ 
+ 	&deprecated_gpio_property,
+ 	&gpios_property,
+ 	&interrupts_property,
+ 	&interrupt_provider,
++	&interrupt_map,
+ 
+ 	&alias_paths,
+ 
+@@ -1882,7 +1996,7 @@ static void enable_warning_error(struct
+ 
+ static void disable_warning_error(struct check *c, bool warn, bool error)
+ {
+-	int i;
++	unsigned int i;
+ 
+ 	/* Lowering level, also lower it for things this is the prereq
+ 	 * for */
+@@ -1903,7 +2017,7 @@ static void disable_warning_error(struct
+ 
+ void parse_checks_option(bool warn, bool error, const char *arg)
+ {
+-	int i;
++	unsigned int i;
+ 	const char *name = arg;
+ 	bool enable = true;
+ 
+@@ -1930,7 +2044,7 @@ void parse_checks_option(bool warn, bool
+ 
+ void process_checks(bool force, struct dt_info *dti)
+ {
+-	int i;
++	unsigned int i;
+ 	int error = 0;
+ 
+ 	for (i = 0; i < ARRAY_SIZE(check_table); i++) {
+--- a/scripts/dtc/dtc-lexer.l
++++ b/scripts/dtc/dtc-lexer.l
+@@ -57,7 +57,7 @@ static void PRINTF(1, 2) lexical_error(c
+ 			push_input_file(name);
+ 		}
+ 
+-<*>^"#"(line)?[ \t]+[0-9]+[ \t]+{STRING}([ \t]+[0-9]+)? {
++<*>^"#"(line)?[ \t]+[0-9]+[ \t]+{STRING}([ \t]+[0-9]+)* {
+ 			char *line, *fnstart, *fnend;
+ 			struct data fn;
+ 			/* skip text before line # */
+--- a/scripts/dtc/dtc.c
++++ b/scripts/dtc/dtc.c
+@@ -12,7 +12,7 @@
+  * Command line options
+  */
+ int quiet;		/* Level of quietness */
+-int reservenum;		/* Number of memory reservation slots */
++unsigned int reservenum;/* Number of memory reservation slots */
+ int minsize;		/* Minimum blob size */
+ int padsize;		/* Additional padding to blob */
+ int alignsize;		/* Additional padding to blob accroding to the alignsize */
+@@ -197,7 +197,7 @@ int main(int argc, char *argv[])
+ 			depname = optarg;
+ 			break;
+ 		case 'R':
+-			reservenum = strtol(optarg, NULL, 0);
++			reservenum = strtoul(optarg, NULL, 0);
+ 			break;
+ 		case 'S':
+ 			minsize = strtol(optarg, NULL, 0);
+@@ -359,8 +359,6 @@ int main(int argc, char *argv[])
+ #endif
+ 	} else if (streq(outform, "dtb")) {
+ 		dt_to_blob(outf, dti, outversion);
+-	} else if (streq(outform, "dtbo")) {
+-		dt_to_blob(outf, dti, outversion);
+ 	} else if (streq(outform, "asm")) {
+ 		dt_to_asm(outf, dti, outversion);
+ 	} else if (streq(outform, "null")) {
+--- a/scripts/dtc/dtc.h
++++ b/scripts/dtc/dtc.h
+@@ -35,7 +35,7 @@
+  * Command line options
+  */
+ extern int quiet;		/* Level of quietness */
+-extern int reservenum;		/* Number of memory reservation slots */
++extern unsigned int reservenum;	/* Number of memory reservation slots */
+ extern int minsize;		/* Minimum blob size */
+ extern int padsize;		/* Additional padding to blob */
+ extern int alignsize;		/* Additional padding to blob accroding to the alignsize */
+@@ -51,6 +51,11 @@ extern int annotate;		/* annotate .dts w
+ 
+ typedef uint32_t cell_t;
+ 
++static inline bool phandle_is_valid(cell_t phandle)
++{
++	return phandle != 0 && phandle != ~0U;
++}
++
+ static inline uint16_t dtb_ld16(const void *p)
+ {
+ 	const uint8_t *bp = (const uint8_t *)p;
+@@ -86,6 +91,16 @@ static inline uint64_t dtb_ld64(const vo
+ #define streq(a, b)	(strcmp((a), (b)) == 0)
+ #define strstarts(s, prefix)	(strncmp((s), (prefix), strlen(prefix)) == 0)
+ #define strprefixeq(a, n, b)	(strlen(b) == (n) && (memcmp(a, b, n) == 0))
++static inline bool strends(const char *str, const char *suffix)
++{
++	unsigned int len, suffix_len;
++
++	len = strlen(str);
++	suffix_len = strlen(suffix);
++	if (len < suffix_len)
++		return false;
++	return streq(str + len - suffix_len, suffix);
++}
+ 
+ #define ALIGN(x, a)	(((x) + (a) - 1) & ~((a) - 1))
+ 
+@@ -101,6 +116,12 @@ enum markertype {
+ 	TYPE_UINT64,
+ 	TYPE_STRING,
+ };
++
++static inline bool is_type_marker(enum markertype type)
++{
++	return type >= TYPE_UINT8;
++}
++
+ extern const char *markername(enum markertype markertype);
+ 
+ struct  marker {
+@@ -125,7 +146,22 @@ struct data {
+ 	for_each_marker(m) \
+ 		if ((m)->type == (t))
+ 
+-size_t type_marker_length(struct marker *m);
++static inline struct marker *next_type_marker(struct marker *m)
++{
++	for_each_marker(m)
++		if (is_type_marker(m->type))
++			break;
++	return m;
++}
++
++static inline size_t type_marker_length(struct marker *m)
++{
++	struct marker *next = next_type_marker(m->next);
++
++	if (next)
++		return next->offset - m->offset;
++	return 0;
++}
+ 
+ void data_free(struct data d);
+ 
+--- a/scripts/dtc/flattree.c
++++ b/scripts/dtc/flattree.c
+@@ -124,7 +124,8 @@ static void asm_emit_cell(void *e, cell_
+ {
+ 	FILE *f = e;
+ 
+-	fprintf(f, "\t.byte 0x%02x; .byte 0x%02x; .byte 0x%02x; .byte 0x%02x\n",
++	fprintf(f, "\t.byte\t0x%02x\n" "\t.byte\t0x%02x\n"
++		"\t.byte\t0x%02x\n" "\t.byte\t0x%02x\n",
+ 		(val >> 24) & 0xff, (val >> 16) & 0xff,
+ 		(val >> 8) & 0xff, val & 0xff);
+ }
+@@ -134,9 +135,9 @@ static void asm_emit_string(void *e, con
+ 	FILE *f = e;
+ 
+ 	if (len != 0)
+-		fprintf(f, "\t.string\t\"%.*s\"\n", len, str);
++		fprintf(f, "\t.asciz\t\"%.*s\"\n", len, str);
+ 	else
+-		fprintf(f, "\t.string\t\"%s\"\n", str);
++		fprintf(f, "\t.asciz\t\"%s\"\n", str);
+ }
+ 
+ static void asm_emit_align(void *e, int a)
+@@ -295,7 +296,7 @@ static struct data flatten_reserve_list(
+ {
+ 	struct reserve_info *re;
+ 	struct data d = empty_data;
+-	int    j;
++	unsigned int j;
+ 
+ 	for (re = reservelist; re; re = re->next) {
+ 		d = data_append_re(d, re->address, re->size);
+@@ -438,7 +439,7 @@ static void dump_stringtable_asm(FILE *f
+ 
+ 	while (p < (strbuf.val + strbuf.len)) {
+ 		len = strlen(p);
+-		fprintf(f, "\t.string \"%s\"\n", p);
++		fprintf(f, "\t.asciz \"%s\"\n", p);
+ 		p += len+1;
+ 	}
+ }
+--- a/scripts/dtc/libfdt/fdt.c
++++ b/scripts/dtc/libfdt/fdt.c
+@@ -90,6 +90,10 @@ int fdt_check_header(const void *fdt)
+ {
+ 	size_t hdrsize;
+ 
++	/* The device tree must be at an 8-byte aligned address */
++	if ((uintptr_t)fdt & 7)
++		return -FDT_ERR_ALIGNMENT;
++
+ 	if (fdt_magic(fdt) != FDT_MAGIC)
+ 		return -FDT_ERR_BADMAGIC;
+ 	if (!can_assume(LATEST)) {
+--- a/scripts/dtc/libfdt/fdt_rw.c
++++ b/scripts/dtc/libfdt/fdt_rw.c
+@@ -349,7 +349,10 @@ int fdt_add_subnode_namelen(void *fdt, i
+ 		return offset;
+ 
+ 	/* Try to place the new node after the parent's properties */
+-	fdt_next_tag(fdt, parentoffset, &nextoffset); /* skip the BEGIN_NODE */
++	tag = fdt_next_tag(fdt, parentoffset, &nextoffset);
++	/* the fdt_subnode_offset_namelen() should ensure this never hits */
++	if (!can_assume(LIBFDT_FLAWLESS) && (tag != FDT_BEGIN_NODE))
++		return -FDT_ERR_INTERNAL;
+ 	do {
+ 		offset = nextoffset;
+ 		tag = fdt_next_tag(fdt, offset, &nextoffset);
+@@ -391,7 +394,9 @@ int fdt_del_node(void *fdt, int nodeoffs
+ }
+ 
+ static void fdt_packblocks_(const char *old, char *new,
+-			    int mem_rsv_size, int struct_size)
++			    int mem_rsv_size,
++			    int struct_size,
++			    int strings_size)
+ {
+ 	int mem_rsv_off, struct_off, strings_off;
+ 
+@@ -406,8 +411,7 @@ static void fdt_packblocks_(const char *
+ 	fdt_set_off_dt_struct(new, struct_off);
+ 	fdt_set_size_dt_struct(new, struct_size);
+ 
+-	memmove(new + strings_off, old + fdt_off_dt_strings(old),
+-		fdt_size_dt_strings(old));
++	memmove(new + strings_off, old + fdt_off_dt_strings(old), strings_size);
+ 	fdt_set_off_dt_strings(new, strings_off);
+ 	fdt_set_size_dt_strings(new, fdt_size_dt_strings(old));
+ }
+@@ -467,7 +471,8 @@ int fdt_open_into(const void *fdt, void
+ 			return -FDT_ERR_NOSPACE;
+ 	}
+ 
+-	fdt_packblocks_(fdt, tmp, mem_rsv_size, struct_size);
++	fdt_packblocks_(fdt, tmp, mem_rsv_size, struct_size,
++			fdt_size_dt_strings(fdt));
+ 	memmove(buf, tmp, newsize);
+ 
+ 	fdt_set_magic(buf, FDT_MAGIC);
+@@ -487,7 +492,8 @@ int fdt_pack(void *fdt)
+ 
+ 	mem_rsv_size = (fdt_num_mem_rsv(fdt)+1)
+ 		* sizeof(struct fdt_reserve_entry);
+-	fdt_packblocks_(fdt, fdt, mem_rsv_size, fdt_size_dt_struct(fdt));
++	fdt_packblocks_(fdt, fdt, mem_rsv_size, fdt_size_dt_struct(fdt),
++			fdt_size_dt_strings(fdt));
+ 	fdt_set_totalsize(fdt, fdt_data_size_(fdt));
+ 
+ 	return 0;
+--- a/scripts/dtc/libfdt/fdt_strerror.c
++++ b/scripts/dtc/libfdt/fdt_strerror.c
+@@ -39,6 +39,7 @@ static struct fdt_errtabent fdt_errtable
+ 	FDT_ERRTABENT(FDT_ERR_BADOVERLAY),
+ 	FDT_ERRTABENT(FDT_ERR_NOPHANDLES),
+ 	FDT_ERRTABENT(FDT_ERR_BADFLAGS),
++	FDT_ERRTABENT(FDT_ERR_ALIGNMENT),
+ };
+ #define FDT_ERRTABSIZE	((int)(sizeof(fdt_errtable) / sizeof(fdt_errtable[0])))
+ 
+--- a/scripts/dtc/libfdt/libfdt.h
++++ b/scripts/dtc/libfdt/libfdt.h
+@@ -131,6 +131,13 @@ uint32_t fdt_next_tag(const void *fdt, i
+  * to work even with unaligned pointers on platforms (such as ARMv5) that don't
+  * like unaligned loads and stores.
+  */
++static inline uint16_t fdt16_ld(const fdt16_t *p)
++{
++	const uint8_t *bp = (const uint8_t *)p;
++
++	return ((uint16_t)bp[0] << 8) | bp[1];
++}
++
+ static inline uint32_t fdt32_ld(const fdt32_t *p)
+ {
+ 	const uint8_t *bp = (const uint8_t *)p;
+--- a/scripts/dtc/livetree.c
++++ b/scripts/dtc/livetree.c
+@@ -526,7 +526,7 @@ struct node *get_node_by_path(struct nod
+ 	p = strchr(path, '/');
+ 
+ 	for_each_child(tree, child) {
+-		if (p && strprefixeq(path, p - path, child->name))
++		if (p && strprefixeq(path, (size_t)(p - path), child->name))
+ 			return get_node_by_path(child, p+1);
+ 		else if (!p && streq(path, child->name))
+ 			return child;
+@@ -559,7 +559,7 @@ struct node *get_node_by_phandle(struct
+ {
+ 	struct node *child, *node;
+ 
+-	if ((phandle == 0) || (phandle == -1)) {
++	if (!phandle_is_valid(phandle)) {
+ 		assert(generate_fixups);
+ 		return NULL;
+ 	}
+@@ -594,7 +594,7 @@ cell_t get_node_phandle(struct node *roo
+ 	static cell_t phandle = 1; /* FIXME: ick, static local */
+ 	struct data d = empty_data;
+ 
+-	if ((node->phandle != 0) && (node->phandle != -1))
++	if (phandle_is_valid(node->phandle))
+ 		return node->phandle;
+ 
+ 	while (get_node_by_phandle(root, phandle))
+--- a/scripts/dtc/treesource.c
++++ b/scripts/dtc/treesource.c
+@@ -124,27 +124,6 @@ static void write_propval_int(FILE *f, c
+ 	}
+ }
+ 
+-static bool has_data_type_information(struct marker *m)
+-{
+-	return m->type >= TYPE_UINT8;
+-}
+-
+-static struct marker *next_type_marker(struct marker *m)
+-{
+-	while (m && !has_data_type_information(m))
+-		m = m->next;
+-	return m;
+-}
+-
+-size_t type_marker_length(struct marker *m)
+-{
+-	struct marker *next = next_type_marker(m->next);
+-
+-	if (next)
+-		return next->offset - m->offset;
+-	return 0;
+-}
+-
+ static const char *delim_start[] = {
+ 	[TYPE_UINT8] = "[",
+ 	[TYPE_UINT16] = "/bits/ 16 <",
+@@ -229,26 +208,39 @@ static void write_propval(FILE *f, struc
+ 		size_t chunk_len = (m->next ? m->next->offset : len) - m->offset;
+ 		size_t data_len = type_marker_length(m) ? : len - m->offset;
+ 		const char *p = &prop->val.val[m->offset];
++		struct marker *m_phandle;
+ 
+-		if (has_data_type_information(m)) {
++		if (is_type_marker(m->type)) {
+ 			emit_type = m->type;
+ 			fprintf(f, " %s", delim_start[emit_type]);
+ 		} else if (m->type == LABEL)
+ 			fprintf(f, " %s:", m->ref);
+-		else if (m->offset)
+-			fputc(' ', f);
+ 
+-		if (emit_type == TYPE_NONE) {
+-			assert(chunk_len == 0);
++		if (emit_type == TYPE_NONE || chunk_len == 0)
+ 			continue;
+-		}
+ 
+ 		switch(emit_type) {
+ 		case TYPE_UINT16:
+ 			write_propval_int(f, p, chunk_len, 2);
+ 			break;
+ 		case TYPE_UINT32:
+-			write_propval_int(f, p, chunk_len, 4);
++			m_phandle = prop->val.markers;
++			for_each_marker_of_type(m_phandle, REF_PHANDLE)
++				if (m->offset == m_phandle->offset)
++					break;
++
++			if (m_phandle) {
++				if (m_phandle->ref[0] == '/')
++					fprintf(f, "&{%s}", m_phandle->ref);
++				else
++					fprintf(f, "&%s", m_phandle->ref);
++				if (chunk_len > 4) {
++					fputc(' ', f);
++					write_propval_int(f, p + 4, chunk_len - 4, 4);
++				}
++			} else {
++				write_propval_int(f, p, chunk_len, 4);
++			}
+ 			break;
+ 		case TYPE_UINT64:
+ 			write_propval_int(f, p, chunk_len, 8);
+--- a/scripts/dtc/util.h
++++ b/scripts/dtc/util.h
+@@ -13,10 +13,10 @@
+  */
+ 
+ #ifdef __GNUC__
+-#ifdef __clang__
+-#define PRINTF(i, j)	__attribute__((format (printf, i, j)))
+-#else
++#if __GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4)
+ #define PRINTF(i, j)	__attribute__((format (gnu_printf, i, j)))
++#else
++#define PRINTF(i, j)	__attribute__((format (printf, i, j)))
+ #endif
+ #define NORETURN	__attribute__((noreturn))
+ #else
+--- a/scripts/dtc/version_gen.h
++++ b/scripts/dtc/version_gen.h
+@@ -1 +1 @@
+-#define DTC_VERSION "DTC 1.6.0-g183df9e9"
++#define DTC_VERSION "DTC 1.6.1-g0a3a9d34"
+--- a/scripts/dtc/yamltree.c
++++ b/scripts/dtc/yamltree.c
+@@ -29,11 +29,12 @@ char *yaml_error_name[] = {
+ 		    (emitter)->problem, __func__, __LINE__);		\
+ })
+ 
+-static void yaml_propval_int(yaml_emitter_t *emitter, struct marker *markers, char *data, unsigned int len, int width)
++static void yaml_propval_int(yaml_emitter_t *emitter, struct marker *markers,
++	char *data, unsigned int seq_offset, unsigned int len, int width)
+ {
+ 	yaml_event_t event;
+ 	void *tag;
+-	unsigned int off, start_offset = markers->offset;
++	unsigned int off;
+ 
+ 	switch(width) {
+ 		case 1: tag = "!u8"; break;
+@@ -66,7 +67,7 @@ static void yaml_propval_int(yaml_emitte
+ 			m = markers;
+ 			is_phandle = false;
+ 			for_each_marker_of_type(m, REF_PHANDLE) {
+-				if (m->offset == (start_offset + off)) {
++				if (m->offset == (seq_offset + off)) {
+ 					is_phandle = true;
+ 					break;
+ 				}
+@@ -114,6 +115,7 @@ static void yaml_propval(yaml_emitter_t
+ 	yaml_event_t event;
+ 	unsigned int len = prop->val.len;
+ 	struct marker *m = prop->val.markers;
++	struct marker *markers = prop->val.markers;
+ 
+ 	/* Emit the property name */
+ 	yaml_scalar_event_initialize(&event, NULL,
+@@ -151,19 +153,19 @@ static void yaml_propval(yaml_emitter_t
+ 
+ 		switch(m->type) {
+ 		case TYPE_UINT16:
+-			yaml_propval_int(emitter, m, data, chunk_len, 2);
++			yaml_propval_int(emitter, markers, data, m->offset, chunk_len, 2);
+ 			break;
+ 		case TYPE_UINT32:
+-			yaml_propval_int(emitter, m, data, chunk_len, 4);
++			yaml_propval_int(emitter, markers, data, m->offset, chunk_len, 4);
+ 			break;
+ 		case TYPE_UINT64:
+-			yaml_propval_int(emitter, m, data, chunk_len, 8);
++			yaml_propval_int(emitter, markers, data, m->offset, chunk_len, 8);
+ 			break;
+ 		case TYPE_STRING:
+ 			yaml_propval_string(emitter, data, chunk_len);
+ 			break;
+ 		default:
+-			yaml_propval_int(emitter, m, data, chunk_len, 1);
++			yaml_propval_int(emitter, markers, data, m->offset, chunk_len, 1);
+ 			break;
+ 		}
+ 	}

+ 48 - 0
target/linux/generic/backport-6.1/300-v5.18-pinctrl-qcom-Return--EINVAL-for-setting-affinity-if-no-IRQ-parent.patch

@@ -0,0 +1,48 @@
+From: Manivannan Sadhasivam <[email protected]>
+To: [email protected]
+Cc: [email protected], [email protected],
+        [email protected], [email protected],
+        [email protected],
+        Manivannan Sadhasivam <[email protected]>
+Subject: [PATCH] pinctrl: qcom: Return -EINVAL for setting affinity if no IRQ
+ parent
+Date: Thu, 13 Jan 2022 21:56:17 +0530
+Message-Id: <[email protected]>
+
+The MSM GPIO IRQ controller relies on the parent IRQ controller to set the
+CPU affinity for the IRQ. And this is only valid if there is any wakeup
+parent available and defined in DT.
+
+For the case of no parent IRQ controller defined in DT,
+msm_gpio_irq_set_affinity() and msm_gpio_irq_set_vcpu_affinity() should
+return -EINVAL instead of 0 as the affinity can't be set.
+
+Otherwise, below warning will be printed by genirq:
+
+genirq: irq_chip msmgpio did not update eff. affinity mask of irq 70
+
+Signed-off-by: Manivannan Sadhasivam <[email protected]>
+---
+ drivers/pinctrl/qcom/pinctrl-msm.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/pinctrl/qcom/pinctrl-msm.c
++++ b/drivers/pinctrl/qcom/pinctrl-msm.c
+@@ -1157,7 +1157,7 @@ static int msm_gpio_irq_set_affinity(str
+ 	if (d->parent_data && test_bit(d->hwirq, pctrl->skip_wake_irqs))
+ 		return irq_chip_set_affinity_parent(d, dest, force);
+ 
+-	return 0;
++	return -EINVAL;
+ }
+ 
+ static int msm_gpio_irq_set_vcpu_affinity(struct irq_data *d, void *vcpu_info)
+@@ -1168,7 +1168,7 @@ static int msm_gpio_irq_set_vcpu_affinit
+ 	if (d->parent_data && test_bit(d->hwirq, pctrl->skip_wake_irqs))
+ 		return irq_chip_set_vcpu_affinity_parent(d, vcpu_info);
+ 
+-	return 0;
++	return -EINVAL;
+ }
+ 
+ static void msm_gpio_irq_handler(struct irq_desc *desc)

+ 166 - 0
target/linux/generic/backport-6.1/301-v5.16-soc-qcom-smem-Support-reserved-memory-description.patch

@@ -0,0 +1,166 @@
+From b5af64fceb04dc298c5e69c517b4d83893ff060b Mon Sep 17 00:00:00 2001
+From: Bjorn Andersson <[email protected]>
+Date: Thu, 30 Sep 2021 11:21:10 -0700
+Subject: [PATCH 1/1] soc: qcom: smem: Support reserved-memory description
+
+Practically all modern Qualcomm platforms have a single reserved-memory
+region for SMEM. So rather than having to describe SMEM in the form of a
+node with a reference to a reserved-memory node, allow the SMEM device
+to be instantiated directly from the reserved-memory node.
+
+The current means of falling back to dereferencing the "memory-region"
+is kept as a fallback, if it's determined that the SMEM node is a
+reserved-memory node.
+
+The "qcom,smem" compatible is added to the reserved_mem_matches list, to
+allow the reserved-memory device to be probed.
+
+In order to retain the readability of the code, the resolution of
+resources is split from the actual ioremapping.
+
+Signed-off-by: Bjorn Andersson <[email protected]>
+Acked-by: Rob Herring <[email protected]>
+Reviewed-by: Vladimir Zapolskiy <[email protected]>
+Link: https://lore.kernel.org/r/[email protected]
+---
+ drivers/of/platform.c   |  1 +
+ drivers/soc/qcom/smem.c | 57 ++++++++++++++++++++++++++++-------------
+ 2 files changed, 40 insertions(+), 18 deletions(-)
+
+--- a/drivers/of/platform.c
++++ b/drivers/of/platform.c
+@@ -509,6 +509,7 @@ EXPORT_SYMBOL_GPL(of_platform_default_po
+ static const struct of_device_id reserved_mem_matches[] = {
+ 	{ .compatible = "qcom,rmtfs-mem" },
+ 	{ .compatible = "qcom,cmd-db" },
++	{ .compatible = "qcom,smem" },
+ 	{ .compatible = "ramoops" },
+ 	{ .compatible = "nvmem-rmem" },
+ 	{}
+--- a/drivers/soc/qcom/smem.c
++++ b/drivers/soc/qcom/smem.c
+@@ -9,6 +9,7 @@
+ #include <linux/module.h>
+ #include <linux/of.h>
+ #include <linux/of_address.h>
++#include <linux/of_reserved_mem.h>
+ #include <linux/platform_device.h>
+ #include <linux/sizes.h>
+ #include <linux/slab.h>
+@@ -240,7 +241,7 @@ static const u8 SMEM_INFO_MAGIC[] = { 0x
+  * @size:	size of the memory region
+  */
+ struct smem_region {
+-	u32 aux_base;
++	phys_addr_t aux_base;
+ 	void __iomem *virt_base;
+ 	size_t size;
+ };
+@@ -499,7 +500,7 @@ static void *qcom_smem_get_global(struct
+ 	for (i = 0; i < smem->num_regions; i++) {
+ 		region = &smem->regions[i];
+ 
+-		if (region->aux_base == aux_base || !aux_base) {
++		if ((u32)region->aux_base == aux_base || !aux_base) {
+ 			if (size != NULL)
+ 				*size = le32_to_cpu(entry->size);
+ 			return region->virt_base + le32_to_cpu(entry->offset);
+@@ -664,7 +665,7 @@ phys_addr_t qcom_smem_virt_to_phys(void
+ 		if (p < region->virt_base + region->size) {
+ 			u64 offset = p - region->virt_base;
+ 
+-			return (phys_addr_t)region->aux_base + offset;
++			return region->aux_base + offset;
+ 		}
+ 	}
+ 
+@@ -863,12 +864,12 @@ qcom_smem_enumerate_partitions(struct qc
+ 	return 0;
+ }
+ 
+-static int qcom_smem_map_memory(struct qcom_smem *smem, struct device *dev,
+-				const char *name, int i)
++static int qcom_smem_resolve_mem(struct qcom_smem *smem, const char *name,
++				 struct smem_region *region)
+ {
++	struct device *dev = smem->dev;
+ 	struct device_node *np;
+ 	struct resource r;
+-	resource_size_t size;
+ 	int ret;
+ 
+ 	np = of_parse_phandle(dev->of_node, name, 0);
+@@ -881,13 +882,9 @@ static int qcom_smem_map_memory(struct q
+ 	of_node_put(np);
+ 	if (ret)
+ 		return ret;
+-	size = resource_size(&r);
+ 
+-	smem->regions[i].virt_base = devm_ioremap_wc(dev, r.start, size);
+-	if (!smem->regions[i].virt_base)
+-		return -ENOMEM;
+-	smem->regions[i].aux_base = (u32)r.start;
+-	smem->regions[i].size = size;
++	region->aux_base = r.start;
++	region->size = resource_size(&r);
+ 
+ 	return 0;
+ }
+@@ -895,12 +892,14 @@ static int qcom_smem_map_memory(struct q
+ static int qcom_smem_probe(struct platform_device *pdev)
+ {
+ 	struct smem_header *header;
++	struct reserved_mem *rmem;
+ 	struct qcom_smem *smem;
+ 	size_t array_size;
+ 	int num_regions;
+ 	int hwlock_id;
+ 	u32 version;
+ 	int ret;
++	int i;
+ 
+ 	num_regions = 1;
+ 	if (of_find_property(pdev->dev.of_node, "qcom,rpm-msg-ram", NULL))
+@@ -914,13 +913,35 @@ static int qcom_smem_probe(struct platfo
+ 	smem->dev = &pdev->dev;
+ 	smem->num_regions = num_regions;
+ 
+-	ret = qcom_smem_map_memory(smem, &pdev->dev, "memory-region", 0);
+-	if (ret)
+-		return ret;
+-
+-	if (num_regions > 1 && (ret = qcom_smem_map_memory(smem, &pdev->dev,
+-					"qcom,rpm-msg-ram", 1)))
+-		return ret;
++	rmem = of_reserved_mem_lookup(pdev->dev.of_node);
++	if (rmem) {
++		smem->regions[0].aux_base = rmem->base;
++		smem->regions[0].size = rmem->size;
++	} else {
++		/*
++		 * Fall back to the memory-region reference, if we're not a
++		 * reserved-memory node.
++		 */
++		ret = qcom_smem_resolve_mem(smem, "memory-region", &smem->regions[0]);
++		if (ret)
++			return ret;
++	}
++
++	if (num_regions > 1) {
++		ret = qcom_smem_resolve_mem(smem, "qcom,rpm-msg-ram", &smem->regions[1]);
++		if (ret)
++			return ret;
++	}
++
++	for (i = 0; i < num_regions; i++) {
++		smem->regions[i].virt_base = devm_ioremap_wc(&pdev->dev,
++							     smem->regions[i].aux_base,
++							     smem->regions[i].size);
++		if (!smem->regions[i].virt_base) {
++			dev_err(&pdev->dev, "failed to remap %pa\n", &smem->regions[i].aux_base);
++			return -ENOMEM;
++		}
++	}
+ 
+ 	header = smem->regions[0].virt_base;
+ 	if (le32_to_cpu(header->initialized) != 1 ||

+ 33 - 0
target/linux/generic/backport-6.1/302-v5.16-watchdog-bcm63xx_wdt-fix-fallthrough-warning.patch

@@ -0,0 +1,33 @@
+From ee1a0696934a8b77a6a2098f92832c46d34ec5da Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <[email protected]>
+Date: Wed, 27 Oct 2021 14:31:35 +0200
+Subject: [PATCH] watchdog: bcm63xx_wdt: fix fallthrough warning
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+This fixes:
+drivers/watchdog/bcm63xx_wdt.c: In function 'bcm63xx_wdt_ioctl':
+drivers/watchdog/bcm63xx_wdt.c:208:17: warning: this statement may fall through [-Wimplicit-fallthrough=]
+
+Signed-off-by: Rafał Miłecki <[email protected]>
+Reviewed-by: Florian Fainelli <[email protected]>
+Reviewed-by: Guenter Roeck <[email protected]>
+Link: https://lore.kernel.org/r/[email protected]
+Signed-off-by: Guenter Roeck <[email protected]>
+Signed-off-by: Wim Van Sebroeck <[email protected]>
+---
+ drivers/watchdog/bcm63xx_wdt.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/watchdog/bcm63xx_wdt.c
++++ b/drivers/watchdog/bcm63xx_wdt.c
+@@ -207,6 +207,8 @@ static long bcm63xx_wdt_ioctl(struct fil
+ 
+ 		bcm63xx_wdt_pet();
+ 
++		fallthrough;
++
+ 	case WDIOC_GETTIMEOUT:
+ 		return put_user(wdt_time, p);
+ 

+ 162 - 0
target/linux/generic/backport-6.1/330-v5.16-01-MIPS-kernel-proc-add-CPU-option-reporting.patch

@@ -0,0 +1,162 @@
+From 626bfa03729959ea9917181fb3d8ffaa1594d02a Mon Sep 17 00:00:00 2001
+From: Hauke Mehrtens <[email protected]>
+Date: Wed, 13 Oct 2021 22:40:18 -0700
+Subject: [PATCH 1/1] MIPS: kernel: proc: add CPU option reporting
+
+Many MIPS CPUs have optional CPU features which are not activated for
+all CPU cores. Print the CPU options, which are implemented in the core,
+in /proc/cpuinfo. This makes it possible to see which features are
+supported and which are not supported. This should cover all standard
+MIPS extensions. Before, it only printed information about the main MIPS
+ASEs.
+
+Signed-off-by: Hauke Mehrtens <[email protected]>
+
+Changes from original patch[0]:
+- Remove cpu_has_6k_cache and cpu_has_8k_cache due to commit 6ce91ba8589a
+  ("MIPS: Remove cpu_has_6k_cache and cpu_has_8k_cache in cpu_cache_init()")
+- Add new options: mac2008_only, ftlbparex, gsexcex, mmid, mm_sysad,
+  mm_full
+- Use seq_puts instead of seq_printf as suggested by checkpatch
+- Minor commit message reword
+
+[0]: https://lore.kernel.org/linux-mips/[email protected]/
+
+Signed-off-by: Ilya Lipnitskiy <[email protected]>
+Acked-by: Hauke Mehrtens <[email protected]>
+Signed-off-by: Thomas Bogendoerfer <[email protected]>
+---
+ arch/mips/kernel/proc.c | 122 ++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 122 insertions(+)
+
+--- a/arch/mips/kernel/proc.c
++++ b/arch/mips/kernel/proc.c
+@@ -138,6 +138,128 @@ static int show_cpuinfo(struct seq_file
+ 		seq_printf(m, "micromips kernel\t: %s\n",
+ 		      (read_c0_config3() & MIPS_CONF3_ISA_OE) ?  "yes" : "no");
+ 	}
++
++	seq_puts(m, "Options implemented\t:");
++	if (cpu_has_tlb)
++		seq_puts(m, " tlb");
++	if (cpu_has_ftlb)
++		seq_puts(m, " ftlb");
++	if (cpu_has_tlbinv)
++		seq_puts(m, " tlbinv");
++	if (cpu_has_segments)
++		seq_puts(m, " segments");
++	if (cpu_has_rixiex)
++		seq_puts(m, " rixiex");
++	if (cpu_has_ldpte)
++		seq_puts(m, " ldpte");
++	if (cpu_has_maar)
++		seq_puts(m, " maar");
++	if (cpu_has_rw_llb)
++		seq_puts(m, " rw_llb");
++	if (cpu_has_4kex)
++		seq_puts(m, " 4kex");
++	if (cpu_has_3k_cache)
++		seq_puts(m, " 3k_cache");
++	if (cpu_has_4k_cache)
++		seq_puts(m, " 4k_cache");
++	if (cpu_has_tx39_cache)
++		seq_puts(m, " tx39_cache");
++	if (cpu_has_octeon_cache)
++		seq_puts(m, " octeon_cache");
++	if (cpu_has_fpu)
++		seq_puts(m, " fpu");
++	if (cpu_has_32fpr)
++		seq_puts(m, " 32fpr");
++	if (cpu_has_cache_cdex_p)
++		seq_puts(m, " cache_cdex_p");
++	if (cpu_has_cache_cdex_s)
++		seq_puts(m, " cache_cdex_s");
++	if (cpu_has_prefetch)
++		seq_puts(m, " prefetch");
++	if (cpu_has_mcheck)
++		seq_puts(m, " mcheck");
++	if (cpu_has_ejtag)
++		seq_puts(m, " ejtag");
++	if (cpu_has_llsc)
++		seq_puts(m, " llsc");
++	if (cpu_has_guestctl0ext)
++		seq_puts(m, " guestctl0ext");
++	if (cpu_has_guestctl1)
++		seq_puts(m, " guestctl1");
++	if (cpu_has_guestctl2)
++		seq_puts(m, " guestctl2");
++	if (cpu_has_guestid)
++		seq_puts(m, " guestid");
++	if (cpu_has_drg)
++		seq_puts(m, " drg");
++	if (cpu_has_rixi)
++		seq_puts(m, " rixi");
++	if (cpu_has_lpa)
++		seq_puts(m, " lpa");
++	if (cpu_has_mvh)
++		seq_puts(m, " mvh");
++	if (cpu_has_vtag_icache)
++		seq_puts(m, " vtag_icache");
++	if (cpu_has_dc_aliases)
++		seq_puts(m, " dc_aliases");
++	if (cpu_has_ic_fills_f_dc)
++		seq_puts(m, " ic_fills_f_dc");
++	if (cpu_has_pindexed_dcache)
++		seq_puts(m, " pindexed_dcache");
++	if (cpu_has_userlocal)
++		seq_puts(m, " userlocal");
++	if (cpu_has_nofpuex)
++		seq_puts(m, " nofpuex");
++	if (cpu_has_vint)
++		seq_puts(m, " vint");
++	if (cpu_has_veic)
++		seq_puts(m, " veic");
++	if (cpu_has_inclusive_pcaches)
++		seq_puts(m, " inclusive_pcaches");
++	if (cpu_has_perf_cntr_intr_bit)
++		seq_puts(m, " perf_cntr_intr_bit");
++	if (cpu_has_ufr)
++		seq_puts(m, " ufr");
++	if (cpu_has_fre)
++		seq_puts(m, " fre");
++	if (cpu_has_cdmm)
++		seq_puts(m, " cdmm");
++	if (cpu_has_small_pages)
++		seq_puts(m, " small_pages");
++	if (cpu_has_nan_legacy)
++		seq_puts(m, " nan_legacy");
++	if (cpu_has_nan_2008)
++		seq_puts(m, " nan_2008");
++	if (cpu_has_ebase_wg)
++		seq_puts(m, " ebase_wg");
++	if (cpu_has_badinstr)
++		seq_puts(m, " badinstr");
++	if (cpu_has_badinstrp)
++		seq_puts(m, " badinstrp");
++	if (cpu_has_contextconfig)
++		seq_puts(m, " contextconfig");
++	if (cpu_has_perf)
++		seq_puts(m, " perf");
++	if (cpu_has_mac2008_only)
++		seq_puts(m, " mac2008_only");
++	if (cpu_has_ftlbparex)
++		seq_puts(m, " ftlbparex");
++	if (cpu_has_gsexcex)
++		seq_puts(m, " gsexcex");
++	if (cpu_has_shared_ftlb_ram)
++		seq_puts(m, " shared_ftlb_ram");
++	if (cpu_has_shared_ftlb_entries)
++		seq_puts(m, " shared_ftlb_entries");
++	if (cpu_has_mipsmt_pertccounters)
++		seq_puts(m, " mipsmt_pertccounters");
++	if (cpu_has_mmid)
++		seq_puts(m, " mmid");
++	if (cpu_has_mm_sysad)
++		seq_puts(m, " mm_sysad");
++	if (cpu_has_mm_full)
++		seq_puts(m, " mm_full");
++	seq_puts(m, "\n");
++
+ 	seq_printf(m, "shadow register sets\t: %d\n",
+ 		      cpu_data[n].srsets);
+ 	seq_printf(m, "kscratch registers\t: %d\n",

+ 62 - 0
target/linux/generic/backport-6.1/330-v5.16-02-MIPS-Fix-using-smp_processor_id-in-preemptible-in-sh.patch

@@ -0,0 +1,62 @@
+From 1cab5bd69eb1f995ced2d7576cb15f8a8941fd85 Mon Sep 17 00:00:00 2001
+From: Tiezhu Yang <[email protected]>
+Date: Thu, 25 Nov 2021 19:39:32 +0800
+Subject: [PATCH 1/1] MIPS: Fix using smp_processor_id() in preemptible in
+ show_cpuinfo()
+
+There exists the following issue under DEBUG_PREEMPT:
+
+ BUG: using smp_processor_id() in preemptible [00000000] code: systemd/1
+ caller is show_cpuinfo+0x460/0xea0
+ ...
+ Call Trace:
+ [<ffffffff8020f0dc>] show_stack+0x94/0x128
+ [<ffffffff80e6cab4>] dump_stack_lvl+0x94/0xd8
+ [<ffffffff80e74c5c>] check_preemption_disabled+0x104/0x110
+ [<ffffffff802209c8>] show_cpuinfo+0x460/0xea0
+ [<ffffffff80539d54>] seq_read_iter+0xfc/0x4f8
+ [<ffffffff804fcc10>] new_sync_read+0x110/0x1b8
+ [<ffffffff804ff57c>] vfs_read+0x1b4/0x1d0
+ [<ffffffff804ffb18>] ksys_read+0xd0/0x110
+ [<ffffffff8021c090>] syscall_common+0x34/0x58
+
+We can see the following call trace:
+ show_cpuinfo()
+   cpu_has_fpu
+     current_cpu_data
+       smp_processor_id()
+
+ $ addr2line -f -e vmlinux 0xffffffff802209c8
+ show_cpuinfo
+ arch/mips/kernel/proc.c:188
+
+ $ head -188 arch/mips/kernel/proc.c | tail -1
+	 if (cpu_has_fpu)
+
+ arch/mips/include/asm/cpu-features.h
+ #  define cpu_has_fpu		(current_cpu_data.options & MIPS_CPU_FPU)
+
+ arch/mips/include/asm/cpu-info.h
+ #define current_cpu_data cpu_data[smp_processor_id()]
+
+Based on the above analysis, fix the issue by using raw_cpu_has_fpu
+which calls raw_smp_processor_id() in show_cpuinfo().
+
+Fixes: 626bfa037299 ("MIPS: kernel: proc: add CPU option reporting")
+Signed-off-by: Tiezhu Yang <[email protected]>
+Signed-off-by: Thomas Bogendoerfer <[email protected]>
+---
+ arch/mips/kernel/proc.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/mips/kernel/proc.c
++++ b/arch/mips/kernel/proc.c
+@@ -166,7 +166,7 @@ static int show_cpuinfo(struct seq_file
+ 		seq_puts(m, " tx39_cache");
+ 	if (cpu_has_octeon_cache)
+ 		seq_puts(m, " octeon_cache");
+-	if (cpu_has_fpu)
++	if (raw_cpu_has_fpu)
+ 		seq_puts(m, " fpu");
+ 	if (cpu_has_32fpr)
+ 		seq_puts(m, " 32fpr");

+ 186 - 0
target/linux/generic/backport-6.1/331-v5.19-mtd-spinand-Add-support-for-XTX-XT26G0xA.patch

@@ -0,0 +1,186 @@
+From f4c5c7f9d2e5ab005d57826b740b694b042a737c Mon Sep 17 00:00:00 2001
+From: Felix Matouschek <[email protected]>
+Date: Mon, 18 Apr 2022 15:28:03 +0200
+Subject: [PATCH 1/1] mtd: spinand: Add support for XTX XT26G0xA
+
+Add support for XTX Technology XT26G01AXXXXX, XT26G02AXXXXX and
+XT26G04AXXXXX SPI NAND.
+
+These are 3V, 1G/2G/4Gbit serial SLC NAND flash devices with on-die ECC
+(8bit strength per 512bytes).
+
+Tested on Teltonika RUTX10 flashed with OpenWrt.
+
+Links:
+  - http://www.xtxtech.com/download/?AId=225
+  - https://datasheet.lcsc.com/szlcsc/2005251034_XTX-XT26G01AWSEGA_C558841.pdf
+Signed-off-by: Felix Matouschek <[email protected]>
+Signed-off-by: Miquel Raynal <[email protected]>
+Link: https://lore.kernel.org/linux-mtd/[email protected]
+---
+ drivers/mtd/nand/spi/Makefile |   2 +-
+ drivers/mtd/nand/spi/core.c   |   1 +
+ drivers/mtd/nand/spi/xtx.c    | 129 ++++++++++++++++++++++++++++++++++
+ include/linux/mtd/spinand.h   |   1 +
+ 4 files changed, 132 insertions(+), 1 deletion(-)
+ create mode 100644 drivers/mtd/nand/spi/xtx.c
+
+--- a/drivers/mtd/nand/spi/Makefile
++++ b/drivers/mtd/nand/spi/Makefile
+@@ -1,3 +1,3 @@
+ # SPDX-License-Identifier: GPL-2.0
+-spinand-objs := core.o gigadevice.o macronix.o micron.o paragon.o toshiba.o winbond.o
++spinand-objs := core.o gigadevice.o macronix.o micron.o paragon.o toshiba.o winbond.o xtx.o
+ obj-$(CONFIG_MTD_SPI_NAND) += spinand.o
+--- a/drivers/mtd/nand/spi/core.c
++++ b/drivers/mtd/nand/spi/core.c
+@@ -902,6 +902,7 @@ static const struct spinand_manufacturer
+ 	&paragon_spinand_manufacturer,
+ 	&toshiba_spinand_manufacturer,
+ 	&winbond_spinand_manufacturer,
++	&xtx_spinand_manufacturer,
+ };
+ 
+ static int spinand_manufacturer_match(struct spinand_device *spinand,
+--- /dev/null
++++ b/drivers/mtd/nand/spi/xtx.c
+@@ -0,0 +1,129 @@
++// SPDX-License-Identifier: GPL-2.0
++/*
++ * Author:
++ * Felix Matouschek <[email protected]>
++ */
++
++#include <linux/device.h>
++#include <linux/kernel.h>
++#include <linux/mtd/spinand.h>
++
++#define SPINAND_MFR_XTX	0x0B
++
++#define XT26G0XA_STATUS_ECC_MASK	GENMASK(5, 2)
++#define XT26G0XA_STATUS_ECC_NO_DETECTED	(0 << 2)
++#define XT26G0XA_STATUS_ECC_8_CORRECTED	(3 << 4)
++#define XT26G0XA_STATUS_ECC_UNCOR_ERROR	(2 << 4)
++
++static SPINAND_OP_VARIANTS(read_cache_variants,
++		SPINAND_PAGE_READ_FROM_CACHE_QUADIO_OP(0, 1, NULL, 0),
++		SPINAND_PAGE_READ_FROM_CACHE_X4_OP(0, 1, NULL, 0),
++		SPINAND_PAGE_READ_FROM_CACHE_DUALIO_OP(0, 1, NULL, 0),
++		SPINAND_PAGE_READ_FROM_CACHE_X2_OP(0, 1, NULL, 0),
++		SPINAND_PAGE_READ_FROM_CACHE_OP(true, 0, 1, NULL, 0),
++		SPINAND_PAGE_READ_FROM_CACHE_OP(false, 0, 1, NULL, 0));
++
++static SPINAND_OP_VARIANTS(write_cache_variants,
++		SPINAND_PROG_LOAD_X4(true, 0, NULL, 0),
++		SPINAND_PROG_LOAD(true, 0, NULL, 0));
++
++static SPINAND_OP_VARIANTS(update_cache_variants,
++		SPINAND_PROG_LOAD_X4(false, 0, NULL, 0),
++		SPINAND_PROG_LOAD(false, 0, NULL, 0));
++
++static int xt26g0xa_ooblayout_ecc(struct mtd_info *mtd, int section,
++				   struct mtd_oob_region *region)
++{
++	if (section)
++		return -ERANGE;
++
++	region->offset = 48;
++	region->length = 16;
++
++	return 0;
++}
++
++static int xt26g0xa_ooblayout_free(struct mtd_info *mtd, int section,
++				   struct mtd_oob_region *region)
++{
++	if (section)
++		return -ERANGE;
++
++	region->offset = 1;
++	region->length = 47;
++
++	return 0;
++}
++
++static const struct mtd_ooblayout_ops xt26g0xa_ooblayout = {
++	.ecc = xt26g0xa_ooblayout_ecc,
++	.free = xt26g0xa_ooblayout_free,
++};
++
++static int xt26g0xa_ecc_get_status(struct spinand_device *spinand,
++					 u8 status)
++{
++	status = status & XT26G0XA_STATUS_ECC_MASK;
++
++	switch (status) {
++	case XT26G0XA_STATUS_ECC_NO_DETECTED:
++		return 0;
++	case XT26G0XA_STATUS_ECC_8_CORRECTED:
++		return 8;
++	case XT26G0XA_STATUS_ECC_UNCOR_ERROR:
++		return -EBADMSG;
++	default:
++		break;
++	}
++
++	/* At this point values greater than (2 << 4) are invalid  */
++	if (status > XT26G0XA_STATUS_ECC_UNCOR_ERROR)
++		return -EINVAL;
++
++	/* (1 << 2) through (7 << 2) are 1-7 corrected errors */
++	return status >> 2;
++}
++
++static const struct spinand_info xtx_spinand_table[] = {
++	SPINAND_INFO("XT26G01A",
++		     SPINAND_ID(SPINAND_READID_METHOD_OPCODE_ADDR, 0xE1),
++		     NAND_MEMORG(1, 2048, 64, 64, 1024, 20, 1, 1, 1),
++		     NAND_ECCREQ(8, 512),
++		     SPINAND_INFO_OP_VARIANTS(&read_cache_variants,
++					      &write_cache_variants,
++					      &update_cache_variants),
++		     SPINAND_HAS_QE_BIT,
++		     SPINAND_ECCINFO(&xt26g0xa_ooblayout,
++				     xt26g0xa_ecc_get_status)),
++	SPINAND_INFO("XT26G02A",
++		     SPINAND_ID(SPINAND_READID_METHOD_OPCODE_ADDR, 0xE2),
++		     NAND_MEMORG(1, 2048, 64, 64, 2048, 40, 1, 1, 1),
++		     NAND_ECCREQ(8, 512),
++		     SPINAND_INFO_OP_VARIANTS(&read_cache_variants,
++					      &write_cache_variants,
++					      &update_cache_variants),
++		     SPINAND_HAS_QE_BIT,
++		     SPINAND_ECCINFO(&xt26g0xa_ooblayout,
++				     xt26g0xa_ecc_get_status)),
++	SPINAND_INFO("XT26G04A",
++		     SPINAND_ID(SPINAND_READID_METHOD_OPCODE_ADDR, 0xE3),
++		     NAND_MEMORG(1, 2048, 64, 128, 2048, 40, 1, 1, 1),
++		     NAND_ECCREQ(8, 512),
++		     SPINAND_INFO_OP_VARIANTS(&read_cache_variants,
++					      &write_cache_variants,
++					      &update_cache_variants),
++		     SPINAND_HAS_QE_BIT,
++		     SPINAND_ECCINFO(&xt26g0xa_ooblayout,
++				     xt26g0xa_ecc_get_status)),
++};
++
++static const struct spinand_manufacturer_ops xtx_spinand_manuf_ops = {
++};
++
++const struct spinand_manufacturer xtx_spinand_manufacturer = {
++	.id = SPINAND_MFR_XTX,
++	.name = "XTX",
++	.chips = xtx_spinand_table,
++	.nchips = ARRAY_SIZE(xtx_spinand_table),
++	.ops = &xtx_spinand_manuf_ops,
++};
+--- a/include/linux/mtd/spinand.h
++++ b/include/linux/mtd/spinand.h
+@@ -266,6 +266,7 @@ extern const struct spinand_manufacturer
+ extern const struct spinand_manufacturer paragon_spinand_manufacturer;
+ extern const struct spinand_manufacturer toshiba_spinand_manufacturer;
+ extern const struct spinand_manufacturer winbond_spinand_manufacturer;
++extern const struct spinand_manufacturer xtx_spinand_manufacturer;
+ 
+ /**
+  * struct spinand_op_variants - SPI NAND operation variants

+ 219 - 0
target/linux/generic/backport-6.1/344-v5.18-01-phy-marvell-phy-mvebu-a3700-comphy-Remove-port-from-.patch

@@ -0,0 +1,219 @@
+From 4bf18d5a2dd02db8c5b16a2cfae513510506df5b Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Pali=20Roh=C3=A1r?= <[email protected]>
+Date: Thu, 3 Feb 2022 22:44:40 +0100
+Subject: [PATCH 1/2] phy: marvell: phy-mvebu-a3700-comphy: Remove port from
+ driver configuration
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Port number is encoded into argument for SMC call. It is zero for SATA,
+PCIe and also both USB 3.0 PHYs. It is non-zero only for Ethernet PHY
+(incorrectly called SGMII) on lane 0. Ethernet PHY on lane 1 also uses zero
+port number.
+
+So the "port" bits for the SMC call argument can be constructed directly
+from PHY type and lane number.
+
+Change driver code to always pass zero port number for non-ethernet PHYs
+and for ethernet PHYs determine the port number from the lane number. This
+simplifies the driver.
+
+As the port number from the DT PHY configuration is not used anymore, remove the
+driver code which parses it. This also simplifies the driver.
+
+Signed-off-by: Pali Rohár <[email protected]>
+Signed-off-by: Marek Behún <[email protected]>
+Reviewed-by: Miquel Raynal <[email protected]>
+Link: https://lore.kernel.org/r/[email protected]
+Signed-off-by: Vinod Koul <[email protected]>
+---
+ drivers/phy/marvell/phy-mvebu-a3700-comphy.c | 62 +++++++++-----------
+ 1 file changed, 29 insertions(+), 33 deletions(-)
+
+--- a/drivers/phy/marvell/phy-mvebu-a3700-comphy.c
++++ b/drivers/phy/marvell/phy-mvebu-a3700-comphy.c
+@@ -20,7 +20,6 @@
+ #include <linux/platform_device.h>
+ 
+ #define MVEBU_A3700_COMPHY_LANES		3
+-#define MVEBU_A3700_COMPHY_PORTS		2
+ 
+ /* COMPHY Fast SMC function identifiers */
+ #define COMPHY_SIP_POWER_ON			0x82000001
+@@ -45,51 +44,47 @@
+ #define COMPHY_FW_NET(mode, idx, speed)		(COMPHY_FW_MODE(mode) | \
+ 						 ((idx) << 8) |	\
+ 						 ((speed) << 2))
+-#define COMPHY_FW_PCIE(mode, idx, speed, width)	(COMPHY_FW_NET(mode, idx, speed) | \
++#define COMPHY_FW_PCIE(mode, speed, width)	(COMPHY_FW_NET(mode, 0, speed) | \
+ 						 ((width) << 18))
+ 
+ struct mvebu_a3700_comphy_conf {
+ 	unsigned int lane;
+ 	enum phy_mode mode;
+ 	int submode;
+-	unsigned int port;
+ 	u32 fw_mode;
+ };
+ 
+-#define MVEBU_A3700_COMPHY_CONF(_lane, _mode, _smode, _port, _fw)	\
++#define MVEBU_A3700_COMPHY_CONF(_lane, _mode, _smode, _fw)		\
+ 	{								\
+ 		.lane = _lane,						\
+ 		.mode = _mode,						\
+ 		.submode = _smode,					\
+-		.port = _port,						\
+ 		.fw_mode = _fw,						\
+ 	}
+ 
+-#define MVEBU_A3700_COMPHY_CONF_GEN(_lane, _mode, _port, _fw) \
+-	MVEBU_A3700_COMPHY_CONF(_lane, _mode, PHY_INTERFACE_MODE_NA, _port, _fw)
++#define MVEBU_A3700_COMPHY_CONF_GEN(_lane, _mode, _fw) \
++	MVEBU_A3700_COMPHY_CONF(_lane, _mode, PHY_INTERFACE_MODE_NA, _fw)
+ 
+-#define MVEBU_A3700_COMPHY_CONF_ETH(_lane, _smode, _port, _fw) \
+-	MVEBU_A3700_COMPHY_CONF(_lane, PHY_MODE_ETHERNET, _smode, _port, _fw)
++#define MVEBU_A3700_COMPHY_CONF_ETH(_lane, _smode, _fw) \
++	MVEBU_A3700_COMPHY_CONF(_lane, PHY_MODE_ETHERNET, _smode, _fw)
+ 
+ static const struct mvebu_a3700_comphy_conf mvebu_a3700_comphy_modes[] = {
+ 	/* lane 0 */
+-	MVEBU_A3700_COMPHY_CONF_GEN(0, PHY_MODE_USB_HOST_SS, 0,
++	MVEBU_A3700_COMPHY_CONF_GEN(0, PHY_MODE_USB_HOST_SS,
+ 				    COMPHY_FW_MODE_USB3H),
+-	MVEBU_A3700_COMPHY_CONF_ETH(0, PHY_INTERFACE_MODE_SGMII, 1,
++	MVEBU_A3700_COMPHY_CONF_ETH(0, PHY_INTERFACE_MODE_SGMII,
+ 				    COMPHY_FW_MODE_SGMII),
+-	MVEBU_A3700_COMPHY_CONF_ETH(0, PHY_INTERFACE_MODE_2500BASEX, 1,
++	MVEBU_A3700_COMPHY_CONF_ETH(0, PHY_INTERFACE_MODE_2500BASEX,
+ 				    COMPHY_FW_MODE_2500BASEX),
+ 	/* lane 1 */
+-	MVEBU_A3700_COMPHY_CONF_GEN(1, PHY_MODE_PCIE, 0,
+-				    COMPHY_FW_MODE_PCIE),
+-	MVEBU_A3700_COMPHY_CONF_ETH(1, PHY_INTERFACE_MODE_SGMII, 0,
++	MVEBU_A3700_COMPHY_CONF_GEN(1, PHY_MODE_PCIE, COMPHY_FW_MODE_PCIE),
++	MVEBU_A3700_COMPHY_CONF_ETH(1, PHY_INTERFACE_MODE_SGMII,
+ 				    COMPHY_FW_MODE_SGMII),
+-	MVEBU_A3700_COMPHY_CONF_ETH(1, PHY_INTERFACE_MODE_2500BASEX, 0,
++	MVEBU_A3700_COMPHY_CONF_ETH(1, PHY_INTERFACE_MODE_2500BASEX,
+ 				    COMPHY_FW_MODE_2500BASEX),
+ 	/* lane 2 */
+-	MVEBU_A3700_COMPHY_CONF_GEN(2, PHY_MODE_SATA, 0,
+-				    COMPHY_FW_MODE_SATA),
+-	MVEBU_A3700_COMPHY_CONF_GEN(2, PHY_MODE_USB_HOST_SS, 0,
++	MVEBU_A3700_COMPHY_CONF_GEN(2, PHY_MODE_SATA, COMPHY_FW_MODE_SATA),
++	MVEBU_A3700_COMPHY_CONF_GEN(2, PHY_MODE_USB_HOST_SS,
+ 				    COMPHY_FW_MODE_USB3H),
+ };
+ 
+@@ -98,7 +93,6 @@ struct mvebu_a3700_comphy_lane {
+ 	unsigned int id;
+ 	enum phy_mode mode;
+ 	int submode;
+-	int port;
+ };
+ 
+ static int mvebu_a3700_comphy_smc(unsigned long function, unsigned long lane,
+@@ -120,7 +114,7 @@ static int mvebu_a3700_comphy_smc(unsign
+ 	}
+ }
+ 
+-static int mvebu_a3700_comphy_get_fw_mode(int lane, int port,
++static int mvebu_a3700_comphy_get_fw_mode(int lane,
+ 					  enum phy_mode mode,
+ 					  int submode)
+ {
+@@ -132,7 +126,6 @@ static int mvebu_a3700_comphy_get_fw_mod
+ 
+ 	for (i = 0; i < n; i++) {
+ 		if (mvebu_a3700_comphy_modes[i].lane == lane &&
+-		    mvebu_a3700_comphy_modes[i].port == port &&
+ 		    mvebu_a3700_comphy_modes[i].mode == mode &&
+ 		    mvebu_a3700_comphy_modes[i].submode == submode)
+ 			break;
+@@ -153,7 +146,7 @@ static int mvebu_a3700_comphy_set_mode(s
+ 	if (submode == PHY_INTERFACE_MODE_1000BASEX)
+ 		submode = PHY_INTERFACE_MODE_SGMII;
+ 
+-	fw_mode = mvebu_a3700_comphy_get_fw_mode(lane->id, lane->port, mode,
++	fw_mode = mvebu_a3700_comphy_get_fw_mode(lane->id, mode,
+ 						 submode);
+ 	if (fw_mode < 0) {
+ 		dev_err(lane->dev, "invalid COMPHY mode\n");
+@@ -172,9 +165,10 @@ static int mvebu_a3700_comphy_power_on(s
+ 	struct mvebu_a3700_comphy_lane *lane = phy_get_drvdata(phy);
+ 	u32 fw_param;
+ 	int fw_mode;
++	int fw_port;
+ 	int ret;
+ 
+-	fw_mode = mvebu_a3700_comphy_get_fw_mode(lane->id, lane->port,
++	fw_mode = mvebu_a3700_comphy_get_fw_mode(lane->id,
+ 						 lane->mode, lane->submode);
+ 	if (fw_mode < 0) {
+ 		dev_err(lane->dev, "invalid COMPHY mode\n");
+@@ -191,17 +185,18 @@ static int mvebu_a3700_comphy_power_on(s
+ 		fw_param = COMPHY_FW_MODE(fw_mode);
+ 		break;
+ 	case PHY_MODE_ETHERNET:
++		fw_port = (lane->id == 0) ? 1 : 0;
+ 		switch (lane->submode) {
+ 		case PHY_INTERFACE_MODE_SGMII:
+ 			dev_dbg(lane->dev, "set lane %d to SGMII mode\n",
+ 				lane->id);
+-			fw_param = COMPHY_FW_NET(fw_mode, lane->port,
++			fw_param = COMPHY_FW_NET(fw_mode, fw_port,
+ 						 COMPHY_FW_SPEED_1_25G);
+ 			break;
+ 		case PHY_INTERFACE_MODE_2500BASEX:
+ 			dev_dbg(lane->dev, "set lane %d to 2500BASEX mode\n",
+ 				lane->id);
+-			fw_param = COMPHY_FW_NET(fw_mode, lane->port,
++			fw_param = COMPHY_FW_NET(fw_mode, fw_port,
+ 						 COMPHY_FW_SPEED_3_125G);
+ 			break;
+ 		default:
+@@ -212,8 +207,7 @@ static int mvebu_a3700_comphy_power_on(s
+ 		break;
+ 	case PHY_MODE_PCIE:
+ 		dev_dbg(lane->dev, "set lane %d to PCIe mode\n", lane->id);
+-		fw_param = COMPHY_FW_PCIE(fw_mode, lane->port,
+-					  COMPHY_FW_SPEED_5G,
++		fw_param = COMPHY_FW_PCIE(fw_mode, COMPHY_FW_SPEED_5G,
+ 					  phy->attrs.bus_width);
+ 		break;
+ 	default:
+@@ -247,17 +241,20 @@ static struct phy *mvebu_a3700_comphy_xl
+ 					    struct of_phandle_args *args)
+ {
+ 	struct mvebu_a3700_comphy_lane *lane;
++	unsigned int port;
+ 	struct phy *phy;
+ 
+-	if (WARN_ON(args->args[0] >= MVEBU_A3700_COMPHY_PORTS))
+-		return ERR_PTR(-EINVAL);
+-
+ 	phy = of_phy_simple_xlate(dev, args);
+ 	if (IS_ERR(phy))
+ 		return phy;
+ 
+ 	lane = phy_get_drvdata(phy);
+-	lane->port = args->args[0];
++
++	port = args->args[0];
++	if (port != 0 && (port != 1 || lane->id != 0)) {
++		dev_err(lane->dev, "invalid port number %u\n", port);
++		return ERR_PTR(-EINVAL);
++	}
+ 
+ 	return phy;
+ }
+@@ -302,7 +299,6 @@ static int mvebu_a3700_comphy_probe(stru
+ 		lane->mode = PHY_MODE_INVALID;
+ 		lane->submode = PHY_INTERFACE_MODE_NA;
+ 		lane->id = lane_id;
+-		lane->port = -1;
+ 		phy_set_drvdata(phy, lane);
+ 	}
+ 
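
For reference, the firmware RPC path that the hunks above reshape (and that the next patch in this series removes entirely) boils down to one SMC call per lane. Below is a minimal sketch, reusing the SIP function ID and error mapping from the driver code quoted in this series; the helper name is made up and this is not a drop-in replacement:

#include <linux/arm-smccc.h>
#include <linux/errno.h>

#define COMPHY_SIP_POWER_ON	0x82000001

/* Ask the firmware to power on one comphy lane (old RPC-based flow). */
static int a3700_comphy_fw_power_on(unsigned long lane, unsigned long fw_param)
{
	struct arm_smccc_res res;
	s32 ret;

	arm_smccc_smc(COMPHY_SIP_POWER_ON, lane, fw_param, 0, 0, 0, 0, 0, &res);
	ret = res.a0;

	switch (ret) {
	case SMCCC_RET_SUCCESS:
		return 0;
	case SMCCC_RET_NOT_SUPPORTED:
		/* Old firmware without comphy support ends up here. */
		return -EOPNOTSUPP;
	default:
		return -EINVAL;
	}
}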

+ 1552 - 0
target/linux/generic/backport-6.1/344-v5.18-02-phy-marvell-phy-mvebu-a3700-comphy-Add-native-kernel.patch

@@ -0,0 +1,1552 @@
+From 934337080c6c59b75db76b180b509f218640ad48 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Pali=20Roh=C3=A1r?= <[email protected]>
+Date: Thu, 3 Feb 2022 22:44:41 +0100
+Subject: [PATCH 2/2] phy: marvell: phy-mvebu-a3700-comphy: Add native kernel
+ implementation
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Remove old RPC implementation and add a new native kernel implementation.
+
+The old implementation uses the ARM SMC API to issue RPC calls to ARM Trusted
+Firmware, which provides the real implementation of the PHY configuration.
+
+But older versions of ARM Trusted Firmware do not provide this PHY
+configuration functionality and simply return "operation not supported"; or
+worse, some versions provide the configuration functionality incorrectly.
+
+For example, the firmware shipped on the ESPRESSObin board has this older
+version of ARM Trusted Firmware, and therefore SATA, USB 3.0 and PCIe
+functionality does not work with newer versions of the Linux kernel.
+
+Due to the above reasons, the following commits were introduced into Linux
+to work around these issues by ignoring the -EOPNOTSUPP error code returned
+by the phy-mvebu-a3700-comphy driver's phy_power_on() function:
+
+commit 45aefe3d2251 ("ata: ahci: mvebu: Make SATA PHY optional for Armada
+3720")
+commit 3241929b67d2 ("usb: host: xhci: mvebu: make USB 3.0 PHY optional for
+Armada 3720")
+commit b0c6ae0f8948 ("PCI: aardvark: Fix initialization with old Marvell's
+Arm Trusted Firmware")
+
+Replace this RPC implementation with a proper native kernel implementation
+that is independent of the firmware and never returns -EOPNOTSUPP for valid
+arguments.
+
+This should solve multiple issues with real-world boards, where it is not
+possible or really inconvenient to change the firmware. Let's eliminate
+these issues.
+
+This implementation is ported directly from the Armada 3720 comphy driver
+found in the newest version of the ARM Trusted Firmware source code, with
+various register-name fixes, some added comments, and some refactoring
+because the original code did not conform to kernel standards. PCIe mode
+poweroff support and PHY reset support were also added here. These changes
+are also going to be sent to ARM Trusted Firmware.
+
+[ Pali did the porting from ATF.
+  I (Marek) then fixed some register names, some various other things,
+  added some comments and refactored the code to kernel standards. Also
+  fixed PHY poweroff and added PHY reset. ]
+
+Signed-off-by: Pali Rohár <[email protected]>
+Acked-by: Miquel Raynal <[email protected]>
+Signed-off-by: Marek Behún <[email protected]>
+Link: https://lore.kernel.org/r/[email protected]
+Signed-off-by: Vinod Koul <[email protected]>
+---
+ drivers/phy/marvell/phy-mvebu-a3700-comphy.c | 1332 ++++++++++++++++--
+ 1 file changed, 1215 insertions(+), 117 deletions(-)
+
+--- a/drivers/phy/marvell/phy-mvebu-a3700-comphy.c
++++ b/drivers/phy/marvell/phy-mvebu-a3700-comphy.c
+@@ -5,12 +5,16 @@
+  * Authors:
+  *   Evan Wang <[email protected]>
+  *   Miquèl Raynal <[email protected]>
++ *   Pali Rohár <[email protected]>
++ *   Marek Behún <[email protected]>
+  *
+  * Structure inspired from phy-mvebu-cp110-comphy.c written by Antoine Tenart.
+- * SMC call initial support done by Grzegorz Jaszczyk.
++ * Comphy code from ARM Trusted Firmware ported by Pali Rohár <[email protected]>
++ * and Marek Behún <[email protected]>.
+  */
+ 
+-#include <linux/arm-smccc.h>
++#include <linux/bitfield.h>
++#include <linux/clk.h>
+ #include <linux/io.h>
+ #include <linux/iopoll.h>
+ #include <linux/mfd/syscon.h>
+@@ -18,103 +22,1118 @@
+ #include <linux/phy.h>
+ #include <linux/phy/phy.h>
+ #include <linux/platform_device.h>
++#include <linux/spinlock.h>
+ 
+-#define MVEBU_A3700_COMPHY_LANES		3
++#define PLL_SET_DELAY_US		600
++#define COMPHY_PLL_SLEEP		1000
++#define COMPHY_PLL_TIMEOUT		150000
++
++/* Comphy lane2 indirect access register offset */
++#define COMPHY_LANE2_INDIR_ADDR		0x0
++#define COMPHY_LANE2_INDIR_DATA		0x4
++
++/* SATA and USB3 PHY offset compared to SATA PHY */
++#define COMPHY_LANE2_REGS_BASE		0x200
++
++/*
++ * When accessing common PHY lane registers directly, we need to shift by 1,
++ * since the registers are 16-bit.
++ */
++#define COMPHY_LANE_REG_DIRECT(reg)	(((reg) & 0x7FF) << 1)
++
++/* COMPHY registers */
++#define COMPHY_POWER_PLL_CTRL		0x01
++#define PU_IVREF_BIT			BIT(15)
++#define PU_PLL_BIT			BIT(14)
++#define PU_RX_BIT			BIT(13)
++#define PU_TX_BIT			BIT(12)
++#define PU_TX_INTP_BIT			BIT(11)
++#define PU_DFE_BIT			BIT(10)
++#define RESET_DTL_RX_BIT		BIT(9)
++#define PLL_LOCK_BIT			BIT(8)
++#define REF_FREF_SEL_MASK		GENMASK(4, 0)
++#define REF_FREF_SEL_SERDES_25MHZ	FIELD_PREP(REF_FREF_SEL_MASK, 0x1)
++#define REF_FREF_SEL_SERDES_40MHZ	FIELD_PREP(REF_FREF_SEL_MASK, 0x3)
++#define REF_FREF_SEL_SERDES_50MHZ	FIELD_PREP(REF_FREF_SEL_MASK, 0x4)
++#define REF_FREF_SEL_PCIE_USB3_25MHZ	FIELD_PREP(REF_FREF_SEL_MASK, 0x2)
++#define REF_FREF_SEL_PCIE_USB3_40MHZ	FIELD_PREP(REF_FREF_SEL_MASK, 0x3)
++#define COMPHY_MODE_MASK		GENMASK(7, 5)
++#define COMPHY_MODE_SATA		FIELD_PREP(COMPHY_MODE_MASK, 0x0)
++#define COMPHY_MODE_PCIE		FIELD_PREP(COMPHY_MODE_MASK, 0x3)
++#define COMPHY_MODE_SERDES		FIELD_PREP(COMPHY_MODE_MASK, 0x4)
++#define COMPHY_MODE_USB3		FIELD_PREP(COMPHY_MODE_MASK, 0x5)
++
++#define COMPHY_KVCO_CAL_CTRL		0x02
++#define USE_MAX_PLL_RATE_BIT		BIT(12)
++#define SPEED_PLL_MASK			GENMASK(7, 2)
++#define SPEED_PLL_VALUE_16		FIELD_PREP(SPEED_PLL_MASK, 0x10)
++
++#define COMPHY_DIG_LOOPBACK_EN		0x23
++#define SEL_DATA_WIDTH_MASK		GENMASK(11, 10)
++#define DATA_WIDTH_10BIT		FIELD_PREP(SEL_DATA_WIDTH_MASK, 0x0)
++#define DATA_WIDTH_20BIT		FIELD_PREP(SEL_DATA_WIDTH_MASK, 0x1)
++#define DATA_WIDTH_40BIT		FIELD_PREP(SEL_DATA_WIDTH_MASK, 0x2)
++#define PLL_READY_TX_BIT		BIT(4)
++
++#define COMPHY_SYNC_PATTERN		0x24
++#define TXD_INVERT_BIT			BIT(10)
++#define RXD_INVERT_BIT			BIT(11)
++
++#define COMPHY_SYNC_MASK_GEN		0x25
++#define PHY_GEN_MAX_MASK		GENMASK(11, 10)
++#define PHY_GEN_MAX_USB3_5G		FIELD_PREP(PHY_GEN_MAX_MASK, 0x1)
++
++#define COMPHY_ISOLATION_CTRL		0x26
++#define PHY_ISOLATE_MODE		BIT(15)
++
++#define COMPHY_GEN2_SET2		0x3e
++#define GS2_TX_SSC_AMP_MASK		GENMASK(15, 9)
++#define GS2_TX_SSC_AMP_4128		FIELD_PREP(GS2_TX_SSC_AMP_MASK, 0x20)
++#define GS2_VREG_RXTX_MAS_ISET_MASK	GENMASK(8, 7)
++#define GS2_VREG_RXTX_MAS_ISET_60U	FIELD_PREP(GS2_VREG_RXTX_MAS_ISET_MASK,\
++						   0x0)
++#define GS2_VREG_RXTX_MAS_ISET_80U	FIELD_PREP(GS2_VREG_RXTX_MAS_ISET_MASK,\
++						   0x1)
++#define GS2_VREG_RXTX_MAS_ISET_100U	FIELD_PREP(GS2_VREG_RXTX_MAS_ISET_MASK,\
++						   0x2)
++#define GS2_VREG_RXTX_MAS_ISET_120U	FIELD_PREP(GS2_VREG_RXTX_MAS_ISET_MASK,\
++						   0x3)
++#define GS2_RSVD_6_0_MASK		GENMASK(6, 0)
++
++#define COMPHY_GEN3_SET2		0x3f
++
++#define COMPHY_IDLE_SYNC_EN		0x48
++#define IDLE_SYNC_EN			BIT(12)
++
++#define COMPHY_MISC_CTRL0		0x4F
++#define CLK100M_125M_EN			BIT(4)
++#define TXDCLK_2X_SEL			BIT(6)
++#define CLK500M_EN			BIT(7)
++#define PHY_REF_CLK_SEL			BIT(10)
++
++#define COMPHY_SFT_RESET		0x52
++#define SFT_RST				BIT(9)
++#define SFT_RST_NO_REG			BIT(10)
++
++#define COMPHY_MISC_CTRL1		0x73
++#define SEL_BITS_PCIE_FORCE		BIT(15)
++
++#define COMPHY_GEN2_SET3		0x112
++#define GS3_FFE_CAP_SEL_MASK		GENMASK(3, 0)
++#define GS3_FFE_CAP_SEL_VALUE		FIELD_PREP(GS3_FFE_CAP_SEL_MASK, 0xF)
++
++/* PIPE registers */
++#define COMPHY_PIPE_LANE_CFG0		0x180
++#define PRD_TXDEEMPH0_MASK		BIT(0)
++#define PRD_TXMARGIN_MASK		GENMASK(3, 1)
++#define PRD_TXSWING_MASK		BIT(4)
++#define CFG_TX_ALIGN_POS_MASK		GENMASK(8, 5)
++
++#define COMPHY_PIPE_LANE_CFG1		0x181
++#define PRD_TXDEEMPH1_MASK		BIT(15)
++#define USE_MAX_PLL_RATE_EN		BIT(9)
++#define TX_DET_RX_MODE			BIT(6)
++#define GEN2_TX_DATA_DLY_MASK		GENMASK(4, 3)
++#define GEN2_TX_DATA_DLY_DEFT		FIELD_PREP(GEN2_TX_DATA_DLY_MASK, 2)
++#define TX_ELEC_IDLE_MODE_EN		BIT(0)
++
++#define COMPHY_PIPE_LANE_STAT1		0x183
++#define TXDCLK_PCLK_EN			BIT(0)
++
++#define COMPHY_PIPE_LANE_CFG4		0x188
++#define SPREAD_SPECTRUM_CLK_EN		BIT(7)
++
++#define COMPHY_PIPE_RST_CLK_CTRL	0x1C1
++#define PIPE_SOFT_RESET			BIT(0)
++#define PIPE_REG_RESET			BIT(1)
++#define MODE_CORE_CLK_FREQ_SEL		BIT(9)
++#define MODE_PIPE_WIDTH_32		BIT(3)
++#define MODE_REFDIV_MASK		GENMASK(5, 4)
++#define MODE_REFDIV_BY_4		FIELD_PREP(MODE_REFDIV_MASK, 0x2)
++
++#define COMPHY_PIPE_TEST_MODE_CTRL	0x1C2
++#define MODE_MARGIN_OVERRIDE		BIT(2)
++
++#define COMPHY_PIPE_CLK_SRC_LO		0x1C3
++#define MODE_CLK_SRC			BIT(0)
++#define BUNDLE_PERIOD_SEL		BIT(1)
++#define BUNDLE_PERIOD_SCALE_MASK	GENMASK(3, 2)
++#define BUNDLE_SAMPLE_CTRL		BIT(4)
++#define PLL_READY_DLY_MASK		GENMASK(7, 5)
++#define CFG_SEL_20B			BIT(15)
++
++#define COMPHY_PIPE_PWR_MGM_TIM1	0x1D0
++#define CFG_PM_OSCCLK_WAIT_MASK		GENMASK(15, 12)
++#define CFG_PM_RXDEN_WAIT_MASK		GENMASK(11, 8)
++#define CFG_PM_RXDEN_WAIT_1_UNIT	FIELD_PREP(CFG_PM_RXDEN_WAIT_MASK, 0x1)
++#define CFG_PM_RXDLOZ_WAIT_MASK		GENMASK(7, 0)
++#define CFG_PM_RXDLOZ_WAIT_7_UNIT	FIELD_PREP(CFG_PM_RXDLOZ_WAIT_MASK, 0x7)
++#define CFG_PM_RXDLOZ_WAIT_12_UNIT	FIELD_PREP(CFG_PM_RXDLOZ_WAIT_MASK, 0xC)
++
++/*
++ * This register is not from PHY lane register space. It only exists in the
++ * indirect register space, before the actual PHY lane 2 registers. So the
++ * offset is absolute, not relative to COMPHY_LANE2_REGS_BASE.
++ * It is used only for SATA PHY initialization.
++ */
++#define COMPHY_RESERVED_REG		0x0E
++#define PHYCTRL_FRM_PIN_BIT		BIT(13)
+ 
+-/* COMPHY Fast SMC function identifiers */
+-#define COMPHY_SIP_POWER_ON			0x82000001
+-#define COMPHY_SIP_POWER_OFF			0x82000002
+-#define COMPHY_SIP_PLL_LOCK			0x82000003
+-
+-#define COMPHY_FW_MODE_SATA			0x1
+-#define COMPHY_FW_MODE_SGMII			0x2
+-#define COMPHY_FW_MODE_2500BASEX		0x3
+-#define COMPHY_FW_MODE_USB3H			0x4
+-#define COMPHY_FW_MODE_USB3D			0x5
+-#define COMPHY_FW_MODE_PCIE			0x6
+-#define COMPHY_FW_MODE_USB3			0xa
+-
+-#define COMPHY_FW_SPEED_1_25G			0 /* SGMII 1G */
+-#define COMPHY_FW_SPEED_2_5G			1
+-#define COMPHY_FW_SPEED_3_125G			2 /* 2500BASE-X */
+-#define COMPHY_FW_SPEED_5G			3
+-#define COMPHY_FW_SPEED_MAX			0x3F
+-
+-#define COMPHY_FW_MODE(mode)			((mode) << 12)
+-#define COMPHY_FW_NET(mode, idx, speed)		(COMPHY_FW_MODE(mode) | \
+-						 ((idx) << 8) |	\
+-						 ((speed) << 2))
+-#define COMPHY_FW_PCIE(mode, speed, width)	(COMPHY_FW_NET(mode, 0, speed) | \
+-						 ((width) << 18))
++/* South Bridge PHY Configuration Registers */
++#define COMPHY_PHY_REG(lane, reg)	(((1 - (lane)) * 0x28) + ((reg) & 0x3f))
++
++/*
++ * lane0: USB3/GbE1 PHY Configuration 1
++ * lane1: PCIe/GbE0 PHY Configuration 1
++ * (used only by SGMII code)
++ */
++#define COMPHY_PHY_CFG1			0x0
++#define PIN_PU_IVREF_BIT		BIT(1)
++#define PIN_RESET_CORE_BIT		BIT(11)
++#define PIN_RESET_COMPHY_BIT		BIT(12)
++#define PIN_PU_PLL_BIT			BIT(16)
++#define PIN_PU_RX_BIT			BIT(17)
++#define PIN_PU_TX_BIT			BIT(18)
++#define PIN_TX_IDLE_BIT			BIT(19)
++#define GEN_RX_SEL_MASK			GENMASK(25, 22)
++#define GEN_RX_SEL_VALUE(val)		FIELD_PREP(GEN_RX_SEL_MASK, (val))
++#define GEN_TX_SEL_MASK			GENMASK(29, 26)
++#define GEN_TX_SEL_VALUE(val)		FIELD_PREP(GEN_TX_SEL_MASK, (val))
++#define SERDES_SPEED_1_25_G		0x6
++#define SERDES_SPEED_3_125_G		0x8
++#define PHY_RX_INIT_BIT			BIT(30)
++
++/*
++ * lane0: USB3/GbE1 PHY Status 1
++ * lane1: PCIe/GbE0 PHY Status 1
++ * (used only by SGMII code)
++ */
++#define COMPHY_PHY_STAT1		0x18
++#define PHY_RX_INIT_DONE_BIT		BIT(0)
++#define PHY_PLL_READY_RX_BIT		BIT(2)
++#define PHY_PLL_READY_TX_BIT		BIT(3)
++
++/* PHY Selector */
++#define COMPHY_SELECTOR_PHY_REG			0xFC
++/* bit0: 0: Lane1 is GbE0; 1: Lane1 is PCIe */
++#define COMPHY_SELECTOR_PCIE_GBE0_SEL_BIT	BIT(0)
++/* bit4: 0: Lane0 is GbE1; 1: Lane0 is USB3 */
++#define COMPHY_SELECTOR_USB3_GBE1_SEL_BIT	BIT(4)
++/* bit8: 0: Lane0 is USB3 instead of GbE1, Lane2 is SATA; 1: Lane2 is USB3 */
++#define COMPHY_SELECTOR_USB3_PHY_SEL_BIT	BIT(8)
+ 
+ struct mvebu_a3700_comphy_conf {
+ 	unsigned int lane;
+ 	enum phy_mode mode;
+ 	int submode;
+-	u32 fw_mode;
+ };
+ 
+-#define MVEBU_A3700_COMPHY_CONF(_lane, _mode, _smode, _fw)		\
++#define MVEBU_A3700_COMPHY_CONF(_lane, _mode, _smode)			\
+ 	{								\
+ 		.lane = _lane,						\
+ 		.mode = _mode,						\
+ 		.submode = _smode,					\
+-		.fw_mode = _fw,						\
+ 	}
+ 
+-#define MVEBU_A3700_COMPHY_CONF_GEN(_lane, _mode, _fw) \
+-	MVEBU_A3700_COMPHY_CONF(_lane, _mode, PHY_INTERFACE_MODE_NA, _fw)
++#define MVEBU_A3700_COMPHY_CONF_GEN(_lane, _mode) \
++	MVEBU_A3700_COMPHY_CONF(_lane, _mode, PHY_INTERFACE_MODE_NA)
+ 
+-#define MVEBU_A3700_COMPHY_CONF_ETH(_lane, _smode, _fw) \
+-	MVEBU_A3700_COMPHY_CONF(_lane, PHY_MODE_ETHERNET, _smode, _fw)
++#define MVEBU_A3700_COMPHY_CONF_ETH(_lane, _smode) \
++	MVEBU_A3700_COMPHY_CONF(_lane, PHY_MODE_ETHERNET, _smode)
+ 
+ static const struct mvebu_a3700_comphy_conf mvebu_a3700_comphy_modes[] = {
+ 	/* lane 0 */
+-	MVEBU_A3700_COMPHY_CONF_GEN(0, PHY_MODE_USB_HOST_SS,
+-				    COMPHY_FW_MODE_USB3H),
+-	MVEBU_A3700_COMPHY_CONF_ETH(0, PHY_INTERFACE_MODE_SGMII,
+-				    COMPHY_FW_MODE_SGMII),
+-	MVEBU_A3700_COMPHY_CONF_ETH(0, PHY_INTERFACE_MODE_2500BASEX,
+-				    COMPHY_FW_MODE_2500BASEX),
++	MVEBU_A3700_COMPHY_CONF_GEN(0, PHY_MODE_USB_HOST_SS),
++	MVEBU_A3700_COMPHY_CONF_ETH(0, PHY_INTERFACE_MODE_SGMII),
++	MVEBU_A3700_COMPHY_CONF_ETH(0, PHY_INTERFACE_MODE_1000BASEX),
++	MVEBU_A3700_COMPHY_CONF_ETH(0, PHY_INTERFACE_MODE_2500BASEX),
+ 	/* lane 1 */
+-	MVEBU_A3700_COMPHY_CONF_GEN(1, PHY_MODE_PCIE, COMPHY_FW_MODE_PCIE),
+-	MVEBU_A3700_COMPHY_CONF_ETH(1, PHY_INTERFACE_MODE_SGMII,
+-				    COMPHY_FW_MODE_SGMII),
+-	MVEBU_A3700_COMPHY_CONF_ETH(1, PHY_INTERFACE_MODE_2500BASEX,
+-				    COMPHY_FW_MODE_2500BASEX),
++	MVEBU_A3700_COMPHY_CONF_GEN(1, PHY_MODE_PCIE),
++	MVEBU_A3700_COMPHY_CONF_ETH(1, PHY_INTERFACE_MODE_SGMII),
++	MVEBU_A3700_COMPHY_CONF_ETH(1, PHY_INTERFACE_MODE_1000BASEX),
++	MVEBU_A3700_COMPHY_CONF_ETH(1, PHY_INTERFACE_MODE_2500BASEX),
+ 	/* lane 2 */
+-	MVEBU_A3700_COMPHY_CONF_GEN(2, PHY_MODE_SATA, COMPHY_FW_MODE_SATA),
+-	MVEBU_A3700_COMPHY_CONF_GEN(2, PHY_MODE_USB_HOST_SS,
+-				    COMPHY_FW_MODE_USB3H),
++	MVEBU_A3700_COMPHY_CONF_GEN(2, PHY_MODE_SATA),
++	MVEBU_A3700_COMPHY_CONF_GEN(2, PHY_MODE_USB_HOST_SS),
++};
++
++struct mvebu_a3700_comphy_priv {
++	void __iomem *comphy_regs;
++	void __iomem *lane0_phy_regs; /* USB3 and GbE1 */
++	void __iomem *lane1_phy_regs; /* PCIe and GbE0 */
++	void __iomem *lane2_phy_indirect; /* SATA and USB3 */
++	spinlock_t lock; /* for PHY selector access */
++	bool xtal_is_40m;
+ };
+ 
+ struct mvebu_a3700_comphy_lane {
++	struct mvebu_a3700_comphy_priv *priv;
+ 	struct device *dev;
+ 	unsigned int id;
+ 	enum phy_mode mode;
+ 	int submode;
++	bool invert_tx;
++	bool invert_rx;
++	bool needs_reset;
++};
++
++struct gbe_phy_init_data_fix {
++	u16 addr;
++	u16 value;
++};
++
++/* Changes to 40M1G25 mode data required for running 40M3G125 init mode */
++static struct gbe_phy_init_data_fix gbe_phy_init_fix[] = {
++	{ 0x005, 0x07CC }, { 0x015, 0x0000 }, { 0x01B, 0x0000 },
++	{ 0x01D, 0x0000 }, { 0x01E, 0x0000 }, { 0x01F, 0x0000 },
++	{ 0x020, 0x0000 }, { 0x021, 0x0030 }, { 0x026, 0x0888 },
++	{ 0x04D, 0x0152 }, { 0x04F, 0xA020 }, { 0x050, 0x07CC },
++	{ 0x053, 0xE9CA }, { 0x055, 0xBD97 }, { 0x071, 0x3015 },
++	{ 0x076, 0x03AA }, { 0x07C, 0x0FDF }, { 0x0C2, 0x3030 },
++	{ 0x0C3, 0x8000 }, { 0x0E2, 0x5550 }, { 0x0E3, 0x12A4 },
++	{ 0x0E4, 0x7D00 }, { 0x0E6, 0x0C83 }, { 0x101, 0xFCC0 },
++	{ 0x104, 0x0C10 }
+ };
+ 
+-static int mvebu_a3700_comphy_smc(unsigned long function, unsigned long lane,
+-				  unsigned long mode)
++/* 40M1G25 mode init data */
++static u16 gbe_phy_init[512] = {
++	/* 0       1       2       3       4       5       6       7 */
++	/*-----------------------------------------------------------*/
++	/* 8       9       A       B       C       D       E       F */
++	0x3110, 0xFD83, 0x6430, 0x412F, 0x82C0, 0x06FA, 0x4500, 0x6D26,	/* 00 */
++	0xAFC0, 0x8000, 0xC000, 0x0000, 0x2000, 0x49CC, 0x0BC9, 0x2A52,	/* 08 */
++	0x0BD2, 0x0CDE, 0x13D2, 0x0CE8, 0x1149, 0x10E0, 0x0000, 0x0000,	/* 10 */
++	0x0000, 0x0000, 0x0000, 0x0001, 0x0000, 0x4134, 0x0D2D, 0xFFFF,	/* 18 */
++	0xFFE0, 0x4030, 0x1016, 0x0030, 0x0000, 0x0800, 0x0866, 0x0000,	/* 20 */
++	0x0000, 0x0000, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,	/* 28 */
++	0xFFFF, 0xFFFF, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,	/* 30 */
++	0x0000, 0x0000, 0x000F, 0x6A62, 0x1988, 0x3100, 0x3100, 0x3100,	/* 38 */
++	0x3100, 0xA708, 0x2430, 0x0830, 0x1030, 0x4610, 0xFF00, 0xFF00,	/* 40 */
++	0x0060, 0x1000, 0x0400, 0x0040, 0x00F0, 0x0155, 0x1100, 0xA02A,	/* 48 */
++	0x06FA, 0x0080, 0xB008, 0xE3ED, 0x5002, 0xB592, 0x7A80, 0x0001,	/* 50 */
++	0x020A, 0x8820, 0x6014, 0x8054, 0xACAA, 0xFC88, 0x2A02, 0x45CF,	/* 58 */
++	0x000F, 0x1817, 0x2860, 0x064F, 0x0000, 0x0204, 0x1800, 0x6000,	/* 60 */
++	0x810F, 0x4F23, 0x4000, 0x4498, 0x0850, 0x0000, 0x000E, 0x1002,	/* 68 */
++	0x9D3A, 0x3009, 0xD066, 0x0491, 0x0001, 0x6AB0, 0x0399, 0x3780,	/* 70 */
++	0x0040, 0x5AC0, 0x4A80, 0x0000, 0x01DF, 0x0000, 0x0007, 0x0000,	/* 78 */
++	0x2D54, 0x00A1, 0x4000, 0x0100, 0xA20A, 0x0000, 0x0000, 0x0000,	/* 80 */
++	0x0000, 0x0000, 0x0000, 0x7400, 0x0E81, 0x1000, 0x1242, 0x0210,	/* 88 */
++	0x80DF, 0x0F1F, 0x2F3F, 0x4F5F, 0x6F7F, 0x0F1F, 0x2F3F, 0x4F5F,	/* 90 */
++	0x6F7F, 0x4BAD, 0x0000, 0x0000, 0x0800, 0x0000, 0x2400, 0xB651,	/* 98 */
++	0xC9E0, 0x4247, 0x0A24, 0x0000, 0xAF19, 0x1004, 0x0000, 0x0000,	/* A0 */
++	0x0000, 0x0013, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,	/* A8 */
++	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,	/* B0 */
++	0x0000, 0x0000, 0x0000, 0x0060, 0x0000, 0x0000, 0x0000, 0x0000,	/* B8 */
++	0x0000, 0x0000, 0x3010, 0xFA00, 0x0000, 0x0000, 0x0000, 0x0003,	/* C0 */
++	0x1618, 0x8200, 0x8000, 0x0400, 0x050F, 0x0000, 0x0000, 0x0000,	/* C8 */
++	0x4C93, 0x0000, 0x1000, 0x1120, 0x0010, 0x1242, 0x1242, 0x1E00,	/* D0 */
++	0x0000, 0x0000, 0x0000, 0x00F8, 0x0000, 0x0041, 0x0800, 0x0000,	/* D8 */
++	0x82A0, 0x572E, 0x2490, 0x14A9, 0x4E00, 0x0000, 0x0803, 0x0541,	/* E0 */
++	0x0C15, 0x0000, 0x0000, 0x0400, 0x2626, 0x0000, 0x0000, 0x4200,	/* E8 */
++	0x0000, 0xAA55, 0x1020, 0x0000, 0x0000, 0x5010, 0x0000, 0x0000,	/* F0 */
++	0x0000, 0x0000, 0x5000, 0x0000, 0x0000, 0x0000, 0x02F2, 0x0000,	/* F8 */
++	0x101F, 0xFDC0, 0x4000, 0x8010, 0x0110, 0x0006, 0x0000, 0x0000,	/*100 */
++	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,	/*108 */
++	0x04CF, 0x0000, 0x04CF, 0x0000, 0x04CF, 0x0000, 0x04C6, 0x0000,	/*110 */
++	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,	/*118 */
++	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,	/*120 */
++	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,	/*128 */
++	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,	/*130 */
++	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,	/*138 */
++	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,	/*140 */
++	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,	/*148 */
++	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,	/*150 */
++	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,	/*158 */
++	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,	/*160 */
++	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,	/*168 */
++	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,	/*170 */
++	0x0000, 0x0000, 0x0000, 0x00F0, 0x08A2, 0x3112, 0x0A14, 0x0000,	/*178 */
++	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,	/*180 */
++	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,	/*188 */
++	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,	/*190 */
++	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,	/*198 */
++	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,	/*1A0 */
++	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,	/*1A8 */
++	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,	/*1B0 */
++	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,	/*1B8 */
++	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,	/*1C0 */
++	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,	/*1C8 */
++	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,	/*1D0 */
++	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,	/*1D8 */
++	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,	/*1E0 */
++	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,	/*1E8 */
++	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,	/*1F0 */
++	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000	/*1F8 */
++};
++
++static inline void comphy_reg_set(void __iomem *addr, u32 data, u32 mask)
+ {
+-	struct arm_smccc_res res;
+-	s32 ret;
++	u32 val;
++
++	val = readl(addr);
++	val = (val & ~mask) | (data & mask);
++	writel(val, addr);
++}
+ 
+-	arm_smccc_smc(function, lane, mode, 0, 0, 0, 0, 0, &res);
+-	ret = res.a0;
++static inline void comphy_reg_set16(void __iomem *addr, u16 data, u16 mask)
++{
++	u16 val;
+ 
+-	switch (ret) {
+-	case SMCCC_RET_SUCCESS:
+-		return 0;
+-	case SMCCC_RET_NOT_SUPPORTED:
+-		return -EOPNOTSUPP;
++	val = readw(addr);
++	val = (val & ~mask) | (data & mask);
++	writew(val, addr);
++}
++
++/* Used for accessing lane 2 registers (SATA/USB3 PHY) */
++static void comphy_set_indirect(struct mvebu_a3700_comphy_priv *priv,
++				u32 offset, u16 data, u16 mask)
++{
++	writel(offset,
++	       priv->lane2_phy_indirect + COMPHY_LANE2_INDIR_ADDR);
++	comphy_reg_set(priv->lane2_phy_indirect + COMPHY_LANE2_INDIR_DATA,
++		       data, mask);
++}
++
++static void comphy_lane_reg_set(struct mvebu_a3700_comphy_lane *lane,
++				u16 reg, u16 data, u16 mask)
++{
++	if (lane->id == 2) {
++		/* lane 2 PHY registers are accessed indirectly */
++		comphy_set_indirect(lane->priv,
++				    reg + COMPHY_LANE2_REGS_BASE,
++				    data, mask);
++	} else {
++		void __iomem *base = lane->id == 1 ?
++				     lane->priv->lane1_phy_regs :
++				     lane->priv->lane0_phy_regs;
++
++		comphy_reg_set16(base + COMPHY_LANE_REG_DIRECT(reg),
++				 data, mask);
++	}
++}
++
++static int comphy_lane_reg_poll(struct mvebu_a3700_comphy_lane *lane,
++				u16 reg, u16 bits,
++				ulong sleep_us, ulong timeout_us)
++{
++	int ret;
++
++	if (lane->id == 2) {
++		u32 data;
++
++		/* lane 2 PHY registers are accessed indirectly */
++		writel(reg + COMPHY_LANE2_REGS_BASE,
++		       lane->priv->lane2_phy_indirect +
++		       COMPHY_LANE2_INDIR_ADDR);
++
++		ret = readl_poll_timeout(lane->priv->lane2_phy_indirect +
++					 COMPHY_LANE2_INDIR_DATA,
++					 data, (data & bits) == bits,
++					 sleep_us, timeout_us);
++	} else {
++		void __iomem *base = lane->id == 1 ?
++				     lane->priv->lane1_phy_regs :
++				     lane->priv->lane0_phy_regs;
++		u16 data;
++
++		ret = readw_poll_timeout(base + COMPHY_LANE_REG_DIRECT(reg),
++					 data, (data & bits) == bits,
++					 sleep_us, timeout_us);
++	}
++
++	return ret;
++}
++
++static void comphy_periph_reg_set(struct mvebu_a3700_comphy_lane *lane,
++				  u8 reg, u32 data, u32 mask)
++{
++	comphy_reg_set(lane->priv->comphy_regs + COMPHY_PHY_REG(lane->id, reg),
++		       data, mask);
++}
++
++static int comphy_periph_reg_poll(struct mvebu_a3700_comphy_lane *lane,
++				  u8 reg, u32 bits,
++				  ulong sleep_us, ulong timeout_us)
++{
++	u32 data;
++
++	return readl_poll_timeout(lane->priv->comphy_regs +
++				  COMPHY_PHY_REG(lane->id, reg),
++				  data, (data & bits) == bits,
++				  sleep_us, timeout_us);
++}
++
++/* PHY selector configures with corresponding modes */
++static int
++mvebu_a3700_comphy_set_phy_selector(struct mvebu_a3700_comphy_lane *lane)
++{
++	u32 old, new, clr = 0, set = 0;
++	unsigned long flags;
++
++	switch (lane->mode) {
++	case PHY_MODE_SATA:
++		/* SATA must be in Lane2 */
++		if (lane->id == 2)
++			clr = COMPHY_SELECTOR_USB3_PHY_SEL_BIT;
++		else
++			goto error;
++		break;
++
++	case PHY_MODE_ETHERNET:
++		if (lane->id == 0)
++			clr = COMPHY_SELECTOR_USB3_GBE1_SEL_BIT;
++		else if (lane->id == 1)
++			clr = COMPHY_SELECTOR_PCIE_GBE0_SEL_BIT;
++		else
++			goto error;
++		break;
++
++	case PHY_MODE_USB_HOST_SS:
++		if (lane->id == 2)
++			set = COMPHY_SELECTOR_USB3_PHY_SEL_BIT;
++		else if (lane->id == 0)
++			set = COMPHY_SELECTOR_USB3_GBE1_SEL_BIT;
++		else
++			goto error;
++		break;
++
++	case PHY_MODE_PCIE:
++		/* PCIE must be in Lane1 */
++		if (lane->id == 1)
++			set = COMPHY_SELECTOR_PCIE_GBE0_SEL_BIT;
++		else
++			goto error;
++		break;
++
++	default:
++		goto error;
++	}
++
++	spin_lock_irqsave(&lane->priv->lock, flags);
++
++	old = readl(lane->priv->comphy_regs + COMPHY_SELECTOR_PHY_REG);
++	new = (old & ~clr) | set;
++	writel(new, lane->priv->comphy_regs + COMPHY_SELECTOR_PHY_REG);
++
++	spin_unlock_irqrestore(&lane->priv->lock, flags);
++
++	dev_dbg(lane->dev,
++		"COMPHY[%d] mode[%d] changed PHY selector 0x%08x -> 0x%08x\n",
++		lane->id, lane->mode, old, new);
++
++	return 0;
++error:
++	dev_err(lane->dev, "COMPHY[%d] mode[%d] is invalid\n", lane->id,
++		lane->mode);
++	return -EINVAL;
++}
++
++static int
++mvebu_a3700_comphy_sata_power_on(struct mvebu_a3700_comphy_lane *lane)
++{
++	u32 mask, data, ref_clk;
++	int ret;
++
++	/* Configure phy selector for SATA */
++	ret = mvebu_a3700_comphy_set_phy_selector(lane);
++	if (ret)
++		return ret;
++
++	/* Clear phy isolation mode to make it work in normal mode */
++	comphy_lane_reg_set(lane, COMPHY_ISOLATION_CTRL,
++			    0x0, PHY_ISOLATE_MODE);
++
++	/* 0. Check the Polarity invert bits */
++	data = 0x0;
++	if (lane->invert_tx)
++		data |= TXD_INVERT_BIT;
++	if (lane->invert_rx)
++		data |= RXD_INVERT_BIT;
++	mask = TXD_INVERT_BIT | RXD_INVERT_BIT;
++	comphy_lane_reg_set(lane, COMPHY_SYNC_PATTERN, data, mask);
++
++	/* 1. Select 40-bit data width */
++	comphy_lane_reg_set(lane, COMPHY_DIG_LOOPBACK_EN,
++			    DATA_WIDTH_40BIT, SEL_DATA_WIDTH_MASK);
++
++	/* 2. Select reference clock(25M) and PHY mode (SATA) */
++	if (lane->priv->xtal_is_40m)
++		ref_clk = REF_FREF_SEL_SERDES_40MHZ;
++	else
++		ref_clk = REF_FREF_SEL_SERDES_25MHZ;
++
++	data = ref_clk | COMPHY_MODE_SATA;
++	mask = REF_FREF_SEL_MASK | COMPHY_MODE_MASK;
++	comphy_lane_reg_set(lane, COMPHY_POWER_PLL_CTRL, data, mask);
++
++	/* 3. Use maximum PLL rate (no power save) */
++	comphy_lane_reg_set(lane, COMPHY_KVCO_CAL_CTRL,
++			    USE_MAX_PLL_RATE_BIT, USE_MAX_PLL_RATE_BIT);
++
++	/* 4. Reset reserved bit */
++	comphy_set_indirect(lane->priv, COMPHY_RESERVED_REG,
++			    0x0, PHYCTRL_FRM_PIN_BIT);
++
++	/* 5. Set vendor-specific configuration (It is done in sata driver) */
++	/* XXX: in U-Boot below sequence was executed in this place, in Linux
++	 * not.  Now it is done only in U-Boot before this comphy
++	 * initialization - tests shows that it works ok, but in case of any
++	 * future problem it is left for reference.
++	 *   reg_set(MVEBU_REGS_BASE + 0xe00a0, 0, 0xffffffff);
++	 *   reg_set(MVEBU_REGS_BASE + 0xe00a4, BIT(6), BIT(6));
++	 */
++
++	/* Wait for > 55 us to allow PLL be enabled */
++	udelay(PLL_SET_DELAY_US);
++
++	/* Polling status */
++	ret = comphy_lane_reg_poll(lane, COMPHY_DIG_LOOPBACK_EN,
++				   PLL_READY_TX_BIT, COMPHY_PLL_SLEEP,
++				   COMPHY_PLL_TIMEOUT);
++	if (ret)
++		dev_err(lane->dev, "Failed to lock SATA PLL\n");
++
++	return ret;
++}
++
++static void comphy_gbe_phy_init(struct mvebu_a3700_comphy_lane *lane,
++				bool is_1gbps)
++{
++	int addr, fix_idx;
++	u16 val;
++
++	fix_idx = 0;
++	for (addr = 0; addr < 512; addr++) {
++		/*
++		 * All PHY register values are defined in full for 3.125Gbps
++		 * SERDES speed. The values required for 1.25 Gbps are almost
++		 * the same and only few registers should be "fixed" in
++		 * comparison to 3.125 Gbps values. These register values are
++		 * stored in "gbe_phy_init_fix" array.
++		 */
++		if (!is_1gbps && gbe_phy_init_fix[fix_idx].addr == addr) {
++			/* Use new value */
++			val = gbe_phy_init_fix[fix_idx].value;
++			if (fix_idx < ARRAY_SIZE(gbe_phy_init_fix))
++				fix_idx++;
++		} else {
++			val = gbe_phy_init[addr];
++		}
++
++		comphy_lane_reg_set(lane, addr, val, 0xFFFF);
++	}
++}
++
++static int
++mvebu_a3700_comphy_ethernet_power_on(struct mvebu_a3700_comphy_lane *lane)
++{
++	u32 mask, data, speed_sel;
++	int ret;
++
++	/* Set selector */
++	ret = mvebu_a3700_comphy_set_phy_selector(lane);
++	if (ret)
++		return ret;
++
++	/*
++	 * 1. Reset PHY by setting PHY input port PIN_RESET=1.
++	 * 2. Set PHY input port PIN_TX_IDLE=1, PIN_PU_IVREF=1 to keep
++	 *    PHY TXP/TXN output to idle state during PHY initialization
++	 * 3. Set PHY input port PIN_PU_PLL=0, PIN_PU_RX=0, PIN_PU_TX=0.
++	 */
++	data = PIN_PU_IVREF_BIT | PIN_TX_IDLE_BIT | PIN_RESET_COMPHY_BIT;
++	mask = data | PIN_RESET_CORE_BIT | PIN_PU_PLL_BIT | PIN_PU_RX_BIT |
++	       PIN_PU_TX_BIT | PHY_RX_INIT_BIT;
++	comphy_periph_reg_set(lane, COMPHY_PHY_CFG1, data, mask);
++
++	/* 4. Release reset to the PHY by setting PIN_RESET=0. */
++	data = 0x0;
++	mask = PIN_RESET_COMPHY_BIT;
++	comphy_periph_reg_set(lane, COMPHY_PHY_CFG1, data, mask);
++
++	/*
++	 * 5. Set PIN_PHY_GEN_TX[3:0] and PIN_PHY_GEN_RX[3:0] to decide COMPHY
++	 * bit rate
++	 */
++	switch (lane->submode) {
++	case PHY_INTERFACE_MODE_SGMII:
++	case PHY_INTERFACE_MODE_1000BASEX:
++		/* SGMII 1G, SerDes speed 1.25G */
++		speed_sel = SERDES_SPEED_1_25_G;
++		break;
++	case PHY_INTERFACE_MODE_2500BASEX:
++		/* 2500Base-X, SerDes speed 3.125G */
++		speed_sel = SERDES_SPEED_3_125_G;
++		break;
+ 	default:
++		/* Other rates are not supported */
++		dev_err(lane->dev,
++			"unsupported phy speed %d on comphy lane%d\n",
++			lane->submode, lane->id);
+ 		return -EINVAL;
+ 	}
++	data = GEN_RX_SEL_VALUE(speed_sel) | GEN_TX_SEL_VALUE(speed_sel);
++	mask = GEN_RX_SEL_MASK | GEN_TX_SEL_MASK;
++	comphy_periph_reg_set(lane, COMPHY_PHY_CFG1, data, mask);
++
++	/*
++	 * 6. Wait 10mS for bandgap and reference clocks to stabilize; then
++	 * start SW programming.
++	 */
++	mdelay(10);
++
++	/* 7. Program COMPHY register PHY_MODE */
++	data = COMPHY_MODE_SERDES;
++	mask = COMPHY_MODE_MASK;
++	comphy_lane_reg_set(lane, COMPHY_POWER_PLL_CTRL, data, mask);
++
++	/*
++	 * 8. Set COMPHY register REFCLK_SEL to select the correct REFCLK
++	 * source
++	 */
++	data = 0x0;
++	mask = PHY_REF_CLK_SEL;
++	comphy_lane_reg_set(lane, COMPHY_MISC_CTRL0, data, mask);
++
++	/*
++	 * 9. Set correct reference clock frequency in COMPHY register
++	 * REF_FREF_SEL.
++	 */
++	if (lane->priv->xtal_is_40m)
++		data = REF_FREF_SEL_SERDES_50MHZ;
++	else
++		data = REF_FREF_SEL_SERDES_25MHZ;
++
++	mask = REF_FREF_SEL_MASK;
++	comphy_lane_reg_set(lane, COMPHY_POWER_PLL_CTRL, data, mask);
++
++	/*
++	 * 10. Program COMPHY register PHY_GEN_MAX[1:0]
++	 * This step is mentioned in the flow received from verification team.
++	 * However the PHY_GEN_MAX value is only meaningful for other interfaces
++	 * (not SERDES). For instance, it selects SATA speed 1.5/3/6 Gbps or
++	 * PCIe speed 2.5/5 Gbps
++	 */
++
++	/*
++	 * 11. Program COMPHY register SEL_BITS to set correct parallel data
++	 * bus width
++	 */
++	data = DATA_WIDTH_10BIT;
++	mask = SEL_DATA_WIDTH_MASK;
++	comphy_lane_reg_set(lane, COMPHY_DIG_LOOPBACK_EN, data, mask);
++
++	/*
++	 * 12. As long as DFE function needs to be enabled in any mode,
++	 * COMPHY register DFE_UPDATE_EN[5:0] shall be programmed to 0x3F
++	 * for real chip during COMPHY power on.
++	 * The value of the DFE_UPDATE_EN already is 0x3F, because it is the
++	 * default value after reset of the PHY.
++	 */
++
++	/*
++	 * 13. Program COMPHY GEN registers.
++	 * These registers should be programmed based on the lab testing result
++	 * to achieve optimal performance. Please contact the CEA group to get
++	 * the related GEN table during real chip bring-up. We only required to
++	 * run though the entire registers programming flow defined by
++	 * "comphy_gbe_phy_init" when the REF clock is 40 MHz. For REF clock
++	 * 25 MHz the default values stored in PHY registers are OK.
++	 */
++	dev_dbg(lane->dev, "Running C-DPI phy init %s mode\n",
++		lane->submode == PHY_INTERFACE_MODE_2500BASEX ? "2G5" : "1G");
++	if (lane->priv->xtal_is_40m)
++		comphy_gbe_phy_init(lane,
++				    lane->submode != PHY_INTERFACE_MODE_2500BASEX);
++
++	/*
++	 * 14. Check the PHY Polarity invert bit
++	 */
++	data = 0x0;
++	if (lane->invert_tx)
++		data |= TXD_INVERT_BIT;
++	if (lane->invert_rx)
++		data |= RXD_INVERT_BIT;
++	mask = TXD_INVERT_BIT | RXD_INVERT_BIT;
++	comphy_lane_reg_set(lane, COMPHY_SYNC_PATTERN, data, mask);
++
++	/*
++	 * 15. Set PHY input ports PIN_PU_PLL, PIN_PU_TX and PIN_PU_RX to 1 to
++	 * start PHY power up sequence. All the PHY register programming should
++	 * be done before PIN_PU_PLL=1. There should be no register programming
++	 * for normal PHY operation from this point.
++	 */
++	data = PIN_PU_PLL_BIT | PIN_PU_RX_BIT | PIN_PU_TX_BIT;
++	mask = data;
++	comphy_periph_reg_set(lane, COMPHY_PHY_CFG1, data, mask);
++
++	/*
++	 * 16. Wait for PHY power up sequence to finish by checking output ports
++	 * PIN_PLL_READY_TX=1 and PIN_PLL_READY_RX=1.
++	 */
++	ret = comphy_periph_reg_poll(lane, COMPHY_PHY_STAT1,
++				     PHY_PLL_READY_TX_BIT |
++				     PHY_PLL_READY_RX_BIT,
++				     COMPHY_PLL_SLEEP, COMPHY_PLL_TIMEOUT);
++	if (ret) {
++		dev_err(lane->dev, "Failed to lock PLL for SERDES PHY %d\n",
++			lane->id);
++		return ret;
++	}
++
++	/*
++	 * 17. Set COMPHY input port PIN_TX_IDLE=0
++	 */
++	comphy_periph_reg_set(lane, COMPHY_PHY_CFG1, 0x0, PIN_TX_IDLE_BIT);
++
++	/*
++	 * 18. After valid data appear on PIN_RXDATA bus, set PIN_RX_INIT=1. To
++	 * start RX initialization. PIN_RX_INIT_DONE will be cleared to 0 by the
++	 * PHY After RX initialization is done, PIN_RX_INIT_DONE will be set to
++	 * 1 by COMPHY Set PIN_RX_INIT=0 after PIN_RX_INIT_DONE= 1. Please
++	 * refer to RX initialization part for details.
++	 */
++	comphy_periph_reg_set(lane, COMPHY_PHY_CFG1,
++			      PHY_RX_INIT_BIT, PHY_RX_INIT_BIT);
++
++	ret = comphy_periph_reg_poll(lane, COMPHY_PHY_STAT1,
++				     PHY_PLL_READY_TX_BIT |
++				     PHY_PLL_READY_RX_BIT,
++				     COMPHY_PLL_SLEEP, COMPHY_PLL_TIMEOUT);
++	if (ret) {
++		dev_err(lane->dev, "Failed to lock PLL for SERDES PHY %d\n",
++			lane->id);
++		return ret;
++	}
++
++	ret = comphy_periph_reg_poll(lane, COMPHY_PHY_STAT1,
++				     PHY_RX_INIT_DONE_BIT,
++				     COMPHY_PLL_SLEEP, COMPHY_PLL_TIMEOUT);
++	if (ret)
++		dev_err(lane->dev, "Failed to init RX of SERDES PHY %d\n",
++			lane->id);
++
++	return ret;
+ }
+ 
+-static int mvebu_a3700_comphy_get_fw_mode(int lane,
++static int
++mvebu_a3700_comphy_usb3_power_on(struct mvebu_a3700_comphy_lane *lane)
++{
++	u32 mask, data, cfg, ref_clk;
++	int ret;
++
++	/* Set phy seclector */
++	ret = mvebu_a3700_comphy_set_phy_selector(lane);
++	if (ret)
++		return ret;
++
++	/*
++	 * 0. Set PHY OTG Control(0x5d034), bit 4, Power up OTG module The
++	 * register belong to UTMI module, so it is set in UTMI phy driver.
++	 */
++
++	/*
++	 * 1. Set PRD_TXDEEMPH (3.5db de-emph)
++	 */
++	data = PRD_TXDEEMPH0_MASK;
++	mask = PRD_TXDEEMPH0_MASK | PRD_TXMARGIN_MASK | PRD_TXSWING_MASK |
++	       CFG_TX_ALIGN_POS_MASK;
++	comphy_lane_reg_set(lane, COMPHY_PIPE_LANE_CFG0, data, mask);
++
++	/*
++	 * 2. Set BIT0: enable transmitter in high impedance mode
++	 *    Set BIT[3:4]: delay 2 clock cycles for HiZ off latency
++	 *    Set BIT6: Tx detect Rx at HiZ mode
++	 *    Unset BIT15: set to 0 to set USB3 De-emphasize level to -3.5db
++	 *            together with bit 0 of COMPHY_PIPE_LANE_CFG0 register
++	 */
++	data = TX_DET_RX_MODE | GEN2_TX_DATA_DLY_DEFT | TX_ELEC_IDLE_MODE_EN;
++	mask = PRD_TXDEEMPH1_MASK | TX_DET_RX_MODE | GEN2_TX_DATA_DLY_MASK |
++	       TX_ELEC_IDLE_MODE_EN;
++	comphy_lane_reg_set(lane, COMPHY_PIPE_LANE_CFG1, data, mask);
++
++	/*
++	 * 3. Set Spread Spectrum Clock Enabled
++	 */
++	comphy_lane_reg_set(lane, COMPHY_PIPE_LANE_CFG4,
++			    SPREAD_SPECTRUM_CLK_EN, SPREAD_SPECTRUM_CLK_EN);
++
++	/*
++	 * 4. Set Override Margining Controls From the MAC:
++	 *    Use margining signals from lane configuration
++	 */
++	comphy_lane_reg_set(lane, COMPHY_PIPE_TEST_MODE_CTRL,
++			    MODE_MARGIN_OVERRIDE, 0xFFFF);
++
++	/*
++	 * 5. Set Lane-to-Lane Bundle Clock Sampling Period = per PCLK cycles
++	 *    set Mode Clock Source = PCLK is generated from REFCLK
++	 */
++	data = 0x0;
++	mask = MODE_CLK_SRC | BUNDLE_PERIOD_SEL | BUNDLE_PERIOD_SCALE_MASK |
++	       BUNDLE_SAMPLE_CTRL | PLL_READY_DLY_MASK;
++	comphy_lane_reg_set(lane, COMPHY_PIPE_CLK_SRC_LO, data, mask);
++
++	/*
++	 * 6. Set G2 Spread Spectrum Clock Amplitude at 4K
++	 */
++	comphy_lane_reg_set(lane, COMPHY_GEN2_SET2,
++			    GS2_TX_SSC_AMP_4128, GS2_TX_SSC_AMP_MASK);
++
++	/*
++	 * 7. Unset G3 Spread Spectrum Clock Amplitude
++	 *    set G3 TX and RX Register Master Current Select
++	 */
++	data = GS2_VREG_RXTX_MAS_ISET_60U;
++	mask = GS2_TX_SSC_AMP_MASK | GS2_VREG_RXTX_MAS_ISET_MASK |
++	       GS2_RSVD_6_0_MASK;
++	comphy_lane_reg_set(lane, COMPHY_GEN3_SET2, data, mask);
++
++	/*
++	 * 8. Check crystal jumper setting and program the Power and PLL Control
++	 * accordingly Change RX wait
++	 */
++	if (lane->priv->xtal_is_40m) {
++		ref_clk = REF_FREF_SEL_PCIE_USB3_40MHZ;
++		cfg = CFG_PM_RXDLOZ_WAIT_12_UNIT;
++	} else {
++		ref_clk = REF_FREF_SEL_PCIE_USB3_25MHZ;
++		cfg = CFG_PM_RXDLOZ_WAIT_7_UNIT;
++	}
++
++	data = PU_IVREF_BIT | PU_PLL_BIT | PU_RX_BIT | PU_TX_BIT |
++	       PU_TX_INTP_BIT | PU_DFE_BIT | COMPHY_MODE_USB3 | ref_clk;
++	mask = PU_IVREF_BIT | PU_PLL_BIT | PU_RX_BIT | PU_TX_BIT |
++	       PU_TX_INTP_BIT | PU_DFE_BIT | PLL_LOCK_BIT | COMPHY_MODE_MASK |
++	       REF_FREF_SEL_MASK;
++	comphy_lane_reg_set(lane, COMPHY_POWER_PLL_CTRL, data, mask);
++
++	data = CFG_PM_RXDEN_WAIT_1_UNIT | cfg;
++	mask = CFG_PM_OSCCLK_WAIT_MASK | CFG_PM_RXDEN_WAIT_MASK |
++	       CFG_PM_RXDLOZ_WAIT_MASK;
++	comphy_lane_reg_set(lane, COMPHY_PIPE_PWR_MGM_TIM1, data, mask);
++
++	/*
++	 * 9. Enable idle sync
++	 */
++	comphy_lane_reg_set(lane, COMPHY_IDLE_SYNC_EN,
++			    IDLE_SYNC_EN, IDLE_SYNC_EN);
++
++	/*
++	 * 10. Enable the output of 500M clock
++	 */
++	comphy_lane_reg_set(lane, COMPHY_MISC_CTRL0, CLK500M_EN, CLK500M_EN);
++
++	/*
++	 * 11. Set 20-bit data width
++	 */
++	comphy_lane_reg_set(lane, COMPHY_DIG_LOOPBACK_EN,
++			    DATA_WIDTH_20BIT, 0xFFFF);
++
++	/*
++	 * 12. Override Speed_PLL value and use MAC PLL
++	 */
++	data = SPEED_PLL_VALUE_16 | USE_MAX_PLL_RATE_BIT;
++	mask = 0xFFFF;
++	comphy_lane_reg_set(lane, COMPHY_KVCO_CAL_CTRL, data, mask);
++
++	/*
++	 * 13. Check the Polarity invert bit
++	 */
++	data = 0x0;
++	if (lane->invert_tx)
++		data |= TXD_INVERT_BIT;
++	if (lane->invert_rx)
++		data |= RXD_INVERT_BIT;
++	mask = TXD_INVERT_BIT | RXD_INVERT_BIT;
++	comphy_lane_reg_set(lane, COMPHY_SYNC_PATTERN, data, mask);
++
++	/*
++	 * 14. Set max speed generation to USB3.0 5Gbps
++	 */
++	comphy_lane_reg_set(lane, COMPHY_SYNC_MASK_GEN,
++			    PHY_GEN_MAX_USB3_5G, PHY_GEN_MAX_MASK);
++
++	/*
++	 * 15. Set capacitor value for FFE gain peaking to 0xF
++	 */
++	comphy_lane_reg_set(lane, COMPHY_GEN2_SET3,
++			    GS3_FFE_CAP_SEL_VALUE, GS3_FFE_CAP_SEL_MASK);
++
++	/*
++	 * 16. Release SW reset
++	 */
++	data = MODE_CORE_CLK_FREQ_SEL | MODE_PIPE_WIDTH_32 | MODE_REFDIV_BY_4;
++	mask = 0xFFFF;
++	comphy_lane_reg_set(lane, COMPHY_PIPE_RST_CLK_CTRL, data, mask);
++
++	/* Wait for > 55 us to allow PCLK be enabled */
++	udelay(PLL_SET_DELAY_US);
++
++	ret = comphy_lane_reg_poll(lane, COMPHY_PIPE_LANE_STAT1, TXDCLK_PCLK_EN,
++				   COMPHY_PLL_SLEEP, COMPHY_PLL_TIMEOUT);
++	if (ret)
++		dev_err(lane->dev, "Failed to lock USB3 PLL\n");
++
++	return ret;
++}
++
++static int
++mvebu_a3700_comphy_pcie_power_on(struct mvebu_a3700_comphy_lane *lane)
++{
++	u32 mask, data, ref_clk;
++	int ret;
++
++	/* Configure phy selector for PCIe */
++	ret = mvebu_a3700_comphy_set_phy_selector(lane);
++	if (ret)
++		return ret;
++
++	/* 1. Enable max PLL. */
++	comphy_lane_reg_set(lane, COMPHY_PIPE_LANE_CFG1,
++			    USE_MAX_PLL_RATE_EN, USE_MAX_PLL_RATE_EN);
++
++	/* 2. Select 20 bit SERDES interface. */
++	comphy_lane_reg_set(lane, COMPHY_PIPE_CLK_SRC_LO,
++			    CFG_SEL_20B, CFG_SEL_20B);
++
++	/* 3. Force to use reg setting for PCIe mode */
++	comphy_lane_reg_set(lane, COMPHY_MISC_CTRL1,
++			    SEL_BITS_PCIE_FORCE, SEL_BITS_PCIE_FORCE);
++
++	/* 4. Change RX wait */
++	data = CFG_PM_RXDEN_WAIT_1_UNIT | CFG_PM_RXDLOZ_WAIT_12_UNIT;
++	mask = CFG_PM_OSCCLK_WAIT_MASK | CFG_PM_RXDEN_WAIT_MASK |
++	       CFG_PM_RXDLOZ_WAIT_MASK;
++	comphy_lane_reg_set(lane, COMPHY_PIPE_PWR_MGM_TIM1, data, mask);
++
++	/* 5. Enable idle sync */
++	comphy_lane_reg_set(lane, COMPHY_IDLE_SYNC_EN,
++			    IDLE_SYNC_EN, IDLE_SYNC_EN);
++
++	/* 6. Enable the output of 100M/125M/500M clock */
++	data = CLK500M_EN | TXDCLK_2X_SEL | CLK100M_125M_EN;
++	mask = data;
++	comphy_lane_reg_set(lane, COMPHY_MISC_CTRL0, data, mask);
++
++	/*
++	 * 7. Enable TX, PCIE global register, 0xd0074814, it is done in
++	 * PCI-E driver
++	 */
++
++	/*
++	 * 8. Check crystal jumper setting and program the Power and PLL
++	 * Control accordingly
++	 */
++
++	if (lane->priv->xtal_is_40m)
++		ref_clk = REF_FREF_SEL_PCIE_USB3_40MHZ;
++	else
++		ref_clk = REF_FREF_SEL_PCIE_USB3_25MHZ;
++
++	data = PU_IVREF_BIT | PU_PLL_BIT | PU_RX_BIT | PU_TX_BIT |
++	       PU_TX_INTP_BIT | PU_DFE_BIT | COMPHY_MODE_PCIE | ref_clk;
++	mask = 0xFFFF;
++	comphy_lane_reg_set(lane, COMPHY_POWER_PLL_CTRL, data, mask);
++
++	/* 9. Override Speed_PLL value and use MAC PLL */
++	comphy_lane_reg_set(lane, COMPHY_KVCO_CAL_CTRL,
++			    SPEED_PLL_VALUE_16 | USE_MAX_PLL_RATE_BIT,
++			    0xFFFF);
++
++	/* 10. Check the Polarity invert bit */
++	data = 0x0;
++	if (lane->invert_tx)
++		data |= TXD_INVERT_BIT;
++	if (lane->invert_rx)
++		data |= RXD_INVERT_BIT;
++	mask = TXD_INVERT_BIT | RXD_INVERT_BIT;
++	comphy_lane_reg_set(lane, COMPHY_SYNC_PATTERN, data, mask);
++
++	/* 11. Release SW reset */
++	data = MODE_CORE_CLK_FREQ_SEL | MODE_PIPE_WIDTH_32;
++	mask = data | PIPE_SOFT_RESET | MODE_REFDIV_MASK;
++	comphy_lane_reg_set(lane, COMPHY_PIPE_RST_CLK_CTRL, data, mask);
++
++	/* Wait for > 55 us to allow PCLK be enabled */
++	udelay(PLL_SET_DELAY_US);
++
++	ret = comphy_lane_reg_poll(lane, COMPHY_PIPE_LANE_STAT1, TXDCLK_PCLK_EN,
++				   COMPHY_PLL_SLEEP, COMPHY_PLL_TIMEOUT);
++	if (ret)
++		dev_err(lane->dev, "Failed to lock PCIE PLL\n");
++
++	return ret;
++}
++
++static void
++mvebu_a3700_comphy_sata_power_off(struct mvebu_a3700_comphy_lane *lane)
++{
++	/* Set phy isolation mode */
++	comphy_lane_reg_set(lane, COMPHY_ISOLATION_CTRL,
++			    PHY_ISOLATE_MODE, PHY_ISOLATE_MODE);
++
++	/* Power off PLL, Tx, Rx */
++	comphy_lane_reg_set(lane, COMPHY_POWER_PLL_CTRL,
++			    0x0, PU_PLL_BIT | PU_RX_BIT | PU_TX_BIT);
++}
++
++static void
++mvebu_a3700_comphy_ethernet_power_off(struct mvebu_a3700_comphy_lane *lane)
++{
++	u32 mask, data;
++
++	data = PIN_RESET_CORE_BIT | PIN_RESET_COMPHY_BIT | PIN_PU_IVREF_BIT |
++	       PHY_RX_INIT_BIT;
++	mask = data;
++	comphy_periph_reg_set(lane, COMPHY_PHY_CFG1, data, mask);
++}
++
++static void
++mvebu_a3700_comphy_pcie_power_off(struct mvebu_a3700_comphy_lane *lane)
++{
++	/* Power off PLL, Tx, Rx */
++	comphy_lane_reg_set(lane, COMPHY_POWER_PLL_CTRL,
++			    0x0, PU_PLL_BIT | PU_RX_BIT | PU_TX_BIT);
++}
++
++static int mvebu_a3700_comphy_reset(struct phy *phy)
++{
++	struct mvebu_a3700_comphy_lane *lane = phy_get_drvdata(phy);
++	u16 mask, data;
++
++	dev_dbg(lane->dev, "resetting lane %d\n", lane->id);
++
++	/* COMPHY reset for internal logic */
++	comphy_lane_reg_set(lane, COMPHY_SFT_RESET,
++			    SFT_RST_NO_REG, SFT_RST_NO_REG);
++
++	/* COMPHY register reset (cleared automatically) */
++	comphy_lane_reg_set(lane, COMPHY_SFT_RESET, SFT_RST, SFT_RST);
++
++	/* PIPE soft and register reset */
++	data = PIPE_SOFT_RESET | PIPE_REG_RESET;
++	mask = data;
++	comphy_lane_reg_set(lane, COMPHY_PIPE_RST_CLK_CTRL, data, mask);
++
++	/* Release PIPE register reset */
++	comphy_lane_reg_set(lane, COMPHY_PIPE_RST_CLK_CTRL,
++			    0x0, PIPE_REG_RESET);
++
++	/* Reset SB configuration register (only for lanes 0 and 1) */
++	if (lane->id == 0 || lane->id == 1) {
++		u32 mask, data;
++
++		data = PIN_RESET_CORE_BIT | PIN_RESET_COMPHY_BIT |
++		       PIN_PU_PLL_BIT | PIN_PU_RX_BIT | PIN_PU_TX_BIT;
++		mask = data | PIN_PU_IVREF_BIT | PIN_TX_IDLE_BIT;
++		comphy_periph_reg_set(lane, COMPHY_PHY_CFG1, data, mask);
++	}
++
++	return 0;
++}
++
++static bool mvebu_a3700_comphy_check_mode(int lane,
+ 					  enum phy_mode mode,
+ 					  int submode)
+ {
+@@ -122,7 +1141,7 @@ static int mvebu_a3700_comphy_get_fw_mod
+ 
+ 	/* Unused PHY mux value is 0x0 */
+ 	if (mode == PHY_MODE_INVALID)
+-		return -EINVAL;
++		return false;
+ 
+ 	for (i = 0; i < n; i++) {
+ 		if (mvebu_a3700_comphy_modes[i].lane == lane &&
+@@ -132,27 +1151,30 @@ static int mvebu_a3700_comphy_get_fw_mod
+ 	}
+ 
+ 	if (i == n)
+-		return -EINVAL;
++		return false;
+ 
+-	return mvebu_a3700_comphy_modes[i].fw_mode;
++	return true;
+ }
+ 
+ static int mvebu_a3700_comphy_set_mode(struct phy *phy, enum phy_mode mode,
+ 				       int submode)
+ {
+ 	struct mvebu_a3700_comphy_lane *lane = phy_get_drvdata(phy);
+-	int fw_mode;
+ 
+-	if (submode == PHY_INTERFACE_MODE_1000BASEX)
+-		submode = PHY_INTERFACE_MODE_SGMII;
+-
+-	fw_mode = mvebu_a3700_comphy_get_fw_mode(lane->id, mode,
+-						 submode);
+-	if (fw_mode < 0) {
++	if (!mvebu_a3700_comphy_check_mode(lane->id, mode, submode)) {
+ 		dev_err(lane->dev, "invalid COMPHY mode\n");
+-		return fw_mode;
++		return -EINVAL;
+ 	}
+ 
++	/* Mode cannot be changed while the PHY is powered on */
++	if (phy->power_count &&
++	    (lane->mode != mode || lane->submode != submode))
++		return -EBUSY;
++
++	/* If changing mode, ensure reset is called */
++	if (lane->mode != PHY_MODE_INVALID && lane->mode != mode)
++		lane->needs_reset = true;
++
+ 	/* Just remember the mode, ->power_on() will do the real setup */
+ 	lane->mode = mode;
+ 	lane->submode = submode;
+@@ -163,76 +1185,77 @@ static int mvebu_a3700_comphy_set_mode(s
+ static int mvebu_a3700_comphy_power_on(struct phy *phy)
+ {
+ 	struct mvebu_a3700_comphy_lane *lane = phy_get_drvdata(phy);
+-	u32 fw_param;
+-	int fw_mode;
+-	int fw_port;
+ 	int ret;
+ 
+-	fw_mode = mvebu_a3700_comphy_get_fw_mode(lane->id,
+-						 lane->mode, lane->submode);
+-	if (fw_mode < 0) {
++	if (!mvebu_a3700_comphy_check_mode(lane->id, lane->mode,
++					   lane->submode)) {
+ 		dev_err(lane->dev, "invalid COMPHY mode\n");
+-		return fw_mode;
++		return -EINVAL;
++	}
++
++	if (lane->needs_reset) {
++		ret = mvebu_a3700_comphy_reset(phy);
++		if (ret)
++			return ret;
++
++		lane->needs_reset = false;
+ 	}
+ 
+ 	switch (lane->mode) {
+ 	case PHY_MODE_USB_HOST_SS:
+ 		dev_dbg(lane->dev, "set lane %d to USB3 host mode\n", lane->id);
+-		fw_param = COMPHY_FW_MODE(fw_mode);
+-		break;
++		return mvebu_a3700_comphy_usb3_power_on(lane);
+ 	case PHY_MODE_SATA:
+ 		dev_dbg(lane->dev, "set lane %d to SATA mode\n", lane->id);
+-		fw_param = COMPHY_FW_MODE(fw_mode);
+-		break;
++		return mvebu_a3700_comphy_sata_power_on(lane);
+ 	case PHY_MODE_ETHERNET:
+-		fw_port = (lane->id == 0) ? 1 : 0;
+-		switch (lane->submode) {
+-		case PHY_INTERFACE_MODE_SGMII:
+-			dev_dbg(lane->dev, "set lane %d to SGMII mode\n",
+-				lane->id);
+-			fw_param = COMPHY_FW_NET(fw_mode, fw_port,
+-						 COMPHY_FW_SPEED_1_25G);
+-			break;
+-		case PHY_INTERFACE_MODE_2500BASEX:
+-			dev_dbg(lane->dev, "set lane %d to 2500BASEX mode\n",
+-				lane->id);
+-			fw_param = COMPHY_FW_NET(fw_mode, fw_port,
+-						 COMPHY_FW_SPEED_3_125G);
+-			break;
+-		default:
+-			dev_err(lane->dev, "unsupported PHY submode (%d)\n",
+-				lane->submode);
+-			return -ENOTSUPP;
+-		}
+-		break;
++		dev_dbg(lane->dev, "set lane %d to Ethernet mode\n", lane->id);
++		return mvebu_a3700_comphy_ethernet_power_on(lane);
+ 	case PHY_MODE_PCIE:
+ 		dev_dbg(lane->dev, "set lane %d to PCIe mode\n", lane->id);
+-		fw_param = COMPHY_FW_PCIE(fw_mode, COMPHY_FW_SPEED_5G,
+-					  phy->attrs.bus_width);
+-		break;
++		return mvebu_a3700_comphy_pcie_power_on(lane);
+ 	default:
+ 		dev_err(lane->dev, "unsupported PHY mode (%d)\n", lane->mode);
+-		return -ENOTSUPP;
++		return -EOPNOTSUPP;
+ 	}
+-
+-	ret = mvebu_a3700_comphy_smc(COMPHY_SIP_POWER_ON, lane->id, fw_param);
+-	if (ret == -EOPNOTSUPP)
+-		dev_err(lane->dev,
+-			"unsupported SMC call, try updating your firmware\n");
+-
+-	return ret;
+ }
+ 
+ static int mvebu_a3700_comphy_power_off(struct phy *phy)
+ {
+ 	struct mvebu_a3700_comphy_lane *lane = phy_get_drvdata(phy);
+ 
+-	return mvebu_a3700_comphy_smc(COMPHY_SIP_POWER_OFF, lane->id, 0);
++	switch (lane->mode) {
++	case PHY_MODE_USB_HOST_SS:
++		/*
++		 * The USB3 MAC sets the USB3 PHY to low state, so we do not
++		 * need to power off USB3 PHY again.
++		 */
++		break;
++
++	case PHY_MODE_SATA:
++		mvebu_a3700_comphy_sata_power_off(lane);
++		break;
++
++	case PHY_MODE_ETHERNET:
++		mvebu_a3700_comphy_ethernet_power_off(lane);
++		break;
++
++	case PHY_MODE_PCIE:
++		mvebu_a3700_comphy_pcie_power_off(lane);
++		break;
++
++	default:
++		dev_err(lane->dev, "invalid COMPHY mode\n");
++		return -EINVAL;
++	}
++
++	return 0;
+ }
+ 
+ static const struct phy_ops mvebu_a3700_comphy_ops = {
+ 	.power_on	= mvebu_a3700_comphy_power_on,
+ 	.power_off	= mvebu_a3700_comphy_power_off,
++	.reset		= mvebu_a3700_comphy_reset,
+ 	.set_mode	= mvebu_a3700_comphy_set_mode,
+ 	.owner		= THIS_MODULE,
+ };
+@@ -256,13 +1279,75 @@ static struct phy *mvebu_a3700_comphy_xl
+ 		return ERR_PTR(-EINVAL);
+ 	}
+ 
++	lane->invert_tx = args->args[1] & BIT(0);
++	lane->invert_rx = args->args[1] & BIT(1);
++
+ 	return phy;
+ }
+ 
+ static int mvebu_a3700_comphy_probe(struct platform_device *pdev)
+ {
++	struct mvebu_a3700_comphy_priv *priv;
+ 	struct phy_provider *provider;
+ 	struct device_node *child;
++	struct resource *res;
++	struct clk *clk;
++	int ret;
++
++	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
++	if (!priv)
++		return -ENOMEM;
++
++	spin_lock_init(&priv->lock);
++
++	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "comphy");
++	priv->comphy_regs = devm_ioremap_resource(&pdev->dev, res);
++	if (IS_ERR(priv->comphy_regs))
++		return PTR_ERR(priv->comphy_regs);
++
++	res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
++					   "lane1_pcie_gbe");
++	priv->lane1_phy_regs = devm_ioremap_resource(&pdev->dev, res);
++	if (IS_ERR(priv->lane1_phy_regs))
++		return PTR_ERR(priv->lane1_phy_regs);
++
++	res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
++					   "lane0_usb3_gbe");
++	priv->lane0_phy_regs = devm_ioremap_resource(&pdev->dev, res);
++	if (IS_ERR(priv->lane0_phy_regs))
++		return PTR_ERR(priv->lane0_phy_regs);
++
++	res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
++					   "lane2_sata_usb3");
++	priv->lane2_phy_indirect = devm_ioremap_resource(&pdev->dev, res);
++	if (IS_ERR(priv->lane2_phy_indirect))
++		return PTR_ERR(priv->lane2_phy_indirect);
++
++	/*
++	 * Driver needs to know if reference xtal clock is 40MHz or 25MHz.
++	 * Old DT bindings do not have xtal clk present. So do not fail here
++	 * and expects that default 25MHz reference clock is used.
++	 */
++	clk = clk_get(&pdev->dev, "xtal");
++	if (IS_ERR(clk)) {
++		if (PTR_ERR(clk) == -EPROBE_DEFER)
++			return -EPROBE_DEFER;
++		dev_warn(&pdev->dev, "missing 'xtal' clk (%ld)\n",
++			 PTR_ERR(clk));
++	} else {
++		ret = clk_prepare_enable(clk);
++		if (ret) {
++			dev_warn(&pdev->dev, "enabling xtal clk failed (%d)\n",
++				 ret);
++		} else {
++			if (clk_get_rate(clk) == 40000000)
++				priv->xtal_is_40m = true;
++			clk_disable_unprepare(clk);
++		}
++		clk_put(clk);
++	}
++
++	dev_set_drvdata(&pdev->dev, priv);
+ 
+ 	for_each_available_child_of_node(pdev->dev.of_node, child) {
+ 		struct mvebu_a3700_comphy_lane *lane;
+@@ -277,7 +1362,7 @@ static int mvebu_a3700_comphy_probe(stru
+ 			continue;
+ 		}
+ 
+-		if (lane_id >= MVEBU_A3700_COMPHY_LANES) {
++		if (lane_id >= 3) {
+ 			dev_err(&pdev->dev, "invalid 'reg' property\n");
+ 			continue;
+ 		}
+@@ -295,15 +1380,26 @@ static int mvebu_a3700_comphy_probe(stru
+ 			return PTR_ERR(phy);
+ 		}
+ 
++		lane->priv = priv;
+ 		lane->dev = &pdev->dev;
+ 		lane->mode = PHY_MODE_INVALID;
+ 		lane->submode = PHY_INTERFACE_MODE_NA;
+ 		lane->id = lane_id;
++		lane->invert_tx = false;
++		lane->invert_rx = false;
+ 		phy_set_drvdata(phy, lane);
++
++		/*
++		 * To avoid relying on the bootloader/firmware configuration,
++		 * power off all comphys.
++		 */
++		mvebu_a3700_comphy_reset(phy);
++		lane->needs_reset = false;
+ 	}
+ 
+ 	provider = devm_of_phy_provider_register(&pdev->dev,
+ 						 mvebu_a3700_comphy_xlate);
++
+ 	return PTR_ERR_OR_ZERO(provider);
+ }
+ 
+@@ -323,5 +1419,7 @@ static struct platform_driver mvebu_a370
+ module_platform_driver(mvebu_a3700_comphy_driver);
+ 
+ MODULE_AUTHOR("Miquèl Raynal <[email protected]>");
++MODULE_AUTHOR("Pali Rohár <[email protected]>");
++MODULE_AUTHOR("Marek Behún <[email protected]>");
+ MODULE_DESCRIPTION("Common PHY driver for A3700");
+ MODULE_LICENSE("GPL v2");
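
With the native implementation above in place, a consumer simply drives the lane through the generic PHY API and gets a real error, never -EOPNOTSUPP, for a valid configuration. A minimal consumer-side sketch follows, assuming a device-tree PHY handle named "sata"; the function and phandle names are illustrative, not taken from the patches:

#include <linux/device.h>
#include <linux/err.h>
#include <linux/phy/phy.h>

static int example_enable_comphy(struct device *dev)
{
	struct phy *phy;
	int ret;

	phy = devm_phy_get(dev, "sata");	/* illustrative phandle name */
	if (IS_ERR(phy))
		return PTR_ERR(phy);

	ret = phy_init(phy);
	if (ret)
		return ret;

	/* Routes to mvebu_a3700_comphy_set_mode() in the driver above. */
	ret = phy_set_mode(phy, PHY_MODE_SATA);
	if (ret)
		goto err_exit;

	/* Runs the full native power-on sequence for the lane. */
	ret = phy_power_on(phy);
	if (ret)
		goto err_exit;

	return 0;

err_exit:
	phy_exit(phy);
	return ret;
}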

+ 32 - 0
target/linux/generic/backport-6.1/345-v5.17-arm64-dts-marvell-armada-37xx-Add-xtal-clock-to-comp.patch

@@ -0,0 +1,32 @@
+From 73a78b6130d9e13daca22b86ad52f063b9403e03 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Pali=20Roh=C3=A1r?= <[email protected]>
+Date: Wed, 8 Dec 2021 03:40:35 +0100
+Subject: [PATCH 1/1] arm64: dts: marvell: armada-37xx: Add xtal clock to
+ comphy node
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+The kernel driver phy-mvebu-a3700-comphy.c needs to know the rate of the
+reference xtal clock, so add the missing xtal clock source to the comphy
+device tree node. If the property is not present, the driver defaults to a
+25 MHz xtal rate (which, as far as we know, is used by all existing boards).
+
+Signed-off-by: Pali Rohár <[email protected]>
+Signed-off-by: Marek Behún <[email protected]>
+Signed-off-by: Gregory CLEMENT <[email protected]>
+---
+ arch/arm64/boot/dts/marvell/armada-37xx.dtsi | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/arch/arm64/boot/dts/marvell/armada-37xx.dtsi
++++ b/arch/arm64/boot/dts/marvell/armada-37xx.dtsi
+@@ -265,6 +265,8 @@
+ 					    "lane2_sata_usb3";
+ 				#address-cells = <1>;
+ 				#size-cells = <0>;
++				clocks = <&xtalclk>;
++				clock-names = "xtal";
+ 
+ 				comphy0: phy@0 {
+ 					reg = <0>;
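
On the driver side this clock is picked up at probe time, as in the comphy patch above; it is condensed here into a helper for illustration (the name is made up and -EPROBE_DEFER handling is omitted). A missing clock simply keeps the 25 MHz default:

#include <linux/clk.h>
#include <linux/device.h>
#include <linux/err.h>

/* Detect a 40 MHz reference xtal; anything else (or no clock) means 25 MHz. */
static bool a3700_comphy_xtal_is_40m(struct device *dev)
{
	struct clk *clk;
	bool is_40m = false;

	clk = clk_get(dev, "xtal");
	if (IS_ERR(clk))
		return false;	/* old DT without the clock: keep the 25 MHz default */

	if (!clk_prepare_enable(clk)) {
		is_40m = (clk_get_rate(clk) == 40000000);
		clk_disable_unprepare(clk);
	}
	clk_put(clk);

	return is_40m;
}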

+ 64 - 0
target/linux/generic/backport-6.1/346-v5.18-01-Revert-ata-ahci-mvebu-Make-SATA-PHY-optional-for-Arm.patch

@@ -0,0 +1,64 @@
+From ee995101fde67f85a3cd4c74f4f92fc4592e726b Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Pali=20Roh=C3=A1r?= <[email protected]>
+Date: Thu, 3 Feb 2022 22:44:42 +0100
+Subject: [PATCH 1/3] Revert "ata: ahci: mvebu: Make SATA PHY optional for
+ Armada 3720"
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+This reverts commit 45aefe3d2251e4e229d7662052739f96ad1d08d9.
+
+The Armada 3720 PHY driver (phy-mvebu-a3700-comphy.c) no longer returns
+-EOPNOTSUPP from its phy_power_on() callback.
+
+So remove the AHCI_HFLAG_IGN_NOTSUPP_POWER_ON flag from the Armada 3720
+platform data.
+
+AHCI_HFLAG_IGN_NOTSUPP_POWER_ON is not used by any other ahci driver, so
+remove this flag completely.
+
+Signed-off-by: Pali Rohár <[email protected]>
+Signed-off-by: Marek Behún <[email protected]>
+Acked-by: Miquel Raynal <[email protected]>
+Acked-by: Damien Le Moal <[email protected]>
+Link: https://lore.kernel.org/r/[email protected]
+Signed-off-by: Vinod Koul <[email protected]>
+---
+ drivers/ata/ahci.h             | 2 --
+ drivers/ata/ahci_mvebu.c       | 2 +-
+ drivers/ata/libahci_platform.c | 2 +-
+ 3 files changed, 2 insertions(+), 4 deletions(-)
+
+--- a/drivers/ata/ahci.h
++++ b/drivers/ata/ahci.h
+@@ -240,8 +240,6 @@ enum {
+ 							as default lpm_policy */
+ 	AHCI_HFLAG_SUSPEND_PHYS		= (1 << 26), /* handle PHYs during
+ 							suspend/resume */
+-	AHCI_HFLAG_IGN_NOTSUPP_POWER_ON	= (1 << 27), /* ignore -EOPNOTSUPP
+-							from phy_power_on() */
+ 	AHCI_HFLAG_NO_SXS		= (1 << 28), /* SXS not supported */
+ 
+ 	/* ap->flags bits */
+--- a/drivers/ata/ahci_mvebu.c
++++ b/drivers/ata/ahci_mvebu.c
+@@ -227,7 +227,7 @@ static const struct ahci_mvebu_plat_data
+ 
+ static const struct ahci_mvebu_plat_data ahci_mvebu_armada_3700_plat_data = {
+ 	.plat_config = ahci_mvebu_armada_3700_config,
+-	.flags = AHCI_HFLAG_SUSPEND_PHYS | AHCI_HFLAG_IGN_NOTSUPP_POWER_ON,
++	.flags = AHCI_HFLAG_SUSPEND_PHYS,
+ };
+ 
+ static const struct of_device_id ahci_mvebu_of_match[] = {
+--- a/drivers/ata/libahci_platform.c
++++ b/drivers/ata/libahci_platform.c
+@@ -59,7 +59,7 @@ int ahci_platform_enable_phys(struct ahc
+ 		}
+ 
+ 		rc = phy_power_on(hpriv->phys[i]);
+-		if (rc && !(rc == -EOPNOTSUPP && (hpriv->flags & AHCI_HFLAG_IGN_NOTSUPP_POWER_ON))) {
++		if (rc) {
+ 			phy_exit(hpriv->phys[i]);
+ 			goto disable_phys;
+ 		}

+ 166 - 0
target/linux/generic/backport-6.1/346-v5.18-02-Revert-usb-host-xhci-mvebu-make-USB-3.0-PHY-optional.patch

@@ -0,0 +1,166 @@
+From 8e10548f7f4814e530857d2049d6af6bc78add53 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Pali=20Roh=C3=A1r?= <[email protected]>
+Date: Thu, 3 Feb 2022 22:44:43 +0100
+Subject: [PATCH 2/3] Revert "usb: host: xhci: mvebu: make USB 3.0 PHY optional
+ for Armada 3720"
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+This reverts commit 3241929b67d28c83945d3191c6816a3271fd6b85.
+
+Armada 3720 phy driver (phy-mvebu-a3700-comphy.c) does not return
+-EOPNOTSUPP from phy_power_on() callback anymore.
+
+So remove XHCI_SKIP_PHY_INIT flag from xhci_mvebu_a3700_plat_setup() and
+then also whole xhci_mvebu_a3700_plat_setup() function which is there just
+to handle -EOPNOTSUPP for XHCI_SKIP_PHY_INIT.
+
+xhci plat_setup callback is not used by any other xhci plat driver, so
+remove this callback completely.
+
+Signed-off-by: Pali Rohár <[email protected]>
+Signed-off-by: Marek Behún <[email protected]>
+Acked-by: Miquel Raynal <[email protected]>
+Acked-by: Greg Kroah-Hartman <[email protected]>
+Link: https://lore.kernel.org/r/[email protected]
+Signed-off-by: Vinod Koul <[email protected]>
+---
+ drivers/usb/host/xhci-mvebu.c | 42 -----------------------------------
+ drivers/usb/host/xhci-mvebu.h |  6 -----
+ drivers/usb/host/xhci-plat.c  | 20 +----------------
+ drivers/usb/host/xhci-plat.h  |  1 -
+ 4 files changed, 1 insertion(+), 68 deletions(-)
+
+--- a/drivers/usb/host/xhci-mvebu.c
++++ b/drivers/usb/host/xhci-mvebu.c
+@@ -8,7 +8,6 @@
+ #include <linux/mbus.h>
+ #include <linux/of.h>
+ #include <linux/platform_device.h>
+-#include <linux/phy/phy.h>
+ 
+ #include <linux/usb.h>
+ #include <linux/usb/hcd.h>
+@@ -74,47 +73,6 @@ int xhci_mvebu_mbus_init_quirk(struct us
+ 
+ 	return 0;
+ }
+-
+-int xhci_mvebu_a3700_plat_setup(struct usb_hcd *hcd)
+-{
+-	struct xhci_hcd *xhci = hcd_to_xhci(hcd);
+-	struct device *dev = hcd->self.controller;
+-	struct phy *phy;
+-	int ret;
+-
+-	/* Old bindings miss the PHY handle */
+-	phy = of_phy_get(dev->of_node, "usb3-phy");
+-	if (IS_ERR(phy) && PTR_ERR(phy) == -EPROBE_DEFER)
+-		return -EPROBE_DEFER;
+-	else if (IS_ERR(phy))
+-		goto phy_out;
+-
+-	ret = phy_init(phy);
+-	if (ret)
+-		goto phy_put;
+-
+-	ret = phy_set_mode(phy, PHY_MODE_USB_HOST_SS);
+-	if (ret)
+-		goto phy_exit;
+-
+-	ret = phy_power_on(phy);
+-	if (ret == -EOPNOTSUPP) {
+-		/* Skip initializatin of XHCI PHY when it is unsupported by firmware */
+-		dev_warn(dev, "PHY unsupported by firmware\n");
+-		xhci->quirks |= XHCI_SKIP_PHY_INIT;
+-	}
+-	if (ret)
+-		goto phy_exit;
+-
+-	phy_power_off(phy);
+-phy_exit:
+-	phy_exit(phy);
+-phy_put:
+-	of_phy_put(phy);
+-phy_out:
+-
+-	return 0;
+-}
+ 
+ int xhci_mvebu_a3700_init_quirk(struct usb_hcd *hcd)
+ {
+--- a/drivers/usb/host/xhci-mvebu.h
++++ b/drivers/usb/host/xhci-mvebu.h
+@@ -12,18 +12,12 @@ struct usb_hcd;
+ 
+ #if IS_ENABLED(CONFIG_USB_XHCI_MVEBU)
+ int xhci_mvebu_mbus_init_quirk(struct usb_hcd *hcd);
+-int xhci_mvebu_a3700_plat_setup(struct usb_hcd *hcd);
+ int xhci_mvebu_a3700_init_quirk(struct usb_hcd *hcd);
+ #else
+ static inline int xhci_mvebu_mbus_init_quirk(struct usb_hcd *hcd)
+ {
+ 	return 0;
+ }
+-
+-static inline int xhci_mvebu_a3700_plat_setup(struct usb_hcd *hcd)
+-{
+-	return 0;
+-}
+ 
+ static inline int xhci_mvebu_a3700_init_quirk(struct usb_hcd *hcd)
+ {
+--- a/drivers/usb/host/xhci-plat.c
++++ b/drivers/usb/host/xhci-plat.c
+@@ -44,16 +44,6 @@ static void xhci_priv_plat_start(struct
+ 		priv->plat_start(hcd);
+ }
+ 
+-static int xhci_priv_plat_setup(struct usb_hcd *hcd)
+-{
+-	struct xhci_plat_priv *priv = hcd_to_xhci_priv(hcd);
+-
+-	if (!priv->plat_setup)
+-		return 0;
+-
+-	return priv->plat_setup(hcd);
+-}
+-
+ static int xhci_priv_init_quirk(struct usb_hcd *hcd)
+ {
+ 	struct xhci_plat_priv *priv = hcd_to_xhci_priv(hcd);
+@@ -121,7 +111,6 @@ static const struct xhci_plat_priv xhci_
+ };
+ 
+ static const struct xhci_plat_priv xhci_plat_marvell_armada3700 = {
+-	.plat_setup = xhci_mvebu_a3700_plat_setup,
+ 	.init_quirk = xhci_mvebu_a3700_init_quirk,
+ };
+ 
+@@ -341,14 +330,7 @@ static int xhci_plat_probe(struct platfo
+ 
+ 	hcd->tpl_support = of_usb_host_tpl_support(sysdev->of_node);
+ 	xhci->shared_hcd->tpl_support = hcd->tpl_support;
+-
+-	if (priv) {
+-		ret = xhci_priv_plat_setup(hcd);
+-		if (ret)
+-			goto disable_usb_phy;
+-	}
+-
+-	if ((xhci->quirks & XHCI_SKIP_PHY_INIT) || (priv && (priv->quirks & XHCI_SKIP_PHY_INIT)))
++	if (priv && (priv->quirks & XHCI_SKIP_PHY_INIT))
+ 		hcd->skip_phy_initialization = 1;
+ 
+ 	if (priv && (priv->quirks & XHCI_SG_TRB_CACHE_SIZE_QUIRK))
+--- a/drivers/usb/host/xhci-plat.h
++++ b/drivers/usb/host/xhci-plat.h
+@@ -13,7 +13,6 @@
+ struct xhci_plat_priv {
+ 	const char *firmware_name;
+ 	unsigned long long quirks;
+-	int (*plat_setup)(struct usb_hcd *);
+ 	void (*plat_start)(struct usb_hcd *);
+ 	int (*init_quirk)(struct usb_hcd *);
+ 	int (*suspend_quirk)(struct usb_hcd *);

+ 39 - 0
target/linux/generic/backport-6.1/346-v5.18-03-Revert-PCI-aardvark-Fix-initialization-with-old-Marv.patch

@@ -0,0 +1,39 @@
+From 9a4556dad7bd0a6b8339cb72e169f5c76f2af6f1 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Pali=20Roh=C3=A1r?= <[email protected]>
+Date: Thu, 3 Feb 2022 22:44:44 +0100
+Subject: [PATCH 3/3] Revert "PCI: aardvark: Fix initialization with old
+ Marvell's Arm Trusted Firmware"
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+This reverts commit b0c6ae0f8948a2be6bf4e8b4bbab9ca1343289b6.
+
+Armada 3720 phy driver (phy-mvebu-a3700-comphy.c) does not return
+-EOPNOTSUPP from phy_power_on() callback anymore.
+
+So remove dead code which handles -EOPNOTSUPP return value.
+
+Signed-off-by: Pali Rohár <[email protected]>
+Signed-off-by: Marek Behún <[email protected]>
+Acked-by: Miquel Raynal <[email protected]>
+Acked-by: Lorenzo Pieralisi <[email protected]>
+Link: https://lore.kernel.org/r/[email protected]
+Signed-off-by: Vinod Koul <[email protected]>
+---
+ drivers/pci/controller/pci-aardvark.c | 4 +---
+ 1 file changed, 1 insertion(+), 3 deletions(-)
+
+--- a/drivers/pci/controller/pci-aardvark.c
++++ b/drivers/pci/controller/pci-aardvark.c
+@@ -1642,9 +1642,7 @@ static int advk_pcie_enable_phy(struct a
+ 	}
+ 
+ 	ret = phy_power_on(pcie->phy);
+-	if (ret == -EOPNOTSUPP) {
+-		dev_warn(&pcie->pdev->dev, "PHY unsupported by firmware\n");
+-	} else if (ret) {
++	if (ret) {
+ 		phy_exit(pcie->phy);
+ 		return ret;
+ 	}

+ 194 - 0
target/linux/generic/backport-6.1/347-v6.0-phy-marvell-phy-mvebu-a3700-comphy-Remove-broken-res.patch

@@ -0,0 +1,194 @@
+From 0a6fc70d76bddf98278af2ac000379c82aec8f11 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Pali=20Roh=C3=A1r?= <[email protected]>
+Date: Mon, 29 Aug 2022 10:30:46 +0200
+Subject: [PATCH] phy: marvell: phy-mvebu-a3700-comphy: Remove broken reset
+ support
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Reset support for SATA PHY is somehow broken and after calling it, kernel
+is not able to detect and initialize SATA disk Samsung SSD 850 EMT0 [1].
+
+Reset support was introduced in commit 934337080c6c ("phy: marvell:
+phy-mvebu-a3700-comphy: Add native kernel implementation") as part of
+complete rewrite of this driver. v1 patch series of that commit [2] did
+not contain reset support and was tested that is working fine with
+Ethernet, SATA and USB PHYs without issues too.
+
+So for now remove broken reset support and change implementation of
+power_off callback to power off all functions on specified lane (and not
+only selected function) because during startup kernel does not know which
+function was selected and configured by bootloader. Same logic was used
+also in v1 patch series of that commit.
+
+This change fixes issues with initialization of SATA disk Samsung SSD 850
+and disk is working again, like before mentioned commit.
+
+Once problem with PHY reset callback is solved its functionality could be
+re-introduced. But for now it is unknown why it does not work.
+
+[1] - https://lore.kernel.org/r/20220531124159.3e4lgn2v462irbtz@shindev/
+[2] - https://lore.kernel.org/r/[email protected]/
+
+Reported-by: Shinichiro Kawasaki <[email protected]>
+Fixes: 934337080c6c ("phy: marvell: phy-mvebu-a3700-comphy: Add native kernel implementation")
+Cc: [email protected] # v5.18+
+Signed-off-by: Pali Rohár <[email protected]>
+Tested-by: Shinichiro Kawasaki <[email protected]>
+Link: https://lore.kernel.org/r/[email protected]
+Signed-off-by: Vinod Koul <[email protected]>
+---
+ drivers/phy/marvell/phy-mvebu-a3700-comphy.c | 87 ++++----------------
+ 1 file changed, 17 insertions(+), 70 deletions(-)
+
+--- a/drivers/phy/marvell/phy-mvebu-a3700-comphy.c
++++ b/drivers/phy/marvell/phy-mvebu-a3700-comphy.c
+@@ -274,7 +274,6 @@ struct mvebu_a3700_comphy_lane {
+ 	int submode;
+ 	bool invert_tx;
+ 	bool invert_rx;
+-	bool needs_reset;
+ };
+ 
+ struct gbe_phy_init_data_fix {
+@@ -1097,40 +1096,12 @@ mvebu_a3700_comphy_pcie_power_off(struct
+ 			    0x0, PU_PLL_BIT | PU_RX_BIT | PU_TX_BIT);
+ }
+ 
+-static int mvebu_a3700_comphy_reset(struct phy *phy)
++static void mvebu_a3700_comphy_usb3_power_off(struct mvebu_a3700_comphy_lane *lane)
+ {
+-	struct mvebu_a3700_comphy_lane *lane = phy_get_drvdata(phy);
+-	u16 mask, data;
+-
+-	dev_dbg(lane->dev, "resetting lane %d\n", lane->id);
+-
+-	/* COMPHY reset for internal logic */
+-	comphy_lane_reg_set(lane, COMPHY_SFT_RESET,
+-			    SFT_RST_NO_REG, SFT_RST_NO_REG);
+-
+-	/* COMPHY register reset (cleared automatically) */
+-	comphy_lane_reg_set(lane, COMPHY_SFT_RESET, SFT_RST, SFT_RST);
+-
+-	/* PIPE soft and register reset */
+-	data = PIPE_SOFT_RESET | PIPE_REG_RESET;
+-	mask = data;
+-	comphy_lane_reg_set(lane, COMPHY_PIPE_RST_CLK_CTRL, data, mask);
+-
+-	/* Release PIPE register reset */
+-	comphy_lane_reg_set(lane, COMPHY_PIPE_RST_CLK_CTRL,
+-			    0x0, PIPE_REG_RESET);
+-
+-	/* Reset SB configuration register (only for lanes 0 and 1) */
+-	if (lane->id == 0 || lane->id == 1) {
+-		u32 mask, data;
+-
+-		data = PIN_RESET_CORE_BIT | PIN_RESET_COMPHY_BIT |
+-		       PIN_PU_PLL_BIT | PIN_PU_RX_BIT | PIN_PU_TX_BIT;
+-		mask = data | PIN_PU_IVREF_BIT | PIN_TX_IDLE_BIT;
+-		comphy_periph_reg_set(lane, COMPHY_PHY_CFG1, data, mask);
+-	}
+-
+-	return 0;
++	/*
++	 * The USB3 MAC sets the USB3 PHY to low state, so we do not
++	 * need to power off USB3 PHY again.
++	 */
+ }
+ 
+ static bool mvebu_a3700_comphy_check_mode(int lane,
+@@ -1171,10 +1142,6 @@ static int mvebu_a3700_comphy_set_mode(s
+ 	    (lane->mode != mode || lane->submode != submode))
+ 		return -EBUSY;
+ 
+-	/* If changing mode, ensure reset is called */
+-	if (lane->mode != PHY_MODE_INVALID && lane->mode != mode)
+-		lane->needs_reset = true;
+-
+ 	/* Just remember the mode, ->power_on() will do the real setup */
+ 	lane->mode = mode;
+ 	lane->submode = submode;
+@@ -1185,7 +1152,6 @@ static int mvebu_a3700_comphy_set_mode(s
+ static int mvebu_a3700_comphy_power_on(struct phy *phy)
+ {
+ 	struct mvebu_a3700_comphy_lane *lane = phy_get_drvdata(phy);
+-	int ret;
+ 
+ 	if (!mvebu_a3700_comphy_check_mode(lane->id, lane->mode,
+ 					   lane->submode)) {
+@@ -1193,14 +1159,6 @@ static int mvebu_a3700_comphy_power_on(s
+ 		return -EINVAL;
+ 	}
+ 
+-	if (lane->needs_reset) {
+-		ret = mvebu_a3700_comphy_reset(phy);
+-		if (ret)
+-			return ret;
+-
+-		lane->needs_reset = false;
+-	}
+-
+ 	switch (lane->mode) {
+ 	case PHY_MODE_USB_HOST_SS:
+ 		dev_dbg(lane->dev, "set lane %d to USB3 host mode\n", lane->id);
+@@ -1224,38 +1182,28 @@ static int mvebu_a3700_comphy_power_off(
+ {
+ 	struct mvebu_a3700_comphy_lane *lane = phy_get_drvdata(phy);
+ 
+-	switch (lane->mode) {
+-	case PHY_MODE_USB_HOST_SS:
+-		/*
+-		 * The USB3 MAC sets the USB3 PHY to low state, so we do not
+-		 * need to power off USB3 PHY again.
+-		 */
+-		break;
+-
+-	case PHY_MODE_SATA:
+-		mvebu_a3700_comphy_sata_power_off(lane);
+-		break;
+-
+-	case PHY_MODE_ETHERNET:
++	switch (lane->id) {
++	case 0:
++		mvebu_a3700_comphy_usb3_power_off(lane);
+ 		mvebu_a3700_comphy_ethernet_power_off(lane);
+-		break;
+-
+-	case PHY_MODE_PCIE:
++		return 0;
++	case 1:
+ 		mvebu_a3700_comphy_pcie_power_off(lane);
+-		break;
+-
++		mvebu_a3700_comphy_ethernet_power_off(lane);
++		return 0;
++	case 2:
++		mvebu_a3700_comphy_usb3_power_off(lane);
++		mvebu_a3700_comphy_sata_power_off(lane);
++		return 0;
+ 	default:
+ 		dev_err(lane->dev, "invalid COMPHY mode\n");
+ 		return -EINVAL;
+ 	}
+-
+-	return 0;
+ }
+ 
+ static const struct phy_ops mvebu_a3700_comphy_ops = {
+ 	.power_on	= mvebu_a3700_comphy_power_on,
+ 	.power_off	= mvebu_a3700_comphy_power_off,
+-	.reset		= mvebu_a3700_comphy_reset,
+ 	.set_mode	= mvebu_a3700_comphy_set_mode,
+ 	.owner		= THIS_MODULE,
+ };
+@@ -1393,8 +1341,7 @@ static int mvebu_a3700_comphy_probe(stru
+ 		 * To avoid relying on the bootloader/firmware configuration,
+ 		 * power off all comphys.
+ 		 */
+-		mvebu_a3700_comphy_reset(phy);
+-		lane->needs_reset = false;
++		mvebu_a3700_comphy_power_off(phy);
+ 	}
+ 
+ 	provider = devm_of_phy_provider_register(&pdev->dev,

+ 90 - 0
target/linux/generic/backport-6.1/350-v5.18-regmap-add-configurable-downshift-for-addresses.patch

@@ -0,0 +1,90 @@
+From 86fc59ef818beb0e1945d17f8e734898baba7e4e Mon Sep 17 00:00:00 2001
+From: Colin Foster <[email protected]>
+Date: Sun, 13 Mar 2022 15:45:23 -0700
+Subject: [PATCH 1/2] regmap: add configurable downshift for addresses
+
+Add an additional reg_downshift to be applied to register addresses before
+any register accesses. An example of a device that uses this is a VSC7514
+chip, which require each register address to be downshifted by two if the
+access is performed over a SPI bus.
+
+Signed-off-by: Colin Foster <[email protected]>
+Link: https://lore.kernel.org/r/[email protected]
+Signed-off-by: Mark Brown <[email protected]>
+---
+ drivers/base/regmap/internal.h | 1 +
+ drivers/base/regmap/regmap.c   | 5 +++++
+ include/linux/regmap.h         | 3 +++
+ 3 files changed, 9 insertions(+)
+
+--- a/drivers/base/regmap/internal.h
++++ b/drivers/base/regmap/internal.h
+@@ -31,6 +31,7 @@ struct regmap_format {
+ 	size_t buf_size;
+ 	size_t reg_bytes;
+ 	size_t pad_bytes;
++	size_t reg_downshift;
+ 	size_t val_bytes;
+ 	void (*format_write)(struct regmap *map,
+ 			     unsigned int reg, unsigned int val);
+--- a/drivers/base/regmap/regmap.c
++++ b/drivers/base/regmap/regmap.c
+@@ -823,6 +823,7 @@ struct regmap *__regmap_init(struct devi
+ 
+ 	map->format.reg_bytes = DIV_ROUND_UP(config->reg_bits, 8);
+ 	map->format.pad_bytes = config->pad_bits / 8;
++	map->format.reg_downshift = config->reg_downshift;
+ 	map->format.val_bytes = DIV_ROUND_UP(config->val_bits, 8);
+ 	map->format.buf_size = DIV_ROUND_UP(config->reg_bits +
+ 			config->val_bits + config->pad_bits, 8);
+@@ -1735,6 +1736,7 @@ static int _regmap_raw_write_impl(struct
+ 			return ret;
+ 	}
+ 
++	reg >>= map->format.reg_downshift;
+ 	map->format.format_reg(map->work_buf, reg, map->reg_shift);
+ 	regmap_set_work_buf_flag_mask(map, map->format.reg_bytes,
+ 				      map->write_flag_mask);
+@@ -1905,6 +1907,7 @@ static int _regmap_bus_formatted_write(v
+ 			return ret;
+ 	}
+ 
++	reg >>= map->format.reg_downshift;
+ 	map->format.format_write(map, reg, val);
+ 
+ 	trace_regmap_hw_write_start(map, reg, 1);
+@@ -2346,6 +2349,7 @@ static int _regmap_raw_multi_reg_write(s
+ 		unsigned int reg = regs[i].reg;
+ 		unsigned int val = regs[i].def;
+ 		trace_regmap_hw_write_start(map, reg, 1);
++		reg >>= map->format.reg_downshift;
+ 		map->format.format_reg(u8, reg, map->reg_shift);
+ 		u8 += reg_bytes + pad_bytes;
+ 		map->format.format_val(u8, val, 0);
+@@ -2673,6 +2677,7 @@ static int _regmap_raw_read(struct regma
+ 			return ret;
+ 	}
+ 
++	reg >>= map->format.reg_downshift;
+ 	map->format.format_reg(map->work_buf, reg, map->reg_shift);
+ 	regmap_set_work_buf_flag_mask(map, map->format.reg_bytes,
+ 				      map->read_flag_mask);
+--- a/include/linux/regmap.h
++++ b/include/linux/regmap.h
+@@ -237,6 +237,8 @@ typedef void (*regmap_unlock)(void *);
+  * @reg_stride: The register address stride. Valid register addresses are a
+  *              multiple of this value. If set to 0, a value of 1 will be
+  *              used.
++ * @reg_downshift: The number of bits to downshift the register before
++ *		   performing any operations.
+  * @pad_bits: Number of bits of padding between register and value.
+  * @val_bits: Number of bits in a register value, mandatory.
+  *
+@@ -360,6 +362,7 @@ struct regmap_config {
+ 
+ 	int reg_bits;
+ 	int reg_stride;
++	int reg_downshift;
+ 	int pad_bits;
+ 	int val_bits;
+ 
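
As a hypothetical illustration of the new field (the values below are invented, not taken from a real driver), a SPI-attached register map whose addresses must be divided by four before they go on the wire could be described as:

#include <linux/regmap.h>

/* Illustrative only: 32-bit registers at 4-byte stride, addresses shifted right by 2 on the bus. */
static const struct regmap_config example_downshift_regmap_config = {
        .reg_bits      = 32,
        .val_bits      = 32,
        .reg_stride    = 4,
        .reg_downshift = 2,
};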

+ 95 - 0
target/linux/generic/backport-6.1/351-v5.18-regmap-allow-a-defined-reg_base-to-be-added-to-every.patch

@@ -0,0 +1,95 @@
+From 0074f3f2b1e43d3cedd97e47fb6980db6d2ba79e Mon Sep 17 00:00:00 2001
+From: Colin Foster <[email protected]>
+Date: Sun, 13 Mar 2022 15:45:24 -0700
+Subject: [PATCH 2/2] regmap: allow a defined reg_base to be added to every
+ address
+
+There's an inconsistency that arises when a register set can be accessed
+internally via MMIO, or externally via SPI. The VSC7514 chip allows both
+modes of operation. When internally accessed, the system utilizes __iomem,
+devm_ioremap_resource, and devm_regmap_init_mmio.
+
+For SPI it isn't possible to utilize memory-mapped IO. To properly operate,
+the resource base must be added to the register before every operation.
+
+Signed-off-by: Colin Foster <[email protected]>
+Link: https://lore.kernel.org/r/[email protected]
+Signed-off-by: Mark Brown <[email protected]>
+---
+ drivers/base/regmap/internal.h | 1 +
+ drivers/base/regmap/regmap.c   | 6 ++++++
+ include/linux/regmap.h         | 3 +++
+ 3 files changed, 10 insertions(+)
+
+--- a/drivers/base/regmap/internal.h
++++ b/drivers/base/regmap/internal.h
+@@ -63,6 +63,7 @@ struct regmap {
+ 	regmap_unlock unlock;
+ 	void *lock_arg; /* This is passed to lock/unlock functions */
+ 	gfp_t alloc_flags;
++	unsigned int reg_base;
+ 
+ 	struct device *dev; /* Device we do I/O on */
+ 	void *work_buf;     /* Scratch buffer used to format I/O */
+--- a/drivers/base/regmap/regmap.c
++++ b/drivers/base/regmap/regmap.c
+@@ -821,6 +821,8 @@ struct regmap *__regmap_init(struct devi
+ 	else
+ 		map->alloc_flags = GFP_KERNEL;
+ 
++	map->reg_base = config->reg_base;
++
+ 	map->format.reg_bytes = DIV_ROUND_UP(config->reg_bits, 8);
+ 	map->format.pad_bytes = config->pad_bits / 8;
+ 	map->format.reg_downshift = config->reg_downshift;
+@@ -1736,6 +1738,7 @@ static int _regmap_raw_write_impl(struct
+ 			return ret;
+ 	}
+ 
++	reg += map->reg_base;
+ 	reg >>= map->format.reg_downshift;
+ 	map->format.format_reg(map->work_buf, reg, map->reg_shift);
+ 	regmap_set_work_buf_flag_mask(map, map->format.reg_bytes,
+@@ -1907,6 +1910,7 @@ static int _regmap_bus_formatted_write(v
+ 			return ret;
+ 	}
+ 
++	reg += map->reg_base;
+ 	reg >>= map->format.reg_downshift;
+ 	map->format.format_write(map, reg, val);
+ 
+@@ -2349,6 +2353,7 @@ static int _regmap_raw_multi_reg_write(s
+ 		unsigned int reg = regs[i].reg;
+ 		unsigned int val = regs[i].def;
+ 		trace_regmap_hw_write_start(map, reg, 1);
++		reg += map->reg_base;
+ 		reg >>= map->format.reg_downshift;
+ 		map->format.format_reg(u8, reg, map->reg_shift);
+ 		u8 += reg_bytes + pad_bytes;
+@@ -2677,6 +2682,7 @@ static int _regmap_raw_read(struct regma
+ 			return ret;
+ 	}
+ 
++	reg += map->reg_base;
+ 	reg >>= map->format.reg_downshift;
+ 	map->format.format_reg(map->work_buf, reg, map->reg_shift);
+ 	regmap_set_work_buf_flag_mask(map, map->format.reg_bytes,
+--- a/include/linux/regmap.h
++++ b/include/linux/regmap.h
+@@ -239,6 +239,8 @@ typedef void (*regmap_unlock)(void *);
+  *              used.
+  * @reg_downshift: The number of bits to downshift the register before
+  *		   performing any operations.
++ * @reg_base: Value to be added to every register address before performing any
++ *	      operation.
+  * @pad_bits: Number of bits of padding between register and value.
+  * @val_bits: Number of bits in a register value, mandatory.
+  *
+@@ -363,6 +365,7 @@ struct regmap_config {
+ 	int reg_bits;
+ 	int reg_stride;
+ 	int reg_downshift;
++	unsigned int reg_base;
+ 	int pad_bits;
+ 	int val_bits;
+ 
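
A hypothetical counterpart for reg_base: when the same register block sits behind an external bus window, the window offset can be folded into every access. The base value below is invented; per the code above, reg_base is added before reg_downshift is applied.

#include <linux/regmap.h>

/* Illustrative only: reg_base is added first, then the address is downshifted. */
static const struct regmap_config example_ext_window_regmap_config = {
        .reg_bits      = 32,
        .val_bits      = 32,
        .reg_stride    = 4,
        .reg_base      = 0x71000000,    /* hypothetical window offset */
        .reg_downshift = 2,
};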

+ 57 - 0
target/linux/generic/backport-6.1/352-v6.3-regmap-apply-reg_base-and-reg_downshift-for-single-r.patch

@@ -0,0 +1,57 @@
+From 697c3892d825fb78f42ec8e53bed065dd728db3e Mon Sep 17 00:00:00 2001
+From: Daniel Golle <[email protected]>
+Date: Mon, 30 Jan 2023 02:04:57 +0000
+Subject: [PATCH] regmap: apply reg_base and reg_downshift for single register
+ ops
+
+reg_base and reg_downshift currently don't have any effect if used with
+a regmap_bus or regmap_config which only offers single register
+operations (ie. reg_read, reg_write and optionally reg_update_bits).
+
+Fix that and take them into account also for regmap_bus with only
+reg_read and read_write operations by applying reg_base and
+reg_downshift in _regmap_bus_reg_write, _regmap_bus_reg_read.
+
+Also apply reg_base and reg_downshift in _regmap_update_bits, but only
+in case the operation is carried out with a reg_update_bits call
+defined in either regmap_bus or regmap_config.
+
+Fixes: 0074f3f2b1e43d ("regmap: allow a defined reg_base to be added to every address")
+Fixes: 86fc59ef818beb ("regmap: add configurable downshift for addresses")
+Signed-off-by: Daniel Golle <[email protected]>
+Tested-by: Colin Foster <[email protected]>
+Link: https://lore.kernel.org/r/[email protected]
+Signed-off-by: Mark Brown <[email protected]>
+---
+ drivers/base/regmap/regmap.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/drivers/base/regmap/regmap.c
++++ b/drivers/base/regmap/regmap.c
+@@ -1929,6 +1929,8 @@ static int _regmap_bus_reg_write(void *c
+ {
+ 	struct regmap *map = context;
+ 
++	reg += map->reg_base;
++	reg >>= map->format.reg_downshift;
+ 	return map->bus->reg_write(map->bus_context, reg, val);
+ }
+ 
+@@ -2703,6 +2705,8 @@ static int _regmap_bus_reg_read(void *co
+ {
+ 	struct regmap *map = context;
+ 
++	reg += map->reg_base;
++	reg >>= map->format.reg_downshift;
+ 	return map->bus->reg_read(map->bus_context, reg, val);
+ }
+ 
+@@ -3078,6 +3082,8 @@ static int _regmap_update_bits(struct re
+ 		*change = false;
+ 
+ 	if (regmap_volatile(map, reg) && map->reg_update_bits) {
++		reg += map->reg_base;
++		reg >>= map->format.reg_downshift;
+ 		ret = map->reg_update_bits(map->bus_context, reg, mask, val);
+ 		if (ret == 0 && change)
+ 			*change = true;
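
With this fix, every access path (formatted, raw, and the single-register reg_read/reg_write/reg_update_bits callbacks) applies the same address transformation, which reduces to the following sketch:

/* Effective wire address derived from the regmap_config fields (sketch only). */
static inline unsigned int example_effective_reg(unsigned int reg,
                                                 unsigned int reg_base,
                                                 unsigned int reg_downshift)
{
        return (reg + reg_base) >> reg_downshift;
}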

+ 72 - 0
target/linux/generic/backport-6.1/400-v5.19-mtd-call-of_platform_populate-for-MTD-partitions.patch

@@ -0,0 +1,72 @@
+From bcdf0315a61a29eb753a607d3a85a4032de72d94 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <[email protected]>
+Date: Tue, 10 May 2022 15:12:59 +0200
+Subject: [PATCH] mtd: call of_platform_populate() for MTD partitions
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Until this change MTD subsystem supported handling partitions only with
+MTD partitions parsers. That's a specific / limited API designed around
+partitions.
+
+Some MTD partitions may however require different handling. They may
+contain specific data that needs to be parsed and somehow extracted. For
+that purpose MTD subsystem should allow binding of standard platform
+drivers.
+
+An example can be U-Boot (sub)partition with environment variables.
+There exist a "u-boot,env" DT binding for MTD (sub)partition that
+requires an NVMEM driver.
+
+Ref: 5db1c2dbc04c ("dt-bindings: nvmem: add U-Boot environment variables binding")
+Signed-off-by: Rafał Miłecki <[email protected]>
+Signed-off-by: Miquel Raynal <[email protected]>
+Link: https://lore.kernel.org/linux-mtd/[email protected]
+---
+ drivers/mtd/mtdpart.c | 9 +++++++++
+ 1 file changed, 9 insertions(+)
+
+--- a/drivers/mtd/mtdpart.c
++++ b/drivers/mtd/mtdpart.c
+@@ -17,6 +17,7 @@
+ #include <linux/mtd/partitions.h>
+ #include <linux/err.h>
+ #include <linux/of.h>
++#include <linux/of_platform.h>
+ 
+ #include "mtdcore.h"
+ 
+@@ -577,10 +578,16 @@ static int mtd_part_of_parse(struct mtd_
+ 	struct mtd_part_parser *parser;
+ 	struct device_node *np;
+ 	struct property *prop;
++	struct device *dev;
+ 	const char *compat;
+ 	const char *fixed = "fixed-partitions";
+ 	int ret, err = 0;
+ 
++	dev = &master->dev;
++	/* Use parent device (controller) if the top level MTD is not registered */
++	if (!IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER) && !mtd_is_partition(master))
++		dev = master->dev.parent;
++
+ 	np = mtd_get_of_node(master);
+ 	if (mtd_is_partition(master))
+ 		of_node_get(np);
+@@ -593,6 +600,7 @@ static int mtd_part_of_parse(struct mtd_
+ 			continue;
+ 		ret = mtd_part_do_parse(parser, master, pparts, NULL);
+ 		if (ret > 0) {
++			of_platform_populate(np, NULL, NULL, dev);
+ 			of_node_put(np);
+ 			return ret;
+ 		}
+@@ -600,6 +608,7 @@ static int mtd_part_of_parse(struct mtd_
+ 		if (ret < 0 && !err)
+ 			err = ret;
+ 	}
++	of_platform_populate(np, NULL, NULL, dev);
+ 	of_node_put(np);
+ 
+ 	/*

+ 302 - 0
target/linux/generic/backport-6.1/401-v6.0-mtd-parsers-add-support-for-Sercomm-partitions.patch

@@ -0,0 +1,302 @@
+From 9b78ef0c7997052e9eaa0f7a4513d546fa17358c Mon Sep 17 00:00:00 2001
+From: Mikhail Zhilkin <[email protected]>
+Date: Sun, 29 May 2022 11:07:14 +0000
+Subject: [PATCH] mtd: parsers: add support for Sercomm partitions
+
+This adds an MTD partition parser for the Sercomm partition table that
+is used in some Beeline, Netgear and Sercomm routers.
+
+The Sercomm partition map table contains real partition offsets, which
+may differ from device to device depending on the number and location of
+bad blocks on NAND.
+
+Original patch (proposed by NOGUCHI Hiroshi):
+Link: https://github.com/openwrt/openwrt/pull/1318#issuecomment-420607394
+
+Signed-off-by: NOGUCHI Hiroshi <[email protected]>
+Signed-off-by: Mikhail Zhilkin <[email protected]>
+Signed-off-by: Miquel Raynal <[email protected]>
+Link: https://lore.kernel.org/linux-mtd/[email protected]
+---
+ drivers/mtd/parsers/Kconfig  |   9 ++
+ drivers/mtd/parsers/Makefile |   1 +
+ drivers/mtd/parsers/scpart.c | 248 +++++++++++++++++++++++++++++++++++
+ 3 files changed, 258 insertions(+)
+ create mode 100644 drivers/mtd/parsers/scpart.c
+
+--- a/drivers/mtd/parsers/Kconfig
++++ b/drivers/mtd/parsers/Kconfig
+@@ -186,3 +186,12 @@ config MTD_QCOMSMEM_PARTS
+ 	help
+ 	  This provides support for parsing partitions from Shared Memory (SMEM)
+ 	  for NAND and SPI flash on Qualcomm platforms.
++
++config MTD_SERCOMM_PARTS
++	tristate "Sercomm partition table parser"
++	depends on MTD && RALINK
++	help
++	  This provides partitions table parser for devices with Sercomm
++	  partition map. This partition table contains real partition
++	  offsets, which may differ from device to device depending on the
++	  number and location of bad blocks on NAND.
+--- a/drivers/mtd/parsers/Makefile
++++ b/drivers/mtd/parsers/Makefile
+@@ -10,6 +10,7 @@ ofpart-$(CONFIG_MTD_OF_PARTS_LINKSYS_NS)
+ obj-$(CONFIG_MTD_PARSER_IMAGETAG)	+= parser_imagetag.o
+ obj-$(CONFIG_MTD_AFS_PARTS)		+= afs.o
+ obj-$(CONFIG_MTD_PARSER_TRX)		+= parser_trx.o
++obj-$(CONFIG_MTD_SERCOMM_PARTS)		+= scpart.o
+ obj-$(CONFIG_MTD_SHARPSL_PARTS)		+= sharpslpart.o
+ obj-$(CONFIG_MTD_REDBOOT_PARTS)		+= redboot.o
+ obj-$(CONFIG_MTD_QCOMSMEM_PARTS)	+= qcomsmempart.o
+--- /dev/null
++++ b/drivers/mtd/parsers/scpart.c
+@@ -0,0 +1,248 @@
++// SPDX-License-Identifier: GPL-2.0-or-later
++/*
++ *    drivers/mtd/scpart.c: Sercomm Partition Parser
++ *
++ *    Copyright (C) 2018 NOGUCHI Hiroshi
++ *    Copyright (C) 2022 Mikhail Zhilkin
++ */
++
++#include <linux/kernel.h>
++#include <linux/slab.h>
++#include <linux/mtd/mtd.h>
++#include <linux/mtd/partitions.h>
++#include <linux/module.h>
++
++#define	MOD_NAME	"scpart"
++
++#ifdef pr_fmt
++#undef pr_fmt
++#endif
++
++#define pr_fmt(fmt) MOD_NAME ": " fmt
++
++#define	ID_ALREADY_FOUND	0xffffffffUL
++
++#define	MAP_OFFS_IN_BLK		0x800
++#define	MAP_MIRROR_NUM		2
++
++static const char sc_part_magic[] = {
++	'S', 'C', 'F', 'L', 'M', 'A', 'P', 'O', 'K', '\0',
++};
++#define	PART_MAGIC_LEN		sizeof(sc_part_magic)
++
++/* assumes that all fields are set by CPU native endian */
++struct sc_part_desc {
++	uint32_t	part_id;
++	uint32_t	part_offs;
++	uint32_t	part_bytes;
++};
++
++static uint32_t scpart_desc_is_valid(struct sc_part_desc *pdesc)
++{
++	return ((pdesc->part_id != 0xffffffffUL) &&
++		(pdesc->part_offs != 0xffffffffUL) &&
++		(pdesc->part_bytes != 0xffffffffUL));
++}
++
++static int scpart_scan_partmap(struct mtd_info *master, loff_t partmap_offs,
++			       struct sc_part_desc **ppdesc)
++{
++	int cnt = 0;
++	int res = 0;
++	int res2;
++	loff_t offs;
++	size_t retlen;
++	struct sc_part_desc *pdesc = NULL;
++	struct sc_part_desc *tmpdesc;
++	uint8_t *buf;
++
++	buf = kzalloc(master->erasesize, GFP_KERNEL);
++	if (!buf) {
++		res = -ENOMEM;
++		goto out;
++	}
++
++	res2 = mtd_read(master, partmap_offs, master->erasesize, &retlen, buf);
++	if (res2 || retlen != master->erasesize) {
++		res = -EIO;
++		goto free;
++	}
++
++	for (offs = MAP_OFFS_IN_BLK;
++	     offs < master->erasesize - sizeof(*tmpdesc);
++	     offs += sizeof(*tmpdesc)) {
++		tmpdesc = (struct sc_part_desc *)&buf[offs];
++		if (!scpart_desc_is_valid(tmpdesc))
++			break;
++		cnt++;
++	}
++
++	if (cnt > 0) {
++		int bytes = cnt * sizeof(*pdesc);
++
++		pdesc = kcalloc(cnt, sizeof(*pdesc), GFP_KERNEL);
++		if (!pdesc) {
++			res = -ENOMEM;
++			goto free;
++		}
++		memcpy(pdesc, &(buf[MAP_OFFS_IN_BLK]), bytes);
++
++		*ppdesc = pdesc;
++		res = cnt;
++	}
++
++free:
++	kfree(buf);
++
++out:
++	return res;
++}
++
++static int scpart_find_partmap(struct mtd_info *master,
++			       struct sc_part_desc **ppdesc)
++{
++	int magic_found = 0;
++	int res = 0;
++	int res2;
++	loff_t offs = 0;
++	size_t retlen;
++	uint8_t rdbuf[PART_MAGIC_LEN];
++
++	while ((magic_found < MAP_MIRROR_NUM) &&
++			(offs < master->size) &&
++			 !mtd_block_isbad(master, offs)) {
++		res2 = mtd_read(master, offs, PART_MAGIC_LEN, &retlen, rdbuf);
++		if (res2 || retlen != PART_MAGIC_LEN) {
++			res = -EIO;
++			goto out;
++		}
++		if (!memcmp(rdbuf, sc_part_magic, PART_MAGIC_LEN)) {
++			pr_debug("Signature found at 0x%llx\n", offs);
++			magic_found++;
++			res = scpart_scan_partmap(master, offs, ppdesc);
++			if (res > 0)
++				goto out;
++		}
++		offs += master->erasesize;
++	}
++
++out:
++	if (res > 0)
++		pr_info("Valid 'SC PART MAP' (%d partitions) found at 0x%llx\n", res, offs);
++	else
++		pr_info("No valid 'SC PART MAP' was found\n");
++
++	return res;
++}
++
++static int scpart_parse(struct mtd_info *master,
++			const struct mtd_partition **pparts,
++			struct mtd_part_parser_data *data)
++{
++	const char *partname;
++	int n;
++	int nr_scparts;
++	int nr_parts = 0;
++	int res = 0;
++	struct sc_part_desc *scpart_map = NULL;
++	struct mtd_partition *parts = NULL;
++	struct device_node *mtd_node;
++	struct device_node *ofpart_node;
++	struct device_node *pp;
++
++	mtd_node = mtd_get_of_node(master);
++	if (!mtd_node) {
++		res = -ENOENT;
++		goto out;
++	}
++
++	ofpart_node = of_get_child_by_name(mtd_node, "partitions");
++	if (!ofpart_node) {
++		pr_info("%s: 'partitions' subnode not found on %pOF.\n",
++				master->name, mtd_node);
++		res = -ENOENT;
++		goto out;
++	}
++
++	nr_scparts = scpart_find_partmap(master, &scpart_map);
++	if (nr_scparts <= 0) {
++		pr_info("No any partitions was found in 'SC PART MAP'.\n");
++		res = -ENOENT;
++		goto free;
++	}
++
++	parts = kcalloc(of_get_child_count(ofpart_node), sizeof(*parts),
++		GFP_KERNEL);
++	if (!parts) {
++		res = -ENOMEM;
++		goto free;
++	}
++
++	for_each_child_of_node(ofpart_node, pp) {
++		u32 scpart_id;
++
++		if (of_property_read_u32(pp, "sercomm,scpart-id", &scpart_id))
++			continue;
++
++		for (n = 0 ; n < nr_scparts ; n++)
++			if ((scpart_map[n].part_id != ID_ALREADY_FOUND) &&
++					(scpart_id == scpart_map[n].part_id))
++				break;
++		if (n >= nr_scparts)
++			/* not match */
++			continue;
++
++		/* add the partition found in OF into MTD partition array */
++		parts[nr_parts].offset = scpart_map[n].part_offs;
++		parts[nr_parts].size = scpart_map[n].part_bytes;
++		parts[nr_parts].of_node = pp;
++
++		if (!of_property_read_string(pp, "label", &partname))
++			parts[nr_parts].name = partname;
++		if (of_property_read_bool(pp, "read-only"))
++			parts[nr_parts].mask_flags |= MTD_WRITEABLE;
++		if (of_property_read_bool(pp, "lock"))
++			parts[nr_parts].mask_flags |= MTD_POWERUP_LOCK;
++
++		/* mark as 'done' */
++		scpart_map[n].part_id = ID_ALREADY_FOUND;
++
++		nr_parts++;
++	}
++
++	if (nr_parts > 0) {
++		*pparts = parts;
++		res = nr_parts;
++	} else
++		pr_info("No partition in OF matches partition ID with 'SC PART MAP'.\n");
++
++	of_node_put(pp);
++
++free:
++	kfree(scpart_map);
++	if (res <= 0)
++		kfree(parts);
++
++out:
++	return res;
++}
++
++static const struct of_device_id scpart_parser_of_match_table[] = {
++	{ .compatible = "sercomm,sc-partitions" },
++	{},
++};
++MODULE_DEVICE_TABLE(of, scpart_parser_of_match_table);
++
++static struct mtd_part_parser scpart_parser = {
++	.parse_fn = scpart_parse,
++	.name = "scpart",
++	.of_match_table = scpart_parser_of_match_table,
++};
++module_mtd_part_parser(scpart_parser);
++
++/* mtd parsers will request the module by parser name */
++MODULE_ALIAS("scpart");
++MODULE_LICENSE("GPL");
++MODULE_AUTHOR("NOGUCHI Hiroshi <[email protected]>");
++MODULE_AUTHOR("Mikhail Zhilkin <[email protected]>");
++MODULE_DESCRIPTION("Sercomm partition parser");

+ 106 - 0
target/linux/generic/backport-6.1/402-v6.0-mtd-next-mtd-core-introduce-of-support-for-dynamic-partitions.patch

@@ -0,0 +1,106 @@
+From ad9b10d1eaada169bd764abcab58f08538877e26 Mon Sep 17 00:00:00 2001
+From: Christian Marangi <[email protected]>
+Date: Wed, 22 Jun 2022 03:06:28 +0200
+Subject: mtd: core: introduce of support for dynamic partitions
+
+We have many parser that register mtd partitions at runtime. One example
+is the cmdlinepart or the smem-part parser where the compatible is defined
+in the dts and the partitions gets detected and registered by the
+parser. This is problematic for the NVMEM subsystem that requires an OF
+node to detect NVMEM cells.
+
+To fix this problem, introduce an additional logic that will try to
+assign an OF node to the MTD if declared.
+
+On MTD addition, it will be checked if the MTD has an OF node and if
+not declared will check if a partition with the same label / node name is
+declared in DTS. If an exact match is found, the partition dynamically
+allocated by the parser will have a connected OF node.
+
+The NVMEM subsystem will detect the OF node and register any NVMEM cells
+declared statically in the DTS.
+
+Signed-off-by: Christian Marangi <[email protected]>
+Signed-off-by: Miquel Raynal <[email protected]>
+Link: https://lore.kernel.org/linux-mtd/[email protected]
+---
+ drivers/mtd/mtdcore.c | 61 +++++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 61 insertions(+)
+
+--- a/drivers/mtd/mtdcore.c
++++ b/drivers/mtd/mtdcore.c
+@@ -564,6 +564,66 @@ static int mtd_nvmem_add(struct mtd_info
+ 	return 0;
+ }
+ 
++static void mtd_check_of_node(struct mtd_info *mtd)
++{
++	struct device_node *partitions, *parent_dn, *mtd_dn = NULL;
++	const char *pname, *prefix = "partition-";
++	int plen, mtd_name_len, offset, prefix_len;
++	struct mtd_info *parent;
++	bool found = false;
++
++	/* Check if MTD already has a device node */
++	if (dev_of_node(&mtd->dev))
++		return;
++
++	/* Check if a partitions node exist */
++	parent = mtd->parent;
++	parent_dn = dev_of_node(&parent->dev);
++	if (!parent_dn)
++		return;
++
++	partitions = of_get_child_by_name(parent_dn, "partitions");
++	if (!partitions)
++		goto exit_parent;
++
++	prefix_len = strlen(prefix);
++	mtd_name_len = strlen(mtd->name);
++
++	/* Search if a partition is defined with the same name */
++	for_each_child_of_node(partitions, mtd_dn) {
++		offset = 0;
++
++		/* Skip partition with no/wrong prefix */
++		if (!of_node_name_prefix(mtd_dn, "partition-"))
++			continue;
++
++		/* Label have priority. Check that first */
++		if (of_property_read_string(mtd_dn, "label", &pname)) {
++			of_property_read_string(mtd_dn, "name", &pname);
++			offset = prefix_len;
++		}
++
++		plen = strlen(pname) - offset;
++		if (plen == mtd_name_len &&
++		    !strncmp(mtd->name, pname + offset, plen)) {
++			found = true;
++			break;
++		}
++	}
++
++	if (!found)
++		goto exit_partitions;
++
++	/* Set of_node only for nvmem */
++	if (of_device_is_compatible(mtd_dn, "nvmem-cells"))
++		mtd_set_of_node(mtd, mtd_dn);
++
++exit_partitions:
++	of_node_put(partitions);
++exit_parent:
++	of_node_put(parent_dn);
++}
++
+ /**
+  *	add_mtd_device - register an MTD device
+  *	@mtd: pointer to new MTD device info structure
+@@ -669,6 +729,7 @@ int add_mtd_device(struct mtd_info *mtd)
+ 	mtd->dev.devt = MTD_DEVT(i);
+ 	dev_set_name(&mtd->dev, "mtd%d", i);
+ 	dev_set_drvdata(&mtd->dev, mtd);
++	mtd_check_of_node(mtd);
+ 	of_node_get(mtd_get_of_node(mtd));
+ 	error = device_register(&mtd->dev);
+ 	if (error) {
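
The matching rule described above can be restated as a small sketch (the helper is invented; the in-kernel code works with cached string lengths rather than strcmp, but the effect is the same): an OF child matches when its "label" property equals the MTD name, or, lacking a label, when its node name is "partition-" followed by the MTD name.

#include <linux/of.h>
#include <linux/string.h>

/* Sketch of the name-matching rule only; not the kernel's implementation. */
static bool example_partition_matches(const struct device_node *dn,
                                      const char *mtd_name)
{
        const char *label;

        if (!of_property_read_string(dn, "label", &label))
                return !strcmp(label, mtd_name);

        return of_node_name_prefix(dn, "partition-") &&
               !strcmp(dn->name + strlen("partition-"), mtd_name);
}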

+ 72 - 0
target/linux/generic/backport-6.1/403-v6.1-mtd-allow-getting-MTD-device-associated-with-a-speci.patch

@@ -0,0 +1,72 @@
+From b0321721be50b80c03a51866a94fde4f94690e18 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <[email protected]>
+Date: Wed, 15 Jun 2022 21:42:59 +0200
+Subject: [PATCH] mtd: allow getting MTD device associated with a specific DT
+ node
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+MTD subsystem API allows interacting with MTD devices (e.g. reading,
+writing, handling bad blocks). So far a random driver could get MTD
+device only by its name (get_mtd_device_nm()). This change allows
+getting them also by a DT node.
+
+This API is required for drivers handling DT defined MTD partitions in a
+specific way (e.g. U-Boot (sub)partition with environment variables).
+
+Signed-off-by: Rafał Miłecki <[email protected]>
+Acked-by: Miquel Raynal <[email protected]>
+Signed-off-by: Srinivas Kandagatla <[email protected]>
+---
+ drivers/mtd/mtdcore.c   | 28 ++++++++++++++++++++++++++++
+ include/linux/mtd/mtd.h |  1 +
+ 2 files changed, 29 insertions(+)
+
+--- a/drivers/mtd/mtdcore.c
++++ b/drivers/mtd/mtdcore.c
+@@ -1236,6 +1236,34 @@ int __get_mtd_device(struct mtd_info *mt
+ EXPORT_SYMBOL_GPL(__get_mtd_device);
+ 
+ /**
++ * of_get_mtd_device_by_node - obtain an MTD device associated with a given node
++ *
++ * @np: device tree node
++ */
++struct mtd_info *of_get_mtd_device_by_node(struct device_node *np)
++{
++	struct mtd_info *mtd = NULL;
++	struct mtd_info *tmp;
++	int err;
++
++	mutex_lock(&mtd_table_mutex);
++
++	err = -EPROBE_DEFER;
++	mtd_for_each_device(tmp) {
++		if (mtd_get_of_node(tmp) == np) {
++			mtd = tmp;
++			err = __get_mtd_device(mtd);
++			break;
++		}
++	}
++
++	mutex_unlock(&mtd_table_mutex);
++
++	return err ? ERR_PTR(err) : mtd;
++}
++EXPORT_SYMBOL_GPL(of_get_mtd_device_by_node);
++
++/**
+  *	get_mtd_device_nm - obtain a validated handle for an MTD device by
+  *	device name
+  *	@name: MTD device name to open
+--- a/include/linux/mtd/mtd.h
++++ b/include/linux/mtd/mtd.h
+@@ -682,6 +682,7 @@ extern int mtd_device_unregister(struct
+ extern struct mtd_info *get_mtd_device(struct mtd_info *mtd, int num);
+ extern int __get_mtd_device(struct mtd_info *mtd);
+ extern void __put_mtd_device(struct mtd_info *mtd);
++extern struct mtd_info *of_get_mtd_device_by_node(struct device_node *np);
+ extern struct mtd_info *get_mtd_device_nm(const char *name);
+ extern void put_mtd_device(struct mtd_info *mtd);
+ 
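
A consumer of the new helper might look roughly like the sketch below; the "mtd" phandle property name and the function name are invented for illustration, and error handling is trimmed:

#include <linux/err.h>
#include <linux/errno.h>
#include <linux/mtd/mtd.h>
#include <linux/of.h>

/* Sketch only: resolve a DT phandle to its MTD device. */
static struct mtd_info *example_get_mtd_from_phandle(struct device_node *np)
{
        struct device_node *mtd_np;
        struct mtd_info *mtd;

        mtd_np = of_parse_phandle(np, "mtd", 0);
        if (!mtd_np)
                return ERR_PTR(-ENODEV);

        mtd = of_get_mtd_device_by_node(mtd_np);
        of_node_put(mtd_np);

        return mtd;     /* ERR_PTR(-EPROBE_DEFER) until the MTD is registered */
}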

+ 30 - 0
target/linux/generic/backport-6.1/404-v6.0-mtd-core-check-partition-before-dereference.patch

@@ -0,0 +1,30 @@
+From 7ec4cdb321738d44ae5d405e7b6ac73dfbf99caa Mon Sep 17 00:00:00 2001
+From: Tetsuo Handa <[email protected]>
+Date: Mon, 25 Jul 2022 22:49:25 +0900
+Subject: [PATCH] mtd: core: check partition before dereference
+
+syzbot is reporting NULL pointer dereference at mtd_check_of_node() [1],
+for mtdram test device (CONFIG_MTD_MTDRAM) is not partition.
+
+Link: https://syzkaller.appspot.com/bug?extid=fe013f55a2814a9e8cfd [1]
+Reported-by: syzbot <[email protected]>
+Reported-by: kernel test robot <[email protected]>
+Fixes: ad9b10d1eaada169 ("mtd: core: introduce of support for dynamic partitions")
+Signed-off-by: Tetsuo Handa <[email protected]>
+CC: [email protected]
+Signed-off-by: Richard Weinberger <[email protected]>
+---
+ drivers/mtd/mtdcore.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/mtd/mtdcore.c
++++ b/drivers/mtd/mtdcore.c
+@@ -577,6 +577,8 @@ static void mtd_check_of_node(struct mtd
+ 		return;
+ 
+ 	/* Check if a partitions node exist */
++	if (!mtd_is_partition(mtd))
++		return;
+ 	parent = mtd->parent;
+ 	parent_dn = dev_of_node(&parent->dev);
+ 	if (!parent_dn)

+ 101 - 0
target/linux/generic/backport-6.1/405-v6.1-mtd-core-add-missing-of_node_get-in-dynamic-partitio.patch

@@ -0,0 +1,101 @@
+From 12b58961de0bd88b3c7dfa5d21f6d67f4678b780 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <[email protected]>
+Date: Tue, 18 Oct 2022 07:18:22 +0200
+Subject: [PATCH] mtd: core: add missing of_node_get() in dynamic partitions
+ code
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+This fixes unbalanced of_node_put():
+[    1.078910] 6 cmdlinepart partitions found on MTD device gpmi-nand
+[    1.085116] Creating 6 MTD partitions on "gpmi-nand":
+[    1.090181] 0x000000000000-0x000008000000 : "nandboot"
+[    1.096952] 0x000008000000-0x000009000000 : "nandfit"
+[    1.103547] 0x000009000000-0x00000b000000 : "nandkernel"
+[    1.110317] 0x00000b000000-0x00000c000000 : "nanddtb"
+[    1.115525] ------------[ cut here ]------------
+[    1.120141] refcount_t: addition on 0; use-after-free.
+[    1.125328] WARNING: CPU: 0 PID: 1 at lib/refcount.c:25 refcount_warn_saturate+0xdc/0x148
+[    1.133528] Modules linked in:
+[    1.136589] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.0.0-rc7-next-20220930-04543-g8cf3f7
+[    1.146342] Hardware name: Freescale i.MX8DXL DDR3L EVK (DT)
+[    1.151999] pstate: 600000c5 (nZCv daIF -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
+[    1.158965] pc : refcount_warn_saturate+0xdc/0x148
+[    1.163760] lr : refcount_warn_saturate+0xdc/0x148
+[    1.168556] sp : ffff800009ddb080
+[    1.171866] x29: ffff800009ddb080 x28: ffff800009ddb35a x27: 0000000000000002
+[    1.179015] x26: ffff8000098b06ad x25: ffffffffffffffff x24: ffff0a00ffffff05
+[    1.186165] x23: ffff00001fdf6470 x22: ffff800009ddb367 x21: 0000000000000000
+[    1.193314] x20: ffff00001fdfebe8 x19: ffff00001fdfec50 x18: ffffffffffffffff
+[    1.200464] x17: 0000000000000000 x16: 0000000000000118 x15: 0000000000000004
+[    1.207614] x14: 0000000000000fff x13: ffff800009bca248 x12: 0000000000000003
+[    1.214764] x11: 00000000ffffefff x10: c0000000ffffefff x9 : 4762cb2ccb52de00
+[    1.221914] x8 : 4762cb2ccb52de00 x7 : 205d313431303231 x6 : 312e31202020205b
+[    1.229063] x5 : ffff800009d55c1f x4 : 0000000000000001 x3 : 0000000000000000
+[    1.236213] x2 : 0000000000000000 x1 : ffff800009954be6 x0 : 000000000000002a
+[    1.243365] Call trace:
+[    1.245806]  refcount_warn_saturate+0xdc/0x148
+[    1.250253]  kobject_get+0x98/0x9c
+[    1.253658]  of_node_get+0x20/0x34
+[    1.257072]  of_fwnode_get+0x3c/0x54
+[    1.260652]  fwnode_get_nth_parent+0xd8/0xf4
+[    1.264926]  fwnode_full_name_string+0x3c/0xb4
+[    1.269373]  device_node_string+0x498/0x5b4
+[    1.273561]  pointer+0x41c/0x5d0
+[    1.276793]  vsnprintf+0x4d8/0x694
+[    1.280198]  vprintk_store+0x164/0x528
+[    1.283951]  vprintk_emit+0x98/0x164
+[    1.287530]  vprintk_default+0x44/0x6c
+[    1.291284]  vprintk+0xf0/0x134
+[    1.294428]  _printk+0x54/0x7c
+[    1.297486]  of_node_release+0xe8/0x128
+[    1.301326]  kobject_put+0x98/0xfc
+[    1.304732]  of_node_put+0x1c/0x28
+[    1.308137]  add_mtd_device+0x484/0x6d4
+[    1.311977]  add_mtd_partitions+0xf0/0x1d0
+[    1.316078]  parse_mtd_partitions+0x45c/0x518
+[    1.320439]  mtd_device_parse_register+0xb0/0x274
+[    1.325147]  gpmi_nand_probe+0x51c/0x650
+[    1.329074]  platform_probe+0xa8/0xd0
+[    1.332740]  really_probe+0x130/0x334
+[    1.336406]  __driver_probe_device+0xb4/0xe0
+[    1.340681]  driver_probe_device+0x3c/0x1f8
+[    1.344869]  __driver_attach+0xdc/0x1a4
+[    1.348708]  bus_for_each_dev+0x80/0xcc
+[    1.352548]  driver_attach+0x24/0x30
+[    1.356127]  bus_add_driver+0x108/0x1f4
+[    1.359967]  driver_register+0x78/0x114
+[    1.363807]  __platform_driver_register+0x24/0x30
+[    1.368515]  gpmi_nand_driver_init+0x1c/0x28
+[    1.372798]  do_one_initcall+0xbc/0x238
+[    1.376638]  do_initcall_level+0x94/0xb4
+[    1.380565]  do_initcalls+0x54/0x94
+[    1.384058]  do_basic_setup+0x1c/0x28
+[    1.387724]  kernel_init_freeable+0x110/0x188
+[    1.392084]  kernel_init+0x20/0x1a0
+[    1.395578]  ret_from_fork+0x10/0x20
+[    1.399157] ---[ end trace 0000000000000000 ]---
+[    1.403782] ------------[ cut here ]------------
+
+Reported-by: Han Xu <[email protected]>
+Fixes: ad9b10d1eaada169 ("mtd: core: introduce of support for dynamic partitions")
+Signed-off-by: Rafał Miłecki <[email protected]>
+Tested-by: Han Xu <[email protected]>
+Signed-off-by: Miquel Raynal <[email protected]>
+Link: https://lore.kernel.org/linux-mtd/[email protected]
+---
+ drivers/mtd/mtdcore.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/mtd/mtdcore.c
++++ b/drivers/mtd/mtdcore.c
+@@ -580,7 +580,7 @@ static void mtd_check_of_node(struct mtd
+ 	if (!mtd_is_partition(mtd))
+ 		return;
+ 	parent = mtd->parent;
+-	parent_dn = dev_of_node(&parent->dev);
++	parent_dn = of_node_get(dev_of_node(&parent->dev));
+ 	if (!parent_dn)
+ 		return;
+ 

+ 65 - 0
target/linux/generic/backport-6.1/406-v6.2-0001-mtd-core-simplify-a-bit-code-find-partition-matching.patch

@@ -0,0 +1,65 @@
+From 63db0cb35e1cb3b3c134906d1062f65513fdda2d Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <[email protected]>
+Date: Tue, 4 Oct 2022 10:37:09 +0200
+Subject: [PATCH] mtd: core: simplify (a bit) code find partition-matching
+ dynamic OF node
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+1. Don't hardcode "partition-" string twice
+2. Use simpler logic & use ->name to avoid of_property_read_string()
+3. Use mtd_get_of_node() helper
+
+Cc: Christian Marangi <[email protected]>
+Signed-off-by: Rafał Miłecki <[email protected]>
+Signed-off-by: Miquel Raynal <[email protected]>
+Link: https://lore.kernel.org/linux-mtd/[email protected]
+---
+ drivers/mtd/mtdcore.c | 16 +++++++---------
+ 1 file changed, 7 insertions(+), 9 deletions(-)
+
+--- a/drivers/mtd/mtdcore.c
++++ b/drivers/mtd/mtdcore.c
+@@ -569,18 +569,16 @@ static void mtd_check_of_node(struct mtd
+ 	struct device_node *partitions, *parent_dn, *mtd_dn = NULL;
+ 	const char *pname, *prefix = "partition-";
+ 	int plen, mtd_name_len, offset, prefix_len;
+-	struct mtd_info *parent;
+ 	bool found = false;
+ 
+ 	/* Check if MTD already has a device node */
+-	if (dev_of_node(&mtd->dev))
++	if (mtd_get_of_node(mtd))
+ 		return;
+ 
+ 	/* Check if a partitions node exist */
+ 	if (!mtd_is_partition(mtd))
+ 		return;
+-	parent = mtd->parent;
+-	parent_dn = of_node_get(dev_of_node(&parent->dev));
++	parent_dn = of_node_get(mtd_get_of_node(mtd->parent));
+ 	if (!parent_dn)
+ 		return;
+ 
+@@ -593,15 +591,15 @@ static void mtd_check_of_node(struct mtd
+ 
+ 	/* Search if a partition is defined with the same name */
+ 	for_each_child_of_node(partitions, mtd_dn) {
+-		offset = 0;
+-
+ 		/* Skip partition with no/wrong prefix */
+-		if (!of_node_name_prefix(mtd_dn, "partition-"))
++		if (!of_node_name_prefix(mtd_dn, prefix))
+ 			continue;
+ 
+ 		/* Label have priority. Check that first */
+-		if (of_property_read_string(mtd_dn, "label", &pname)) {
+-			of_property_read_string(mtd_dn, "name", &pname);
++		if (!of_property_read_string(mtd_dn, "label", &pname)) {
++			offset = 0;
++		} else {
++			pname = mtd_dn->name;
+ 			offset = prefix_len;
+ 		}
+ 

+ 84 - 0
target/linux/generic/backport-6.1/406-v6.2-0002-mtd-core-try-to-find-OF-node-for-every-MTD-partition.patch

@@ -0,0 +1,84 @@
+From ddb8cefb7af288950447ca6eeeafb09977dab56f Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <[email protected]>
+Date: Tue, 4 Oct 2022 10:37:10 +0200
+Subject: [PATCH] mtd: core: try to find OF node for every MTD partition
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+So far this feature was limited to the top-level "nvmem-cells" node.
+There are multiple parsers creating partitions and subpartitions
+dynamically. Extend that code to handle them too.
+
+This allows finding partition-* node for every MTD (sub)partition.
+
+Random example:
+
+partitions {
+	compatible = "brcm,bcm947xx-cfe-partitions";
+
+	partition-firmware {
+		compatible = "brcm,trx";
+
+		partition-loader {
+		};
+	};
+};
+
+Cc: Christian Marangi <[email protected]>
+Signed-off-by: Rafał Miłecki <[email protected]>
+Signed-off-by: Miquel Raynal <[email protected]>
+Link: https://lore.kernel.org/linux-mtd/[email protected]
+---
+ drivers/mtd/mtdcore.c | 18 ++++++------------
+ 1 file changed, 6 insertions(+), 12 deletions(-)
+
+--- a/drivers/mtd/mtdcore.c
++++ b/drivers/mtd/mtdcore.c
+@@ -569,20 +569,22 @@ static void mtd_check_of_node(struct mtd
+ 	struct device_node *partitions, *parent_dn, *mtd_dn = NULL;
+ 	const char *pname, *prefix = "partition-";
+ 	int plen, mtd_name_len, offset, prefix_len;
+-	bool found = false;
+ 
+ 	/* Check if MTD already has a device node */
+ 	if (mtd_get_of_node(mtd))
+ 		return;
+ 
+-	/* Check if a partitions node exist */
+ 	if (!mtd_is_partition(mtd))
+ 		return;
++
+ 	parent_dn = of_node_get(mtd_get_of_node(mtd->parent));
+ 	if (!parent_dn)
+ 		return;
+ 
+-	partitions = of_get_child_by_name(parent_dn, "partitions");
++	if (mtd_is_partition(mtd->parent))
++		partitions = of_node_get(parent_dn);
++	else
++		partitions = of_get_child_by_name(parent_dn, "partitions");
+ 	if (!partitions)
+ 		goto exit_parent;
+ 
+@@ -606,19 +608,11 @@ static void mtd_check_of_node(struct mtd
+ 		plen = strlen(pname) - offset;
+ 		if (plen == mtd_name_len &&
+ 		    !strncmp(mtd->name, pname + offset, plen)) {
+-			found = true;
++			mtd_set_of_node(mtd, mtd_dn);
+ 			break;
+ 		}
+ 	}
+ 
+-	if (!found)
+-		goto exit_partitions;
+-
+-	/* Set of_node only for nvmem */
+-	if (of_device_is_compatible(mtd_dn, "nvmem-cells"))
+-		mtd_set_of_node(mtd, mtd_dn);
+-
+-exit_partitions:
+ 	of_node_put(partitions);
+ exit_parent:
+ 	of_node_put(parent_dn);

+ 32 - 0
target/linux/generic/backport-6.1/407-v5.17-mtd-parsers-qcom-Don-t-print-error-message-on-EPROBE.patch

@@ -0,0 +1,32 @@
+From 26bccc9671ba5e01f7153addbe94e7dc3f677375 Mon Sep 17 00:00:00 2001
+From: Bryan O'Donoghue <[email protected]>
+Date: Mon, 3 Jan 2022 03:03:16 +0000
+Subject: [PATCH 13/14] mtd: parsers: qcom: Don't print error message on
+ -EPROBE_DEFER
+
+Its possible for the main smem driver to not be loaded by the time we come
+along to parse the smem partition description but, this is a perfectly
+normal thing.
+
+No need to print out an error message in this case.
+
+Signed-off-by: Bryan O'Donoghue <[email protected]>
+Reviewed-by: Manivannan Sadhasivam <[email protected]>
+Signed-off-by: Miquel Raynal <[email protected]>
+Link: https://lore.kernel.org/linux-mtd/[email protected]
+---
+ drivers/mtd/parsers/qcomsmempart.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/mtd/parsers/qcomsmempart.c
++++ b/drivers/mtd/parsers/qcomsmempart.c
+@@ -75,7 +75,8 @@ static int parse_qcomsmem_part(struct mt
+ 	pr_debug("Parsing partition table info from SMEM\n");
+ 	ptable = qcom_smem_get(SMEM_APPS, SMEM_AARM_PARTITION_TABLE, &len);
+ 	if (IS_ERR(ptable)) {
+-		pr_err("Error reading partition table header\n");
++		if (PTR_ERR(ptable) != -EPROBE_DEFER)
++			pr_err("Error reading partition table header\n");
+ 		return PTR_ERR(ptable);
+ 	}
+ 

+ 47 - 0
target/linux/generic/backport-6.1/408-v6.2-mtd-core-set-ROOT_DEV-for-partitions-marked-as-rootf.patch

@@ -0,0 +1,47 @@
+From 26422ac78e9d8767bd4aabfbae616b15edbf6a1b Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <[email protected]>
+Date: Sat, 22 Oct 2022 23:13:18 +0200
+Subject: [PATCH] mtd: core: set ROOT_DEV for partitions marked as rootfs in DT
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+This adds support for "linux,rootfs" binding that is used to mark flash
+partition containing rootfs. It's useful for devices using device tree
+that don't have bootloader passing root info in cmdline.
+
+Signed-off-by: Rafał Miłecki <[email protected]>
+Signed-off-by: Miquel Raynal <[email protected]>
+Link: https://lore.kernel.org/linux-mtd/[email protected]
+---
+ drivers/mtd/mtdcore.c | 12 ++++++++++++
+ 1 file changed, 12 insertions(+)
+
+--- a/drivers/mtd/mtdcore.c
++++ b/drivers/mtd/mtdcore.c
+@@ -28,6 +28,7 @@
+ #include <linux/leds.h>
+ #include <linux/debugfs.h>
+ #include <linux/nvmem-provider.h>
++#include <linux/root_dev.h>
+ 
+ #include <linux/mtd/mtd.h>
+ #include <linux/mtd/partitions.h>
+@@ -748,6 +749,17 @@ int add_mtd_device(struct mtd_info *mtd)
+ 		not->add(mtd);
+ 
+ 	mutex_unlock(&mtd_table_mutex);
++
++	if (of_find_property(mtd_get_of_node(mtd), "linux,rootfs", NULL)) {
++		if (IS_BUILTIN(CONFIG_MTD)) {
++			pr_info("mtd: setting mtd%d (%s) as root device\n", mtd->index, mtd->name);
++			ROOT_DEV = MKDEV(MTD_BLOCK_MAJOR, mtd->index);
++		} else {
++			pr_warn("mtd: can't set mtd%d (%s) as root device - mtd must be builtin\n",
++				mtd->index, mtd->name);
++		}
++	}
++
+ 	/* We _know_ we aren't being removed, because
+ 	   our caller is still holding us here. So none
+ 	   of this try_ nonsense, and no bitching about it

+ 33 - 0
target/linux/generic/backport-6.1/410-v5.18-mtd-parsers-trx-allow-to-use-on-MediaTek-MIPS-SoCs.patch

@@ -0,0 +1,33 @@
+From 2365f91c861cbfeef7141c69842848c7b2d3c2db Mon Sep 17 00:00:00 2001
+From: INAGAKI Hiroshi <[email protected]>
+Date: Sun, 13 Feb 2022 15:40:44 +0900
+Subject: [PATCH] mtd: parsers: trx: allow to use on MediaTek MIPS SoCs
+
+Buffalo sells some router devices which have trx-formatted firmware,
+based on MediaTek MIPS SoCs. To use parser_trx on those devices, add
+"RALINK" to dependency and allow to compile for MediaTek MIPS SoCs.
+
+examples:
+
+- WCR-1166DS  (MT7628)
+- WSR-1166DHP (MT7621)
+- WSR-2533DHP (MT7621)
+
+Signed-off-by: INAGAKI Hiroshi <[email protected]>
+Signed-off-by: Miquel Raynal <[email protected]>
+Link: https://lore.kernel.org/linux-mtd/[email protected]
+---
+ drivers/mtd/parsers/Kconfig | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/mtd/parsers/Kconfig
++++ b/drivers/mtd/parsers/Kconfig
+@@ -115,7 +115,7 @@ config MTD_AFS_PARTS
+ 
+ config MTD_PARSER_TRX
+ 	tristate "Parser for TRX format partitions"
+-	depends on MTD && (BCM47XX || ARCH_BCM_5301X || ARCH_MEDIATEK || COMPILE_TEST)
++	depends on MTD && (BCM47XX || ARCH_BCM_5301X || ARCH_MEDIATEK || RALINK || COMPILE_TEST)
+ 	help
+ 	  TRX is a firmware format used by Broadcom on their devices. It
+ 	  may contain up to 3/4 partitions (depending on the version).

+ 58 - 0
target/linux/generic/backport-6.1/420-v5.19-02-mtd-spinand-gigadevice-add-support-for-GD5FxGQ4xExxG.patch

@@ -0,0 +1,58 @@
+From 573eec222bc82fb5e724586267fbbb1aed9ffd03 Mon Sep 17 00:00:00 2001
+From: Chuanhong Guo <[email protected]>
+Date: Sun, 20 Mar 2022 17:59:58 +0800
+Subject: [PATCH 2/5] mtd: spinand: gigadevice: add support for GD5FxGQ4xExxG
+
+Add support for:
+ GD5F1GQ4RExxG
+ GD5F2GQ4{U,R}ExxG
+
+These chips differ from GD5F1GQ4UExxG only in chip ID, voltage
+and capacity.
+
+Signed-off-by: Chuanhong Guo <[email protected]>
+Signed-off-by: Miquel Raynal <[email protected]>
+Link: https://lore.kernel.org/linux-mtd/[email protected]
+---
+ drivers/mtd/nand/spi/gigadevice.c | 30 ++++++++++++++++++++++++++++++
+ 1 file changed, 30 insertions(+)
+
+--- a/drivers/mtd/nand/spi/gigadevice.c
++++ b/drivers/mtd/nand/spi/gigadevice.c
+@@ -333,6 +333,36 @@ static const struct spinand_info gigadev
+ 		     SPINAND_HAS_QE_BIT,
+ 		     SPINAND_ECCINFO(&gd5fxgqx_variant2_ooblayout,
+ 				     gd5fxgq4uexxg_ecc_get_status)),
++	SPINAND_INFO("GD5F1GQ4RExxG",
++		     SPINAND_ID(SPINAND_READID_METHOD_OPCODE_ADDR, 0xc1),
++		     NAND_MEMORG(1, 2048, 128, 64, 1024, 20, 1, 1, 1),
++		     NAND_ECCREQ(8, 512),
++		     SPINAND_INFO_OP_VARIANTS(&read_cache_variants,
++					      &write_cache_variants,
++					      &update_cache_variants),
++		     SPINAND_HAS_QE_BIT,
++		     SPINAND_ECCINFO(&gd5fxgqx_variant2_ooblayout,
++				     gd5fxgq4uexxg_ecc_get_status)),
++	SPINAND_INFO("GD5F2GQ4UExxG",
++		     SPINAND_ID(SPINAND_READID_METHOD_OPCODE_ADDR, 0xd2),
++		     NAND_MEMORG(1, 2048, 128, 64, 2048, 40, 1, 1, 1),
++		     NAND_ECCREQ(8, 512),
++		     SPINAND_INFO_OP_VARIANTS(&read_cache_variants,
++					      &write_cache_variants,
++					      &update_cache_variants),
++		     SPINAND_HAS_QE_BIT,
++		     SPINAND_ECCINFO(&gd5fxgqx_variant2_ooblayout,
++				     gd5fxgq4uexxg_ecc_get_status)),
++	SPINAND_INFO("GD5F2GQ4RExxG",
++		     SPINAND_ID(SPINAND_READID_METHOD_OPCODE_ADDR, 0xc2),
++		     NAND_MEMORG(1, 2048, 128, 64, 2048, 40, 1, 1, 1),
++		     NAND_ECCREQ(8, 512),
++		     SPINAND_INFO_OP_VARIANTS(&read_cache_variants,
++					      &write_cache_variants,
++					      &update_cache_variants),
++		     SPINAND_HAS_QE_BIT,
++		     SPINAND_ECCINFO(&gd5fxgqx_variant2_ooblayout,
++				     gd5fxgq4uexxg_ecc_get_status)),
+ 	SPINAND_INFO("GD5F1GQ4UFxxG",
+ 		     SPINAND_ID(SPINAND_READID_METHOD_OPCODE, 0xb1, 0x48),
+ 		     NAND_MEMORG(1, 2048, 128, 64, 1024, 20, 1, 1, 1),

+ 33 - 0
target/linux/generic/backport-6.1/420-v5.19-03-mtd-spinand-gigadevice-add-support-for-GD5F1GQ5RExxG.patch

@@ -0,0 +1,33 @@
+From 620a988813403318023296b61228ee8f3fcdb8e0 Mon Sep 17 00:00:00 2001
+From: Chuanhong Guo <[email protected]>
+Date: Sun, 20 Mar 2022 17:59:59 +0800
+Subject: [PATCH 3/5] mtd: spinand: gigadevice: add support for GD5F1GQ5RExxG
+
+This chip is the 1.8V version of the GD5F1GQ5UExxG.
+
+Signed-off-by: Chuanhong Guo <[email protected]>
+Signed-off-by: Miquel Raynal <[email protected]>
+Link: https://lore.kernel.org/linux-mtd/[email protected]
+---
+ drivers/mtd/nand/spi/gigadevice.c | 10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+--- a/drivers/mtd/nand/spi/gigadevice.c
++++ b/drivers/mtd/nand/spi/gigadevice.c
+@@ -383,6 +383,16 @@ static const struct spinand_info gigadev
+ 		     SPINAND_HAS_QE_BIT,
+ 		     SPINAND_ECCINFO(&gd5fxgqx_variant2_ooblayout,
+ 				     gd5fxgq5xexxg_ecc_get_status)),
++	SPINAND_INFO("GD5F1GQ5RExxG",
++		     SPINAND_ID(SPINAND_READID_METHOD_OPCODE_DUMMY, 0x41),
++		     NAND_MEMORG(1, 2048, 128, 64, 1024, 20, 1, 1, 1),
++		     NAND_ECCREQ(4, 512),
++		     SPINAND_INFO_OP_VARIANTS(&read_cache_variants_1gq5,
++					      &write_cache_variants,
++					      &update_cache_variants),
++		     SPINAND_HAS_QE_BIT,
++		     SPINAND_ECCINFO(&gd5fxgqx_variant2_ooblayout,
++				     gd5fxgq5xexxg_ecc_get_status)),
+ };
+ 
+ static const struct spinand_manufacturer_ops gigadevice_spinand_manuf_ops = {

+ 84 - 0
target/linux/generic/backport-6.1/420-v5.19-04-mtd-spinand-gigadevice-add-support-for-GD5F-2-4-GQ5x.patch

@@ -0,0 +1,84 @@
+From 194ec04b3a9e7fa97d1fbef296410631bc3cf1c8 Mon Sep 17 00:00:00 2001
+From: Chuanhong Guo <[email protected]>
+Date: Sun, 20 Mar 2022 18:00:00 +0800
+Subject: [PATCH 4/5] mtd: spinand: gigadevice: add support for GD5F{2,
+ 4}GQ5xExxG
+
+Add support for:
+ GD5F2GQ5{U,R}ExxG
+ GD5F4GQ6{U,R}ExxG
+
+These chips use 4 dummy bytes for quad I/O and 2 dummy bytes for dual I/O.
+Apart from that and the memory layout, they are identical to their 1G
+variants.
+
+Signed-off-by: Chuanhong Guo <[email protected]>
+Signed-off-by: Miquel Raynal <[email protected]>
+Link: https://lore.kernel.org/linux-mtd/[email protected]
+---
+ drivers/mtd/nand/spi/gigadevice.c | 48 +++++++++++++++++++++++++++++++
+ 1 file changed, 48 insertions(+)
+
+--- a/drivers/mtd/nand/spi/gigadevice.c
++++ b/drivers/mtd/nand/spi/gigadevice.c
+@@ -47,6 +47,14 @@ static SPINAND_OP_VARIANTS(read_cache_va
+ 		SPINAND_PAGE_READ_FROM_CACHE_OP(true, 0, 1, NULL, 0),
+ 		SPINAND_PAGE_READ_FROM_CACHE_OP(false, 0, 1, NULL, 0));
+ 
++static SPINAND_OP_VARIANTS(read_cache_variants_2gq5,
++		SPINAND_PAGE_READ_FROM_CACHE_QUADIO_OP(0, 4, NULL, 0),
++		SPINAND_PAGE_READ_FROM_CACHE_X4_OP(0, 1, NULL, 0),
++		SPINAND_PAGE_READ_FROM_CACHE_DUALIO_OP(0, 2, NULL, 0),
++		SPINAND_PAGE_READ_FROM_CACHE_X2_OP(0, 1, NULL, 0),
++		SPINAND_PAGE_READ_FROM_CACHE_OP(true, 0, 1, NULL, 0),
++		SPINAND_PAGE_READ_FROM_CACHE_OP(false, 0, 1, NULL, 0));
++
+ static SPINAND_OP_VARIANTS(write_cache_variants,
+ 		SPINAND_PROG_LOAD_X4(true, 0, NULL, 0),
+ 		SPINAND_PROG_LOAD(true, 0, NULL, 0));
+@@ -391,6 +399,46 @@ static const struct spinand_info gigadev
+ 					      &write_cache_variants,
+ 					      &update_cache_variants),
+ 		     SPINAND_HAS_QE_BIT,
++		     SPINAND_ECCINFO(&gd5fxgqx_variant2_ooblayout,
++				     gd5fxgq5xexxg_ecc_get_status)),
++	SPINAND_INFO("GD5F2GQ5UExxG",
++		     SPINAND_ID(SPINAND_READID_METHOD_OPCODE_DUMMY, 0x52),
++		     NAND_MEMORG(1, 2048, 128, 64, 2048, 40, 1, 1, 1),
++		     NAND_ECCREQ(4, 512),
++		     SPINAND_INFO_OP_VARIANTS(&read_cache_variants_2gq5,
++					      &write_cache_variants,
++					      &update_cache_variants),
++		     SPINAND_HAS_QE_BIT,
++		     SPINAND_ECCINFO(&gd5fxgqx_variant2_ooblayout,
++				     gd5fxgq5xexxg_ecc_get_status)),
++	SPINAND_INFO("GD5F2GQ5RExxG",
++		     SPINAND_ID(SPINAND_READID_METHOD_OPCODE_DUMMY, 0x42),
++		     NAND_MEMORG(1, 2048, 128, 64, 2048, 40, 1, 1, 1),
++		     NAND_ECCREQ(4, 512),
++		     SPINAND_INFO_OP_VARIANTS(&read_cache_variants_2gq5,
++					      &write_cache_variants,
++					      &update_cache_variants),
++		     SPINAND_HAS_QE_BIT,
++		     SPINAND_ECCINFO(&gd5fxgqx_variant2_ooblayout,
++				     gd5fxgq5xexxg_ecc_get_status)),
++	SPINAND_INFO("GD5F4GQ6UExxG",
++		     SPINAND_ID(SPINAND_READID_METHOD_OPCODE_DUMMY, 0x55),
++		     NAND_MEMORG(1, 2048, 128, 64, 2048, 40, 1, 2, 1),
++		     NAND_ECCREQ(4, 512),
++		     SPINAND_INFO_OP_VARIANTS(&read_cache_variants_2gq5,
++					      &write_cache_variants,
++					      &update_cache_variants),
++		     SPINAND_HAS_QE_BIT,
++		     SPINAND_ECCINFO(&gd5fxgqx_variant2_ooblayout,
++				     gd5fxgq5xexxg_ecc_get_status)),
++	SPINAND_INFO("GD5F4GQ6RExxG",
++		     SPINAND_ID(SPINAND_READID_METHOD_OPCODE_DUMMY, 0x45),
++		     NAND_MEMORG(1, 2048, 128, 64, 2048, 40, 1, 2, 1),
++		     NAND_ECCREQ(4, 512),
++		     SPINAND_INFO_OP_VARIANTS(&read_cache_variants_2gq5,
++					      &write_cache_variants,
++					      &update_cache_variants),
++		     SPINAND_HAS_QE_BIT,
+ 		     SPINAND_ECCINFO(&gd5fxgqx_variant2_ooblayout,
+ 				     gd5fxgq5xexxg_ecc_get_status)),
+ };

+ 91 - 0
target/linux/generic/backport-6.1/420-v5.19-05-mtd-spinand-gigadevice-add-support-for-GD5FxGM7xExxG.patch

@@ -0,0 +1,91 @@
+From 54647cd003c08b714474a5b599a147ec6a160486 Mon Sep 17 00:00:00 2001
+From: Chuanhong Guo <[email protected]>
+Date: Sun, 20 Mar 2022 18:00:01 +0800
+Subject: [PATCH 5/5] mtd: spinand: gigadevice: add support for GD5FxGM7xExxG
+
+Add support for:
+ GD5F{1,2}GM7{U,R}ExxG
+ GD5F4GM8{U,R}ExxG
+
+These are new 27nm counterparts for the GD5FxGQ4 chips from GigaDevice
+with 8b/512b on-die ECC capability.
+These chips (and the currently supported GD5FxGQ5 chips) have a QIO DTR
+instruction for reading the page cache. It isn't added in this patch
+because I don't have a DTR SPI controller for testing.
+
+Signed-off-by: Chuanhong Guo <[email protected]>
+Signed-off-by: Miquel Raynal <[email protected]>
+Link: https://lore.kernel.org/linux-mtd/[email protected]
+---
+ drivers/mtd/nand/spi/gigadevice.c | 60 +++++++++++++++++++++++++++++++
+ 1 file changed, 60 insertions(+)
+
+--- a/drivers/mtd/nand/spi/gigadevice.c
++++ b/drivers/mtd/nand/spi/gigadevice.c
+@@ -441,6 +441,66 @@ static const struct spinand_info gigadev
+ 		     SPINAND_HAS_QE_BIT,
+ 		     SPINAND_ECCINFO(&gd5fxgqx_variant2_ooblayout,
+ 				     gd5fxgq5xexxg_ecc_get_status)),
++	SPINAND_INFO("GD5F1GM7UExxG",
++		     SPINAND_ID(SPINAND_READID_METHOD_OPCODE_DUMMY, 0x91),
++		     NAND_MEMORG(1, 2048, 128, 64, 1024, 20, 1, 1, 1),
++		     NAND_ECCREQ(8, 512),
++		     SPINAND_INFO_OP_VARIANTS(&read_cache_variants_1gq5,
++					      &write_cache_variants,
++					      &update_cache_variants),
++		     SPINAND_HAS_QE_BIT,
++		     SPINAND_ECCINFO(&gd5fxgqx_variant2_ooblayout,
++				     gd5fxgq4uexxg_ecc_get_status)),
++	SPINAND_INFO("GD5F1GM7RExxG",
++		     SPINAND_ID(SPINAND_READID_METHOD_OPCODE_DUMMY, 0x81),
++		     NAND_MEMORG(1, 2048, 128, 64, 1024, 20, 1, 1, 1),
++		     NAND_ECCREQ(8, 512),
++		     SPINAND_INFO_OP_VARIANTS(&read_cache_variants_1gq5,
++					      &write_cache_variants,
++					      &update_cache_variants),
++		     SPINAND_HAS_QE_BIT,
++		     SPINAND_ECCINFO(&gd5fxgqx_variant2_ooblayout,
++				     gd5fxgq4uexxg_ecc_get_status)),
++	SPINAND_INFO("GD5F2GM7UExxG",
++		     SPINAND_ID(SPINAND_READID_METHOD_OPCODE_DUMMY, 0x92),
++		     NAND_MEMORG(1, 2048, 128, 64, 2048, 40, 1, 1, 1),
++		     NAND_ECCREQ(8, 512),
++		     SPINAND_INFO_OP_VARIANTS(&read_cache_variants_1gq5,
++					      &write_cache_variants,
++					      &update_cache_variants),
++		     SPINAND_HAS_QE_BIT,
++		     SPINAND_ECCINFO(&gd5fxgqx_variant2_ooblayout,
++				     gd5fxgq4uexxg_ecc_get_status)),
++	SPINAND_INFO("GD5F2GM7RExxG",
++		     SPINAND_ID(SPINAND_READID_METHOD_OPCODE_DUMMY, 0x82),
++		     NAND_MEMORG(1, 2048, 128, 64, 2048, 40, 1, 1, 1),
++		     NAND_ECCREQ(8, 512),
++		     SPINAND_INFO_OP_VARIANTS(&read_cache_variants_1gq5,
++					      &write_cache_variants,
++					      &update_cache_variants),
++		     SPINAND_HAS_QE_BIT,
++		     SPINAND_ECCINFO(&gd5fxgqx_variant2_ooblayout,
++				     gd5fxgq4uexxg_ecc_get_status)),
++	SPINAND_INFO("GD5F4GM8UExxG",
++		     SPINAND_ID(SPINAND_READID_METHOD_OPCODE_DUMMY, 0x95),
++		     NAND_MEMORG(1, 2048, 128, 64, 4096, 80, 1, 1, 1),
++		     NAND_ECCREQ(8, 512),
++		     SPINAND_INFO_OP_VARIANTS(&read_cache_variants_1gq5,
++					      &write_cache_variants,
++					      &update_cache_variants),
++		     SPINAND_HAS_QE_BIT,
++		     SPINAND_ECCINFO(&gd5fxgqx_variant2_ooblayout,
++				     gd5fxgq4uexxg_ecc_get_status)),
++	SPINAND_INFO("GD5F4GM8RExxG",
++		     SPINAND_ID(SPINAND_READID_METHOD_OPCODE_DUMMY, 0x85),
++		     NAND_MEMORG(1, 2048, 128, 64, 4096, 80, 1, 1, 1),
++		     NAND_ECCREQ(8, 512),
++		     SPINAND_INFO_OP_VARIANTS(&read_cache_variants_1gq5,
++					      &write_cache_variants,
++					      &update_cache_variants),
++		     SPINAND_HAS_QE_BIT,
++		     SPINAND_ECCINFO(&gd5fxgqx_variant2_ooblayout,
++				     gd5fxgq4uexxg_ecc_get_status)),
+ };
+ 
+ static const struct spinand_manufacturer_ops gigadevice_spinand_manuf_ops = {

+ 229 - 0
target/linux/generic/backport-6.1/421-v6.2-mtd-parsers-add-TP-Link-SafeLoader-partitions-table-.patch

@@ -0,0 +1,229 @@
+From aec4d5f5ffd0f0092bd9dc21ea90e0bc237d4b74 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <[email protected]>
+Date: Sat, 15 Oct 2022 11:29:50 +0200
+Subject: [PATCH] mtd: parsers: add TP-Link SafeLoader partitions table parser
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+This parser deals with most TP-Link home routers. It reads info about
+partitions and registers them in the MTD subsystem.
+
+Example from TP-Link Archer C5 V2:
+
+spi-nor spi0.0: s25fl128s1 (16384 Kbytes)
+15 tplink-safeloader partitions found on MTD device spi0.0
+Creating 15 MTD partitions on "spi0.0":
+0x000000000000-0x000000040000 : "fs-uboot"
+0x000000040000-0x000000440000 : "os-image"
+0x000000440000-0x000000e40000 : "rootfs"
+0x000000e40000-0x000000e40200 : "default-mac"
+0x000000e40200-0x000000e40400 : "pin"
+0x000000e40400-0x000000e40600 : "product-info"
+0x000000e50000-0x000000e60000 : "partition-table"
+0x000000e60000-0x000000e60200 : "soft-version"
+0x000000e61000-0x000000e70000 : "support-list"
+0x000000e70000-0x000000e80000 : "profile"
+0x000000e80000-0x000000e90000 : "default-config"
+0x000000e90000-0x000000ee0000 : "user-config"
+0x000000ee0000-0x000000fe0000 : "log"
+0x000000fe0000-0x000000ff0000 : "radio_bk"
+0x000000ff0000-0x000001000000 : "radio"
+
+Signed-off-by: Rafał Miłecki <[email protected]>
+Signed-off-by: Miquel Raynal <[email protected]>
+Link: https://lore.kernel.org/linux-mtd/[email protected]
+---
+ drivers/mtd/parsers/Kconfig             |  15 +++
+ drivers/mtd/parsers/Makefile            |   1 +
+ drivers/mtd/parsers/tplink_safeloader.c | 150 ++++++++++++++++++++++++
+ 3 files changed, 166 insertions(+)
+ create mode 100644 drivers/mtd/parsers/tplink_safeloader.c
+
+--- a/drivers/mtd/parsers/Kconfig
++++ b/drivers/mtd/parsers/Kconfig
+@@ -113,6 +113,21 @@ config MTD_AFS_PARTS
+ 	  for your particular device. It won't happen automatically. The
+ 	  'physmap' map driver (CONFIG_MTD_PHYSMAP) does this, for example.
+ 
++config MTD_PARSER_TPLINK_SAFELOADER
++	tristate "TP-Link Safeloader partitions parser"
++	depends on MTD && (ARCH_BCM_5301X || ATH79 || SOC_MT7620 || SOC_MT7621 || COMPILE_TEST)
++	help
++	  TP-Link home routers use flash partitions to store various data. Info
++	  about flash space layout is stored in a partitions table using a
++	  custom ASCII-based format.
++
++	  That format was first found in devices with SafeLoader bootloader and
++	  was named after it. Later it was adapted to CFE and U-Boot
++	  bootloaders.
++
++	  This driver reads partitions table, parses it and creates MTD
++	  partitions.
++
+ config MTD_PARSER_TRX
+ 	tristate "Parser for TRX format partitions"
+ 	depends on MTD && (BCM47XX || ARCH_BCM_5301X || ARCH_MEDIATEK || RALINK || COMPILE_TEST)
+--- a/drivers/mtd/parsers/Makefile
++++ b/drivers/mtd/parsers/Makefile
+@@ -9,6 +9,7 @@ ofpart-$(CONFIG_MTD_OF_PARTS_BCM4908)	+=
+ ofpart-$(CONFIG_MTD_OF_PARTS_LINKSYS_NS)+= ofpart_linksys_ns.o
+ obj-$(CONFIG_MTD_PARSER_IMAGETAG)	+= parser_imagetag.o
+ obj-$(CONFIG_MTD_AFS_PARTS)		+= afs.o
++obj-$(CONFIG_MTD_PARSER_TPLINK_SAFELOADER)	+= tplink_safeloader.o
+ obj-$(CONFIG_MTD_PARSER_TRX)		+= parser_trx.o
+ obj-$(CONFIG_MTD_SERCOMM_PARTS)		+= scpart.o
+ obj-$(CONFIG_MTD_SHARPSL_PARTS)		+= sharpslpart.o
+--- /dev/null
++++ b/drivers/mtd/parsers/tplink_safeloader.c
+@@ -0,0 +1,150 @@
++// SPDX-License-Identifier: GPL-2.0-only
++/*
++ * Copyright © 2022 Rafał Miłecki <[email protected]>
++ */
++
++#include <linux/kernel.h>
++#include <linux/module.h>
++#include <linux/mtd/mtd.h>
++#include <linux/mtd/partitions.h>
++#include <linux/of.h>
++#include <linux/slab.h>
++
++#define TPLINK_SAFELOADER_DATA_OFFSET		4
++#define TPLINK_SAFELOADER_MAX_PARTS		32
++
++struct safeloader_cmn_header {
++	__be32 size;
++	uint32_t unused;
++} __packed;
++
++static void *mtd_parser_tplink_safeloader_read_table(struct mtd_info *mtd)
++{
++	struct safeloader_cmn_header hdr;
++	struct device_node *np;
++	size_t bytes_read;
++	size_t offset;
++	size_t size;
++	char *buf;
++	int err;
++
++	np = mtd_get_of_node(mtd);
++	if (mtd_is_partition(mtd))
++		of_node_get(np);
++	else
++		np = of_get_child_by_name(np, "partitions");
++
++	if (of_property_read_u32(np, "partitions-table-offset", (u32 *)&offset)) {
++		pr_err("Failed to get partitions table offset\n");
++		goto err_put;
++	}
++
++	err = mtd_read(mtd, offset, sizeof(hdr), &bytes_read, (uint8_t *)&hdr);
++	if (err && !mtd_is_bitflip(err)) {
++		pr_err("Failed to read from %s at 0x%zx\n", mtd->name, offset);
++		goto err_put;
++	}
++
++	size = be32_to_cpu(hdr.size);
++
++	buf = kmalloc(size + 1, GFP_KERNEL);
++	if (!buf)
++		goto err_put;
++
++	err = mtd_read(mtd, offset + sizeof(hdr), size, &bytes_read, buf);
++	if (err && !mtd_is_bitflip(err)) {
++		pr_err("Failed to read from %s at 0x%zx\n", mtd->name, offset + sizeof(hdr));
++		goto err_kfree;
++	}
++
++	buf[size] = '\0';
++
++	of_node_put(np);
++
++	return buf;
++
++err_kfree:
++	kfree(buf);
++err_put:
++	of_node_put(np);
++	return NULL;
++}
++
++static int mtd_parser_tplink_safeloader_parse(struct mtd_info *mtd,
++					      const struct mtd_partition **pparts,
++					      struct mtd_part_parser_data *data)
++{
++	struct mtd_partition *parts;
++	char name[65];
++	size_t offset;
++	size_t bytes;
++	char *buf;
++	int idx;
++	int err;
++
++	parts = kcalloc(TPLINK_SAFELOADER_MAX_PARTS, sizeof(*parts), GFP_KERNEL);
++	if (!parts) {
++		err = -ENOMEM;
++		goto err_out;
++	}
++
++	buf = mtd_parser_tplink_safeloader_read_table(mtd);
++	if (!buf) {
++		err = -ENOENT;
++		goto err_out;
++	}
++
++	for (idx = 0, offset = TPLINK_SAFELOADER_DATA_OFFSET;
++	     idx < TPLINK_SAFELOADER_MAX_PARTS &&
++	     sscanf(buf + offset, "partition %64s base 0x%llx size 0x%llx%zn\n",
++		    name, &parts[idx].offset, &parts[idx].size, &bytes) == 3;
++	     idx++, offset += bytes + 1) {
++		parts[idx].name = kstrdup(name, GFP_KERNEL);
++		if (!parts[idx].name) {
++			err = -ENOMEM;
++			goto err_free;
++		}
++	}
++
++	if (idx == TPLINK_SAFELOADER_MAX_PARTS)
++		pr_warn("Reached maximum number of partitions!\n");
++
++	kfree(buf);
++
++	*pparts = parts;
++
++	return idx;
++
++err_free:
++	for (idx -= 1; idx >= 0; idx--)
++		kfree(parts[idx].name);
++err_out:
++	return err;
++};
++
++static void mtd_parser_tplink_safeloader_cleanup(const struct mtd_partition *pparts,
++						 int nr_parts)
++{
++	int i;
++
++	for (i = 0; i < nr_parts; i++)
++		kfree(pparts[i].name);
++
++	kfree(pparts);
++}
++
++static const struct of_device_id mtd_parser_tplink_safeloader_of_match_table[] = {
++	{ .compatible = "tplink,safeloader-partitions" },
++	{},
++};
++MODULE_DEVICE_TABLE(of, mtd_parser_tplink_safeloader_of_match_table);
++
++static struct mtd_part_parser mtd_parser_tplink_safeloader = {
++	.parse_fn = mtd_parser_tplink_safeloader_parse,
++	.cleanup = mtd_parser_tplink_safeloader_cleanup,
++	.name = "tplink-safeloader",
++	.of_match_table = mtd_parser_tplink_safeloader_of_match_table,
++};
++module_mtd_part_parser(mtd_parser_tplink_safeloader);
++
++MODULE_LICENSE("GPL");

+ 49 - 0
target/linux/generic/backport-6.1/422-v5.19-mtd-spi-nor-support-eon-en25qh256a.patch

@@ -0,0 +1,49 @@
+From 6abef37d16d0c570ef5a149e63762fba2a30804b Mon Sep 17 00:00:00 2001
+From: "Leon M. George" <[email protected]>
+Date: Wed, 30 Mar 2022 16:16:56 +0200
+Subject: [PATCH] mtd: spi-nor: support eon en25qh256a variant
+
+The EN25QH256A variant of the EN25QH256 doesn't initialize correctly from SFDP
+alone and only accesses memory below 8m (addr_width is 4 but read_opcode takes
+only 3 bytes).
+
+Set SNOR_F_4B_OPCODES if the flash chip variant was detected using hwcaps.
+
+The fix submitted upstream uses the PARSE_SFDP initializer, which is not
+available in the kernel used with OpenWrt.
+
+Signed-off-by: Leon M. George <[email protected]>
+---
+ drivers/mtd/spi-nor/eon.c | 11 +++++++++++
+ 1 file changed, 11 insertions(+)
+
+--- a/drivers/mtd/spi-nor/eon.c
++++ b/drivers/mtd/spi-nor/eon.c
+@@ -8,6 +8,16 @@
+ 
+ #include "core.h"
+ 
++static void en25qh256_post_sfdp_fixups(struct spi_nor *nor)
++{
++	if (nor->params->hwcaps.mask & SNOR_HWCAPS_READ_1_1_4)
++		nor->flags |= SNOR_F_4B_OPCODES;
++}
++
++static const struct spi_nor_fixups en25qh256_fixups = {
++	.post_sfdp = en25qh256_post_sfdp_fixups,
++};
++
+ static const struct flash_info eon_parts[] = {
+ 	/* EON -- en25xxx */
+ 	{ "en25f32",    INFO(0x1c3116, 0, 64 * 1024,   64, SECT_4K) },
+@@ -23,7 +33,9 @@ static const struct flash_info eon_parts
+ 	{ "en25qh64",   INFO(0x1c7017, 0, 64 * 1024,  128,
+ 			     SECT_4K | SPI_NOR_DUAL_READ) },
+ 	{ "en25qh128",  INFO(0x1c7018, 0, 64 * 1024,  256, 0) },
+-	{ "en25qh256",  INFO(0x1c7019, 0, 64 * 1024,  512, 0) },
++	{ "en25qh256",  INFO(0x1c7019, 0, 64 * 1024,  512,
++		SPI_NOR_DUAL_READ)
++		.fixups = &en25qh256_fixups },
+ 	{ "en25s64",	INFO(0x1c3817, 0, 64 * 1024,  128, SECT_4K) },
+ };
+ 

+ 73 - 0
target/linux/generic/backport-6.1/423-v6.1-0001-mtd-track-maximum-number-of-bitflips-for-each-read-r.patch

@@ -0,0 +1,73 @@
+From e237285113963bd1dd2e925770aa8b3aa8a1894c Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Micha=C5=82=20K=C4=99pie=C5=84?= <[email protected]>
+Date: Wed, 29 Jun 2022 14:57:34 +0200
+Subject: [PATCH 1/4] mtd: track maximum number of bitflips for each read
+ request
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+mtd_read_oob() callers are currently oblivious to the details of ECC
+errors detected during the read operation - they only learn (through the
+return value) whether any corrected bitflips or uncorrectable errors
+occurred.  More detailed ECC information can be useful to user-space
+applications for making better-informed choices about moving data
+around.
+
+Extend struct mtd_oob_ops with a pointer to a newly-introduced struct
+mtd_req_stats and set its 'max_bitflips' field to the maximum number of
+bitflips found in a single ECC step during the read operation performed
+by mtd_read_oob().  This is a prerequisite for ultimately passing that
+value back to user space.
+
+Suggested-by: Boris Brezillon <[email protected]>
+Signed-off-by: Michał Kępień <[email protected]>
+Signed-off-by: Miquel Raynal <[email protected]>
+Link: https://lore.kernel.org/linux-mtd/[email protected]
+---
+ drivers/mtd/mtdcore.c   | 5 +++++
+ include/linux/mtd/mtd.h | 5 +++++
+ 2 files changed, 10 insertions(+)
+
+--- a/drivers/mtd/mtdcore.c
++++ b/drivers/mtd/mtdcore.c
+@@ -1676,6 +1676,9 @@ int mtd_read_oob(struct mtd_info *mtd, l
+ 	if (!master->_read_oob && (!master->_read || ops->oobbuf))
+ 		return -EOPNOTSUPP;
+ 
++	if (ops->stats)
++		memset(ops->stats, 0, sizeof(*ops->stats));
++
+ 	if (mtd->flags & MTD_SLC_ON_MLC_EMULATION)
+ 		ret_code = mtd_io_emulated_slc(mtd, from, true, ops);
+ 	else
+@@ -1693,6 +1696,8 @@ int mtd_read_oob(struct mtd_info *mtd, l
+ 		return ret_code;
+ 	if (mtd->ecc_strength == 0)
+ 		return 0;	/* device lacks ecc */
++	if (ops->stats)
++		ops->stats->max_bitflips = ret_code;
+ 	return ret_code >= mtd->bitflip_threshold ? -EUCLEAN : 0;
+ }
+ EXPORT_SYMBOL_GPL(mtd_read_oob);
+--- a/include/linux/mtd/mtd.h
++++ b/include/linux/mtd/mtd.h
+@@ -40,6 +40,10 @@ struct mtd_erase_region_info {
+ 	unsigned long *lockmap;		/* If keeping bitmap of locks */
+ };
+ 
++struct mtd_req_stats {
++	unsigned int max_bitflips;
++};
++
+ /**
+  * struct mtd_oob_ops - oob operation operands
+  * @mode:	operation mode
+@@ -70,6 +74,7 @@ struct mtd_oob_ops {
+ 	uint32_t	ooboffs;
+ 	uint8_t		*datbuf;
+ 	uint8_t		*oobbuf;
++	struct mtd_req_stats *stats;
+ };
+ 
+ #define MTD_MAX_OOBFREE_ENTRIES_LARGE	32
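
A minimal sketch of how an in-kernel caller could opt into the new per-request
statistics. This is a hypothetical example (function name and context are not
from the patch); it only relies on the mtd_read_oob() interface and the fields
added above:

/* Hypothetical caller, assuming a kernel context with <linux/mtd/mtd.h>. */
static int example_read_with_stats(struct mtd_info *mtd, loff_t from,
				   size_t len, u_char *buf)
{
	struct mtd_req_stats stats = { };
	struct mtd_oob_ops ops = {
		.mode   = MTD_OPS_PLACE_OOB,
		.len    = len,
		.datbuf = buf,
		.stats  = &stats,	/* opt in to per-request statistics */
	};
	int ret;

	/* mtd_read_oob() zeroes *ops.stats and fills in max_bitflips. */
	ret = mtd_read_oob(mtd, from, &ops);
	if (ret && ret != -EUCLEAN)
		return ret;

	pr_info("read %zu bytes, max %u bitflips in a single ECC step\n",
		ops.retlen, stats.max_bitflips);
	return 0;
}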

+ 325 - 0
target/linux/generic/backport-6.1/423-v6.1-0002-mtd-always-initialize-stats-in-struct-mtd_oob_ops.patch

@@ -0,0 +1,325 @@
+From e97709c9d18903f5acd5fbe2985dd054da0432b1 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Micha=C5=82=20K=C4=99pie=C5=84?= <[email protected]>
+Date: Wed, 29 Jun 2022 14:57:35 +0200
+Subject: [PATCH 2/4] mtd: always initialize 'stats' in struct mtd_oob_ops
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+As the 'stats' field in struct mtd_oob_ops is used in conditional
+expressions, ensure it is always zero-initialized in all such structures
+to prevent random stack garbage from being interpreted as a pointer.
+
+Strictly speaking, this problem currently only needs to be fixed for
+struct mtd_oob_ops structures subsequently passed to mtd_read_oob().
+However, this commit goes a step further and makes all instances of
+struct mtd_oob_ops in the tree zero-initialized, in hope of preventing
+future problems, e.g. if struct mtd_req_stats gets extended with write
+statistics at some point.
+
+Signed-off-by: Michał Kępień <[email protected]>
+Signed-off-by: Miquel Raynal <[email protected]>
+Link: https://lore.kernel.org/linux-mtd/[email protected]
+---
+ drivers/mtd/inftlcore.c                 | 6 +++---
+ drivers/mtd/mtdswap.c                   | 6 +++---
+ drivers/mtd/nand/onenand/onenand_base.c | 4 ++--
+ drivers/mtd/nand/onenand/onenand_bbt.c  | 2 +-
+ drivers/mtd/nand/raw/nand_bbt.c         | 8 ++++----
+ drivers/mtd/nand/raw/sm_common.c        | 2 +-
+ drivers/mtd/nftlcore.c                  | 6 +++---
+ drivers/mtd/sm_ftl.c                    | 4 ++--
+ drivers/mtd/ssfdc.c                     | 2 +-
+ drivers/mtd/tests/nandbiterrs.c         | 2 +-
+ drivers/mtd/tests/oobtest.c             | 8 ++++----
+ drivers/mtd/tests/readtest.c            | 2 +-
+ fs/jffs2/wbuf.c                         | 6 +++---
+ 13 files changed, 29 insertions(+), 29 deletions(-)
+
+--- a/drivers/mtd/inftlcore.c
++++ b/drivers/mtd/inftlcore.c
+@@ -136,7 +136,7 @@ static void inftl_remove_dev(struct mtd_
+ int inftl_read_oob(struct mtd_info *mtd, loff_t offs, size_t len,
+ 		   size_t *retlen, uint8_t *buf)
+ {
+-	struct mtd_oob_ops ops;
++	struct mtd_oob_ops ops = { };
+ 	int res;
+ 
+ 	ops.mode = MTD_OPS_PLACE_OOB;
+@@ -156,7 +156,7 @@ int inftl_read_oob(struct mtd_info *mtd,
+ int inftl_write_oob(struct mtd_info *mtd, loff_t offs, size_t len,
+ 		    size_t *retlen, uint8_t *buf)
+ {
+-	struct mtd_oob_ops ops;
++	struct mtd_oob_ops ops = { };
+ 	int res;
+ 
+ 	ops.mode = MTD_OPS_PLACE_OOB;
+@@ -176,7 +176,7 @@ int inftl_write_oob(struct mtd_info *mtd
+ static int inftl_write(struct mtd_info *mtd, loff_t offs, size_t len,
+ 		       size_t *retlen, uint8_t *buf, uint8_t *oob)
+ {
+-	struct mtd_oob_ops ops;
++	struct mtd_oob_ops ops = { };
+ 	int res;
+ 
+ 	ops.mode = MTD_OPS_PLACE_OOB;
+--- a/drivers/mtd/mtdswap.c
++++ b/drivers/mtd/mtdswap.c
+@@ -323,7 +323,7 @@ static int mtdswap_read_markers(struct m
+ 	struct mtdswap_oobdata *data, *data2;
+ 	int ret;
+ 	loff_t offset;
+-	struct mtd_oob_ops ops;
++	struct mtd_oob_ops ops = { };
+ 
+ 	offset = mtdswap_eb_offset(d, eb);
+ 
+@@ -370,7 +370,7 @@ static int mtdswap_write_marker(struct m
+ 	struct mtdswap_oobdata n;
+ 	int ret;
+ 	loff_t offset;
+-	struct mtd_oob_ops ops;
++	struct mtd_oob_ops ops = { };
+ 
+ 	ops.ooboffs = 0;
+ 	ops.oobbuf = (uint8_t *)&n;
+@@ -879,7 +879,7 @@ static unsigned int mtdswap_eblk_passes(
+ 	loff_t base, pos;
+ 	unsigned int *p1 = (unsigned int *)d->page_buf;
+ 	unsigned char *p2 = (unsigned char *)d->oob_buf;
+-	struct mtd_oob_ops ops;
++	struct mtd_oob_ops ops = { };
+ 	int ret;
+ 
+ 	ops.mode = MTD_OPS_AUTO_OOB;
+--- a/drivers/mtd/nand/onenand/onenand_base.c
++++ b/drivers/mtd/nand/onenand/onenand_base.c
+@@ -2935,7 +2935,7 @@ static int do_otp_write(struct mtd_info
+ 	struct onenand_chip *this = mtd->priv;
+ 	unsigned char *pbuf = buf;
+ 	int ret;
+-	struct mtd_oob_ops ops;
++	struct mtd_oob_ops ops = { };
+ 
+ 	/* Force buffer page aligned */
+ 	if (len < mtd->writesize) {
+@@ -2977,7 +2977,7 @@ static int do_otp_lock(struct mtd_info *
+ 		size_t *retlen, u_char *buf)
+ {
+ 	struct onenand_chip *this = mtd->priv;
+-	struct mtd_oob_ops ops;
++	struct mtd_oob_ops ops = { };
+ 	int ret;
+ 
+ 	if (FLEXONENAND(this)) {
+--- a/drivers/mtd/nand/onenand/onenand_bbt.c
++++ b/drivers/mtd/nand/onenand/onenand_bbt.c
+@@ -61,7 +61,7 @@ static int create_bbt(struct mtd_info *m
+ 	int startblock;
+ 	loff_t from;
+ 	size_t readlen, ooblen;
+-	struct mtd_oob_ops ops;
++	struct mtd_oob_ops ops = { };
+ 	int rgn;
+ 
+ 	printk(KERN_INFO "Scanning device for bad blocks\n");
+--- a/drivers/mtd/nand/raw/nand_bbt.c
++++ b/drivers/mtd/nand/raw/nand_bbt.c
+@@ -313,7 +313,7 @@ static int scan_read_oob(struct nand_chi
+ 			 size_t len)
+ {
+ 	struct mtd_info *mtd = nand_to_mtd(this);
+-	struct mtd_oob_ops ops;
++	struct mtd_oob_ops ops = { };
+ 	int res, ret = 0;
+ 
+ 	ops.mode = MTD_OPS_PLACE_OOB;
+@@ -354,7 +354,7 @@ static int scan_write_bbt(struct nand_ch
+ 			  uint8_t *buf, uint8_t *oob)
+ {
+ 	struct mtd_info *mtd = nand_to_mtd(this);
+-	struct mtd_oob_ops ops;
++	struct mtd_oob_ops ops = { };
+ 
+ 	ops.mode = MTD_OPS_PLACE_OOB;
+ 	ops.ooboffs = 0;
+@@ -416,7 +416,7 @@ static int scan_block_fast(struct nand_c
+ {
+ 	struct mtd_info *mtd = nand_to_mtd(this);
+ 
+-	struct mtd_oob_ops ops;
++	struct mtd_oob_ops ops = { };
+ 	int ret, page_offset;
+ 
+ 	ops.ooblen = mtd->oobsize;
+@@ -756,7 +756,7 @@ static int write_bbt(struct nand_chip *t
+ 	uint8_t rcode = td->reserved_block_code;
+ 	size_t retlen, len = 0;
+ 	loff_t to;
+-	struct mtd_oob_ops ops;
++	struct mtd_oob_ops ops = { };
+ 
+ 	ops.ooblen = mtd->oobsize;
+ 	ops.ooboffs = 0;
+--- a/drivers/mtd/nand/raw/sm_common.c
++++ b/drivers/mtd/nand/raw/sm_common.c
+@@ -99,7 +99,7 @@ static const struct mtd_ooblayout_ops oo
+ static int sm_block_markbad(struct nand_chip *chip, loff_t ofs)
+ {
+ 	struct mtd_info *mtd = nand_to_mtd(chip);
+-	struct mtd_oob_ops ops;
++	struct mtd_oob_ops ops = { };
+ 	struct sm_oob oob;
+ 	int ret;
+ 
+--- a/drivers/mtd/nftlcore.c
++++ b/drivers/mtd/nftlcore.c
+@@ -124,7 +124,7 @@ int nftl_read_oob(struct mtd_info *mtd,
+ 		  size_t *retlen, uint8_t *buf)
+ {
+ 	loff_t mask = mtd->writesize - 1;
+-	struct mtd_oob_ops ops;
++	struct mtd_oob_ops ops = { };
+ 	int res;
+ 
+ 	ops.mode = MTD_OPS_PLACE_OOB;
+@@ -145,7 +145,7 @@ int nftl_write_oob(struct mtd_info *mtd,
+ 		   size_t *retlen, uint8_t *buf)
+ {
+ 	loff_t mask = mtd->writesize - 1;
+-	struct mtd_oob_ops ops;
++	struct mtd_oob_ops ops = { };
+ 	int res;
+ 
+ 	ops.mode = MTD_OPS_PLACE_OOB;
+@@ -168,7 +168,7 @@ static int nftl_write(struct mtd_info *m
+ 		      size_t *retlen, uint8_t *buf, uint8_t *oob)
+ {
+ 	loff_t mask = mtd->writesize - 1;
+-	struct mtd_oob_ops ops;
++	struct mtd_oob_ops ops = { };
+ 	int res;
+ 
+ 	ops.mode = MTD_OPS_PLACE_OOB;
+--- a/drivers/mtd/sm_ftl.c
++++ b/drivers/mtd/sm_ftl.c
+@@ -239,7 +239,7 @@ static int sm_read_sector(struct sm_ftl
+ 			  uint8_t *buffer, struct sm_oob *oob)
+ {
+ 	struct mtd_info *mtd = ftl->trans->mtd;
+-	struct mtd_oob_ops ops;
++	struct mtd_oob_ops ops = { };
+ 	struct sm_oob tmp_oob;
+ 	int ret = -EIO;
+ 	int try = 0;
+@@ -323,7 +323,7 @@ static int sm_write_sector(struct sm_ftl
+ 			   int zone, int block, int boffset,
+ 			   uint8_t *buffer, struct sm_oob *oob)
+ {
+-	struct mtd_oob_ops ops;
++	struct mtd_oob_ops ops = { };
+ 	struct mtd_info *mtd = ftl->trans->mtd;
+ 	int ret;
+ 
+--- a/drivers/mtd/ssfdc.c
++++ b/drivers/mtd/ssfdc.c
+@@ -163,7 +163,7 @@ static int read_physical_sector(struct m
+ /* Read redundancy area (wrapper to MTD_READ_OOB */
+ static int read_raw_oob(struct mtd_info *mtd, loff_t offs, uint8_t *buf)
+ {
+-	struct mtd_oob_ops ops;
++	struct mtd_oob_ops ops = { };
+ 	int ret;
+ 
+ 	ops.mode = MTD_OPS_RAW;
+--- a/drivers/mtd/tests/nandbiterrs.c
++++ b/drivers/mtd/tests/nandbiterrs.c
+@@ -99,7 +99,7 @@ static int write_page(int log)
+ static int rewrite_page(int log)
+ {
+ 	int err = 0;
+-	struct mtd_oob_ops ops;
++	struct mtd_oob_ops ops = { };
+ 
+ 	if (log)
+ 		pr_info("rewrite page\n");
+--- a/drivers/mtd/tests/oobtest.c
++++ b/drivers/mtd/tests/oobtest.c
+@@ -56,7 +56,7 @@ static void do_vary_offset(void)
+ static int write_eraseblock(int ebnum)
+ {
+ 	int i;
+-	struct mtd_oob_ops ops;
++	struct mtd_oob_ops ops = { };
+ 	int err = 0;
+ 	loff_t addr = (loff_t)ebnum * mtd->erasesize;
+ 
+@@ -165,7 +165,7 @@ static size_t memffshow(loff_t addr, lof
+ static int verify_eraseblock(int ebnum)
+ {
+ 	int i;
+-	struct mtd_oob_ops ops;
++	struct mtd_oob_ops ops = { };
+ 	int err = 0;
+ 	loff_t addr = (loff_t)ebnum * mtd->erasesize;
+ 	size_t bitflips;
+@@ -260,7 +260,7 @@ static int verify_eraseblock(int ebnum)
+ 
+ static int verify_eraseblock_in_one_go(int ebnum)
+ {
+-	struct mtd_oob_ops ops;
++	struct mtd_oob_ops ops = { };
+ 	int err = 0;
+ 	loff_t addr = (loff_t)ebnum * mtd->erasesize;
+ 	size_t len = mtd->oobavail * pgcnt;
+@@ -338,7 +338,7 @@ static int __init mtd_oobtest_init(void)
+ 	int err = 0;
+ 	unsigned int i;
+ 	uint64_t tmp;
+-	struct mtd_oob_ops ops;
++	struct mtd_oob_ops ops = { };
+ 	loff_t addr = 0, addr0;
+ 
+ 	printk(KERN_INFO "\n");
+--- a/drivers/mtd/tests/readtest.c
++++ b/drivers/mtd/tests/readtest.c
+@@ -47,7 +47,7 @@ static int read_eraseblock_by_page(int e
+ 				err = ret;
+ 		}
+ 		if (mtd->oobsize) {
+-			struct mtd_oob_ops ops;
++			struct mtd_oob_ops ops = { };
+ 
+ 			ops.mode      = MTD_OPS_PLACE_OOB;
+ 			ops.len       = 0;
+--- a/fs/jffs2/wbuf.c
++++ b/fs/jffs2/wbuf.c
+@@ -1035,7 +1035,7 @@ int jffs2_check_oob_empty(struct jffs2_s
+ {
+ 	int i, ret;
+ 	int cmlen = min_t(int, c->oobavail, OOB_CM_SIZE);
+-	struct mtd_oob_ops ops;
++	struct mtd_oob_ops ops = { };
+ 
+ 	ops.mode = MTD_OPS_AUTO_OOB;
+ 	ops.ooblen = NR_OOB_SCAN_PAGES * c->oobavail;
+@@ -1076,7 +1076,7 @@ int jffs2_check_oob_empty(struct jffs2_s
+ int jffs2_check_nand_cleanmarker(struct jffs2_sb_info *c,
+ 				 struct jffs2_eraseblock *jeb)
+ {
+-	struct mtd_oob_ops ops;
++	struct mtd_oob_ops ops = { };
+ 	int ret, cmlen = min_t(int, c->oobavail, OOB_CM_SIZE);
+ 
+ 	ops.mode = MTD_OPS_AUTO_OOB;
+@@ -1101,7 +1101,7 @@ int jffs2_write_nand_cleanmarker(struct
+ 				 struct jffs2_eraseblock *jeb)
+ {
+ 	int ret;
+-	struct mtd_oob_ops ops;
++	struct mtd_oob_ops ops = { };
+ 	int cmlen = min_t(int, c->oobavail, OOB_CM_SIZE);
+ 
+ 	ops.mode = MTD_OPS_AUTO_OOB;

+ 172 - 0
target/linux/generic/backport-6.1/423-v6.1-0003-mtd-add-ECC-error-accounting-for-each-read-request.patch

@@ -0,0 +1,172 @@
+From 2ed18d818d1f7492172f8dd5904344c7d367e8ed Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Micha=C5=82=20K=C4=99pie=C5=84?= <[email protected]>
+Date: Wed, 29 Jun 2022 14:57:36 +0200
+Subject: [PATCH 3/4] mtd: add ECC error accounting for each read request
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Extend struct mtd_req_stats with two new fields holding the number of
+corrected bitflips and uncorrectable errors detected during a read
+operation.  This is a prerequisite for ultimately passing those counters
+to user space, where they can be useful to applications for making
+better-informed choices about moving data around.
+
+Unlike 'max_bitflips' (which is set - in a common code path - to the
+return value of a function called while the MTD device's mutex is held),
+these counters have to be maintained in each MTD driver which defines
+the '_read_oob' callback because the statistics need to be calculated
+while the MTD device's mutex is held.
+
+Suggested-by: Boris Brezillon <[email protected]>
+Signed-off-by: Michał Kępień <[email protected]>
+Signed-off-by: Miquel Raynal <[email protected]>
+Link: https://lore.kernel.org/linux-mtd/[email protected]
+---
+ drivers/mtd/devices/docg3.c             |  8 ++++++++
+ drivers/mtd/nand/onenand/onenand_base.c | 12 ++++++++++++
+ drivers/mtd/nand/raw/nand_base.c        | 10 ++++++++++
+ drivers/mtd/nand/spi/core.c             | 10 ++++++++++
+ include/linux/mtd/mtd.h                 |  2 ++
+ 5 files changed, 42 insertions(+)
+
+--- a/drivers/mtd/devices/docg3.c
++++ b/drivers/mtd/devices/docg3.c
+@@ -871,6 +871,7 @@ static int doc_read_oob(struct mtd_info
+ 	u8 *buf = ops->datbuf;
+ 	size_t len, ooblen, nbdata, nboob;
+ 	u8 hwecc[DOC_ECC_BCH_SIZE], eccconf1;
++	struct mtd_ecc_stats old_stats;
+ 	int max_bitflips = 0;
+ 
+ 	if (buf)
+@@ -895,6 +896,7 @@ static int doc_read_oob(struct mtd_info
+ 	ret = 0;
+ 	skip = from % DOC_LAYOUT_PAGE_SIZE;
+ 	mutex_lock(&docg3->cascade->lock);
++	old_stats = mtd->ecc_stats;
+ 	while (ret >= 0 && (len > 0 || ooblen > 0)) {
+ 		calc_block_sector(from - skip, &block0, &block1, &page, &ofs,
+ 			docg3->reliable);
+@@ -966,6 +968,12 @@ static int doc_read_oob(struct mtd_info
+ 	}
+ 
+ out:
++	if (ops->stats) {
++		ops->stats->uncorrectable_errors +=
++			mtd->ecc_stats.failed - old_stats.failed;
++		ops->stats->corrected_bitflips +=
++			mtd->ecc_stats.corrected - old_stats.corrected;
++	}
+ 	mutex_unlock(&docg3->cascade->lock);
+ 	return ret;
+ err_in_read:
+--- a/drivers/mtd/nand/onenand/onenand_base.c
++++ b/drivers/mtd/nand/onenand/onenand_base.c
+@@ -1440,6 +1440,7 @@ static int onenand_read_oob(struct mtd_i
+ 			    struct mtd_oob_ops *ops)
+ {
+ 	struct onenand_chip *this = mtd->priv;
++	struct mtd_ecc_stats old_stats;
+ 	int ret;
+ 
+ 	switch (ops->mode) {
+@@ -1453,12 +1454,23 @@ static int onenand_read_oob(struct mtd_i
+ 	}
+ 
+ 	onenand_get_device(mtd, FL_READING);
++
++	old_stats = mtd->ecc_stats;
++
+ 	if (ops->datbuf)
+ 		ret = ONENAND_IS_4KB_PAGE(this) ?
+ 			onenand_mlc_read_ops_nolock(mtd, from, ops) :
+ 			onenand_read_ops_nolock(mtd, from, ops);
+ 	else
+ 		ret = onenand_read_oob_nolock(mtd, from, ops);
++
++	if (ops->stats) {
++		ops->stats->uncorrectable_errors +=
++			mtd->ecc_stats.failed - old_stats.failed;
++		ops->stats->corrected_bitflips +=
++			mtd->ecc_stats.corrected - old_stats.corrected;
++	}
++
+ 	onenand_release_device(mtd);
+ 
+ 	return ret;
+--- a/drivers/mtd/nand/raw/nand_base.c
++++ b/drivers/mtd/nand/raw/nand_base.c
+@@ -3815,6 +3815,7 @@ static int nand_read_oob(struct mtd_info
+ 			 struct mtd_oob_ops *ops)
+ {
+ 	struct nand_chip *chip = mtd_to_nand(mtd);
++	struct mtd_ecc_stats old_stats;
+ 	int ret;
+ 
+ 	ops->retlen = 0;
+@@ -3826,11 +3827,20 @@ static int nand_read_oob(struct mtd_info
+ 
+ 	nand_get_device(chip);
+ 
++	old_stats = mtd->ecc_stats;
++
+ 	if (!ops->datbuf)
+ 		ret = nand_do_read_oob(chip, from, ops);
+ 	else
+ 		ret = nand_do_read_ops(chip, from, ops);
+ 
++	if (ops->stats) {
++		ops->stats->uncorrectable_errors +=
++			mtd->ecc_stats.failed - old_stats.failed;
++		ops->stats->corrected_bitflips +=
++			mtd->ecc_stats.corrected - old_stats.corrected;
++	}
++
+ 	nand_release_device(chip);
+ 	return ret;
+ }
+--- a/drivers/mtd/nand/spi/core.c
++++ b/drivers/mtd/nand/spi/core.c
+@@ -629,6 +629,7 @@ static int spinand_mtd_read(struct mtd_i
+ {
+ 	struct spinand_device *spinand = mtd_to_spinand(mtd);
+ 	struct nand_device *nand = mtd_to_nanddev(mtd);
++	struct mtd_ecc_stats old_stats;
+ 	unsigned int max_bitflips = 0;
+ 	struct nand_io_iter iter;
+ 	bool disable_ecc = false;
+@@ -640,6 +641,8 @@ static int spinand_mtd_read(struct mtd_i
+ 
+ 	mutex_lock(&spinand->lock);
+ 
++	old_stats = mtd->ecc_stats;
++
+ 	nanddev_io_for_each_page(nand, NAND_PAGE_READ, from, ops, &iter) {
+ 		if (disable_ecc)
+ 			iter.req.mode = MTD_OPS_RAW;
+@@ -662,6 +665,13 @@ static int spinand_mtd_read(struct mtd_i
+ 		ops->oobretlen += iter.req.ooblen;
+ 	}
+ 
++	if (ops->stats) {
++		ops->stats->uncorrectable_errors +=
++			mtd->ecc_stats.failed - old_stats.failed;
++		ops->stats->corrected_bitflips +=
++			mtd->ecc_stats.corrected - old_stats.corrected;
++	}
++
+ 	mutex_unlock(&spinand->lock);
+ 
+ 	if (ecc_failed && !ret)
+--- a/include/linux/mtd/mtd.h
++++ b/include/linux/mtd/mtd.h
+@@ -41,6 +41,8 @@ struct mtd_erase_region_info {
+ };
+ 
+ struct mtd_req_stats {
++	unsigned int uncorrectable_errors;
++	unsigned int corrected_bitflips;
+ 	unsigned int max_bitflips;
+ };
+ 

+ 321 - 0
target/linux/generic/backport-6.1/423-v6.1-0004-mtdchar-add-MEMREAD-ioctl.patch

@@ -0,0 +1,321 @@
+From 2c9745d36e04ac27161acd78514f647b9b587ad4 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Micha=C5=82=20K=C4=99pie=C5=84?= <[email protected]>
+Date: Wed, 29 Jun 2022 14:57:37 +0200
+Subject: [PATCH 4/4] mtdchar: add MEMREAD ioctl
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+User-space applications making use of MTD devices via /dev/mtd*
+character devices currently have limited capabilities for reading data:
+
+  - only deprecated methods of accessing OOB layout information exist,
+
+  - there is no way to explicitly specify MTD operation mode to use; it
+    is auto-selected based on the MTD file mode (MTD_FILE_MODE_*) set
+    for the character device; in particular, this prevents using
+    MTD_OPS_AUTO_OOB for reads,
+
+  - all existing user-space interfaces which cause mtd_read() or
+    mtd_read_oob() to be called (via mtdchar_read() and
+    mtdchar_read_oob(), respectively) return success even when those
+    functions return -EUCLEAN or -EBADMSG; this renders user-space
+    applications using these interfaces unaware of any corrected
+    bitflips or uncorrectable ECC errors detected during reads.
+
+Note that the existing MEMWRITE ioctl allows the MTD operation mode to
+be explicitly set, allowing user-space applications to write page data
+and OOB data without requiring them to know anything about the OOB
+layout of the MTD device they are writing to (MTD_OPS_AUTO_OOB).  Also,
+the MEMWRITE ioctl does not mangle the return value of mtd_write_oob().
+
+Add a new ioctl, MEMREAD, which addresses the above issues.  It is
+intended to be a read-side counterpart of the existing MEMWRITE ioctl.
+Similarly to the latter, the read operation is performed in a loop which
+processes at most mtd->erasesize bytes in each iteration.  This is done
+to prevent unbounded memory allocations caused by calling kmalloc() with
+the 'size' argument taken directly from the struct mtd_read_req provided
+by user space.  However, the new ioctl is implemented so that the values
+it returns match those that would have been returned if just a single
+mtd_read_oob() call was issued to handle the entire read operation in
+one go.
+
+Note that while just returning -EUCLEAN or -EBADMSG to user space would
+already be a valid and useful indication of the ECC algorithm detecting
+errors during a read operation, that signal would not be granular enough
+to cover all use cases.  For example, knowing the maximum number of
+bitflips detected in a single ECC step during a read operation performed
+on a given page may be useful when dealing with an MTD partition whose
+ECC layout varies across pages (e.g. a partition consisting of a
+bootloader area using a "custom" ECC layout followed by data pages using
+a "standard" ECC layout).  To address that, include ECC statistics in
+the structure returned to user space by the new MEMREAD ioctl.
+
+Link: https://www.infradead.org/pipermail/linux-mtd/2016-April/067085.html
+
+Suggested-by: Boris Brezillon <[email protected]>
+Signed-off-by: Michał Kępień <[email protected]>
+Acked-by: Richard Weinberger <[email protected]>
+Signed-off-by: Miquel Raynal <[email protected]>
+Link: https://lore.kernel.org/linux-mtd/[email protected]
+---
+ drivers/mtd/mtdchar.c      | 139 +++++++++++++++++++++++++++++++++++++
+ include/uapi/mtd/mtd-abi.h |  64 +++++++++++++++--
+ 2 files changed, 198 insertions(+), 5 deletions(-)
+
+--- a/drivers/mtd/mtdchar.c
++++ b/drivers/mtd/mtdchar.c
+@@ -621,6 +621,137 @@ static int mtdchar_write_ioctl(struct mt
+ 	return ret;
+ }
+ 
++static int mtdchar_read_ioctl(struct mtd_info *mtd,
++		struct mtd_read_req __user *argp)
++{
++	struct mtd_info *master = mtd_get_master(mtd);
++	struct mtd_read_req req;
++	void __user *usr_data, *usr_oob;
++	uint8_t *datbuf = NULL, *oobbuf = NULL;
++	size_t datbuf_len, oobbuf_len;
++	size_t orig_len, orig_ooblen;
++	int ret = 0;
++
++	if (copy_from_user(&req, argp, sizeof(req)))
++		return -EFAULT;
++
++	orig_len = req.len;
++	orig_ooblen = req.ooblen;
++
++	usr_data = (void __user *)(uintptr_t)req.usr_data;
++	usr_oob = (void __user *)(uintptr_t)req.usr_oob;
++
++	if (!master->_read_oob)
++		return -EOPNOTSUPP;
++
++	if (!usr_data)
++		req.len = 0;
++
++	if (!usr_oob)
++		req.ooblen = 0;
++
++	req.ecc_stats.uncorrectable_errors = 0;
++	req.ecc_stats.corrected_bitflips = 0;
++	req.ecc_stats.max_bitflips = 0;
++
++	req.len &= 0xffffffff;
++	req.ooblen &= 0xffffffff;
++
++	if (req.start + req.len > mtd->size) {
++		ret = -EINVAL;
++		goto out;
++	}
++
++	datbuf_len = min_t(size_t, req.len, mtd->erasesize);
++	if (datbuf_len > 0) {
++		datbuf = kvmalloc(datbuf_len, GFP_KERNEL);
++		if (!datbuf) {
++			ret = -ENOMEM;
++			goto out;
++		}
++	}
++
++	oobbuf_len = min_t(size_t, req.ooblen, mtd->erasesize);
++	if (oobbuf_len > 0) {
++		oobbuf = kvmalloc(oobbuf_len, GFP_KERNEL);
++		if (!oobbuf) {
++			ret = -ENOMEM;
++			goto out;
++		}
++	}
++
++	while (req.len > 0 || (!usr_data && req.ooblen > 0)) {
++		struct mtd_req_stats stats;
++		struct mtd_oob_ops ops = {
++			.mode = req.mode,
++			.len = min_t(size_t, req.len, datbuf_len),
++			.ooblen = min_t(size_t, req.ooblen, oobbuf_len),
++			.datbuf = datbuf,
++			.oobbuf = oobbuf,
++			.stats = &stats,
++		};
++
++		/*
++		 * Shorten non-page-aligned, eraseblock-sized reads so that the
++		 * read ends on an eraseblock boundary.  This is necessary in
++		 * order to prevent OOB data for some pages from being
++		 * duplicated in the output of non-page-aligned reads requiring
++		 * multiple mtd_read_oob() calls to be completed.
++		 */
++		if (ops.len == mtd->erasesize)
++			ops.len -= mtd_mod_by_ws(req.start + ops.len, mtd);
++
++		ret = mtd_read_oob(mtd, (loff_t)req.start, &ops);
++
++		req.ecc_stats.uncorrectable_errors +=
++			stats.uncorrectable_errors;
++		req.ecc_stats.corrected_bitflips += stats.corrected_bitflips;
++		req.ecc_stats.max_bitflips =
++			max(req.ecc_stats.max_bitflips, stats.max_bitflips);
++
++		if (ret && !mtd_is_bitflip_or_eccerr(ret))
++			break;
++
++		if (copy_to_user(usr_data, ops.datbuf, ops.retlen) ||
++		    copy_to_user(usr_oob, ops.oobbuf, ops.oobretlen)) {
++			ret = -EFAULT;
++			break;
++		}
++
++		req.start += ops.retlen;
++		req.len -= ops.retlen;
++		usr_data += ops.retlen;
++
++		req.ooblen -= ops.oobretlen;
++		usr_oob += ops.oobretlen;
++	}
++
++	/*
++	 * As multiple iterations of the above loop (and therefore multiple
++	 * mtd_read_oob() calls) may be necessary to complete the read request,
++	 * adjust the final return code to ensure it accounts for all detected
++	 * ECC errors.
++	 */
++	if (!ret || mtd_is_bitflip(ret)) {
++		if (req.ecc_stats.uncorrectable_errors > 0)
++			ret = -EBADMSG;
++		else if (req.ecc_stats.corrected_bitflips > 0)
++			ret = -EUCLEAN;
++	}
++
++out:
++	req.len = orig_len - req.len;
++	req.ooblen = orig_ooblen - req.ooblen;
++
++	if (copy_to_user(argp, &req, sizeof(req)))
++		ret = -EFAULT;
++
++	kvfree(datbuf);
++	kvfree(oobbuf);
++
++	return ret;
++}
++
+ static int mtdchar_ioctl(struct file *file, u_int cmd, u_long arg)
+ {
+ 	struct mtd_file_info *mfi = file->private_data;
+@@ -643,6 +774,7 @@ static int mtdchar_ioctl(struct file *fi
+ 	case MEMGETINFO:
+ 	case MEMREADOOB:
+ 	case MEMREADOOB64:
++	case MEMREAD:
+ 	case MEMISLOCKED:
+ 	case MEMGETOOBSEL:
+ 	case MEMGETBADBLOCK:
+@@ -817,6 +949,13 @@ static int mtdchar_ioctl(struct file *fi
+ 		break;
+ 	}
+ 
++	case MEMREAD:
++	{
++		ret = mtdchar_read_ioctl(mtd,
++		      (struct mtd_read_req __user *)arg);
++		break;
++	}
++
+ 	case MEMLOCK:
+ 	{
+ 		struct erase_info_user einfo;
+--- a/include/uapi/mtd/mtd-abi.h
++++ b/include/uapi/mtd/mtd-abi.h
+@@ -55,9 +55,9 @@ struct mtd_oob_buf64 {
+  * @MTD_OPS_RAW:	data are transferred as-is, with no error correction;
+  *			this mode implies %MTD_OPS_PLACE_OOB
+  *
+- * These modes can be passed to ioctl(MEMWRITE) and are also used internally.
+- * See notes on "MTD file modes" for discussion on %MTD_OPS_RAW vs.
+- * %MTD_FILE_MODE_RAW.
++ * These modes can be passed to ioctl(MEMWRITE) and ioctl(MEMREAD); they are
++ * also used internally. See notes on "MTD file modes" for discussion on
++ * %MTD_OPS_RAW vs. %MTD_FILE_MODE_RAW.
+  */
+ enum {
+ 	MTD_OPS_PLACE_OOB = 0,
+@@ -91,6 +91,53 @@ struct mtd_write_req {
+ 	__u8 padding[7];
+ };
+ 
++/**
++ * struct mtd_read_req_ecc_stats - ECC statistics for a read operation
++ *
++ * @uncorrectable_errors: the number of uncorrectable errors that happened
++ *			  during the read operation
++ * @corrected_bitflips: the number of bitflips corrected during the read
++ *			operation
++ * @max_bitflips: the maximum number of bitflips detected in any single ECC
++ *		  step for the data read during the operation; this information
++ *		  can be used to decide whether the data stored in a specific
++ *		  region of the MTD device should be moved somewhere else to
++ *		  avoid data loss.
++ */
++struct mtd_read_req_ecc_stats {
++	__u32 uncorrectable_errors;
++	__u32 corrected_bitflips;
++	__u32 max_bitflips;
++};
++
++/**
++ * struct mtd_read_req - data structure for requesting a read operation
++ *
++ * @start:	start address
++ * @len:	length of data buffer (only lower 32 bits are used)
++ * @ooblen:	length of OOB buffer (only lower 32 bits are used)
++ * @usr_data:	user-provided data buffer
++ * @usr_oob:	user-provided OOB buffer
++ * @mode:	MTD mode (see "MTD operation modes")
++ * @padding:	reserved, must be set to 0
++ * @ecc_stats:	ECC statistics for the read operation
++ *
++ * This structure supports ioctl(MEMREAD) operations, allowing data and/or OOB
++ * reads in various modes. To read from OOB-only, set @usr_data == NULL, and to
++ * read data-only, set @usr_oob == NULL. However, setting both @usr_data and
++ * @usr_oob to NULL is not allowed.
++ */
++struct mtd_read_req {
++	__u64 start;
++	__u64 len;
++	__u64 ooblen;
++	__u64 usr_data;
++	__u64 usr_oob;
++	__u8 mode;
++	__u8 padding[7];
++	struct mtd_read_req_ecc_stats ecc_stats;
++};
++
+ #define MTD_ABSENT		0
+ #define MTD_RAM			1
+ #define MTD_ROM			2
+@@ -207,6 +254,12 @@ struct otp_info {
+ #define MEMWRITE		_IOWR('M', 24, struct mtd_write_req)
+ /* Erase a given range of user data (must be in mode %MTD_FILE_MODE_OTP_USER) */
+ #define OTPERASE		_IOW('M', 25, struct otp_info)
++/*
++ * Most generic read interface; can read in-band and/or out-of-band in various
++ * modes (see "struct mtd_read_req"). This ioctl is not supported for flashes
++ * without OOB, e.g., NOR flash.
++ */
++#define MEMREAD			_IOWR('M', 26, struct mtd_read_req)
+ 
+ /*
+  * Obsolete legacy interface. Keep it in order not to break userspace
+@@ -270,8 +323,9 @@ struct mtd_ecc_stats {
+  * Note: %MTD_FILE_MODE_RAW provides the same functionality as %MTD_OPS_RAW -
+  * raw access to the flash, without error correction or autoplacement schemes.
+  * Wherever possible, the MTD_OPS_* mode will override the MTD_FILE_MODE_* mode
+- * (e.g., when using ioctl(MEMWRITE)), but in some cases, the MTD_FILE_MODE is
+- * used out of necessity (e.g., `write()', ioctl(MEMWRITEOOB64)).
++ * (e.g., when using ioctl(MEMWRITE) or ioctl(MEMREAD)), but in some cases, the
++ * MTD_FILE_MODE is used out of necessity (e.g., `write()',
++ * ioctl(MEMWRITEOOB64)).
+  */
+ enum mtd_file_modes {
+ 	MTD_FILE_MODE_NORMAL = MTD_OTP_OFF,
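
To make the new interface concrete, here is a hedged user-space sketch. The
program, device path and page size are assumptions for illustration; it relies
only on the MEMREAD ioctl and struct mtd_read_req defined above and needs UAPI
headers that already contain this patch:

/* Hypothetical user-space example: data-only MEMREAD of the first page. */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <mtd/mtd-user.h>

int main(void)
{
	uint8_t buf[2048];			/* assumes 2048-byte pages */
	struct mtd_read_req req = {
		.start    = 0,
		.len      = sizeof(buf),
		.usr_data = (uintptr_t)buf,
		.usr_oob  = 0,			/* data-only read */
		.mode     = MTD_OPS_AUTO_OOB,
	};
	int fd = open("/dev/mtd0", O_RDONLY);

	if (fd < 0 || ioctl(fd, MEMREAD, &req) < 0) {
		perror("MEMREAD");	/* -EUCLEAN/-EBADMSG arrive via errno */
		return 1;
	}

	printf("read %llu bytes, %u corrected bitflips, %u uncorrectable errors\n",
	       (unsigned long long)req.len,
	       req.ecc_stats.corrected_bitflips,
	       req.ecc_stats.uncorrectable_errors);
	return 0;
}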

+ 35 - 0
target/linux/generic/backport-6.1/423-v6.3-mtd-spinand-macronix-use-scratch-buffer-for-DMA-oper.patch

@@ -0,0 +1,35 @@
+From ebed787a0becb9354f0a23620a5130cccd6c730c Mon Sep 17 00:00:00 2001
+From: Daniel Golle <[email protected]>
+Date: Thu, 19 Jan 2023 03:45:43 +0000
+Subject: [PATCH] mtd: spinand: macronix: use scratch buffer for DMA operation
+
+The mx35lf1ge4ab_get_eccsr() function uses an SPI DMA operation to
+read the eccsr, hence the buffer should not be on the stack. Since commit
+380583227c0c7f ("spi: spi-mem: Add extra sanity checks on the op param")
+the kernel emits a warning and blocks such operations.
+
+Use the scratch buffer to get eccsr instead of trying to directly read
+into a stack-allocated variable.
+
+Signed-off-by: Daniel Golle <[email protected]>
+Reviewed-by: Dhruva Gole <[email protected]>
+Signed-off-by: Miquel Raynal <[email protected]>
+Link: https://lore.kernel.org/linux-mtd/[email protected]
+---
+ drivers/mtd/nand/spi/macronix.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/mtd/nand/spi/macronix.c
++++ b/drivers/mtd/nand/spi/macronix.c
+@@ -83,9 +83,10 @@ static int mx35lf1ge4ab_ecc_get_status(s
+ 		 * in order to avoid forcing the wear-leveling layer to move
+ 		 * data around if it's not necessary.
+ 		 */
+-		if (mx35lf1ge4ab_get_eccsr(spinand, &eccsr))
++		if (mx35lf1ge4ab_get_eccsr(spinand, spinand->scratchbuf))
+ 			return nanddev_get_ecc_conf(nand)->strength;
+ 
++		eccsr = *spinand->scratchbuf;
+ 		if (WARN_ON(eccsr > nanddev_get_ecc_conf(nand)->strength ||
+ 			    !eccsr))
+ 			return nanddev_get_ecc_conf(nand)->strength;

+ 47 - 0
target/linux/generic/backport-6.1/424-v6.4-0004-mtd-core-prepare-mtd_otp_nvmem_add-to-handle-EPROBE_.patch

@@ -0,0 +1,47 @@
+From 281f7a6c1a33fffcde32001bacbb4f672140fbf9 Mon Sep 17 00:00:00 2001
+From: Michael Walle <[email protected]>
+Date: Wed, 8 Mar 2023 09:20:21 +0100
+Subject: [PATCH] mtd: core: prepare mtd_otp_nvmem_add() to handle
+ -EPROBE_DEFER
+
+NVMEM will soon gain support for nvmem layouts, and these might not be
+ready when nvmem_register() is called, so it might return -EPROBE_DEFER.
+Don't print the error message in this case.
+
+Signed-off-by: Michael Walle <[email protected]>
+Signed-off-by: Miquel Raynal <[email protected]>
+Link: https://lore.kernel.org/linux-mtd/[email protected]
+---
+ drivers/mtd/mtdcore.c | 7 +++----
+ 1 file changed, 3 insertions(+), 4 deletions(-)
+
+--- a/drivers/mtd/mtdcore.c
++++ b/drivers/mtd/mtdcore.c
+@@ -960,8 +960,8 @@ static int mtd_otp_nvmem_add(struct mtd_
+ 			nvmem = mtd_otp_nvmem_register(mtd, "user-otp", size,
+ 						       mtd_nvmem_user_otp_reg_read);
+ 			if (IS_ERR(nvmem)) {
+-				dev_err(dev, "Failed to register OTP NVMEM device\n");
+-				return PTR_ERR(nvmem);
++				err = PTR_ERR(nvmem);
++				goto err;
+ 			}
+ 			mtd->otp_user_nvmem = nvmem;
+ 		}
+@@ -978,7 +978,6 @@ static int mtd_otp_nvmem_add(struct mtd_
+ 			nvmem = mtd_otp_nvmem_register(mtd, "factory-otp", size,
+ 						       mtd_nvmem_fact_otp_reg_read);
+ 			if (IS_ERR(nvmem)) {
+-				dev_err(dev, "Failed to register OTP NVMEM device\n");
+ 				err = PTR_ERR(nvmem);
+ 				goto err;
+ 			}
+@@ -991,7 +990,7 @@ static int mtd_otp_nvmem_add(struct mtd_
+ err:
+ 	if (mtd->otp_user_nvmem)
+ 		nvmem_unregister(mtd->otp_user_nvmem);
+-	return err;
++	return dev_err_probe(dev, err, "Failed to register OTP NVMEM device\n");
+ }
+ 
+ /**

+ 165 - 0
target/linux/generic/backport-6.1/600-v5.18-page_pool-Add-allocation-stats.patch

@@ -0,0 +1,165 @@
+From 8610037e8106b48c79cfe0afb92b2b2466e51c3d Mon Sep 17 00:00:00 2001
+From: Joe Damato <[email protected]>
+Date: Tue, 1 Mar 2022 23:55:47 -0800
+Subject: [PATCH] page_pool: Add allocation stats
+
+Add per-pool statistics counters for the allocation path of a page pool.
+These stats are incremented in softirq context, so no locking or per-cpu
+variables are needed.
+
+This code is disabled by default, and a kernel config option is provided for
+users who wish to enable it.
+
+The statistics added are:
+	- fast: successful fast path allocations
+	- slow: slow path order-0 allocations
+	- slow_high_order: slow path high order allocations
+	- empty: ptr ring is empty, so a slow path allocation was forced.
+	- refill: an allocation which triggered a refill of the cache
+	- waive: pages obtained from the ptr ring that cannot be added to
+	  the cache due to a NUMA mismatch.
+
+Signed-off-by: Joe Damato <[email protected]>
+Acked-by: Jesper Dangaard Brouer <[email protected]>
+Reviewed-by: Ilias Apalodimas <[email protected]>
+Signed-off-by: David S. Miller <[email protected]>
+---
+ include/net/page_pool.h | 18 ++++++++++++++++++
+ net/Kconfig             | 13 +++++++++++++
+ net/core/page_pool.c    | 24 ++++++++++++++++++++----
+ 3 files changed, 51 insertions(+), 4 deletions(-)
+
+--- a/include/net/page_pool.h
++++ b/include/net/page_pool.h
+@@ -82,6 +82,19 @@ struct page_pool_params {
+ 	unsigned int	offset;  /* DMA addr offset */
+ };
+ 
++#ifdef CONFIG_PAGE_POOL_STATS
++struct page_pool_alloc_stats {
++	u64 fast; /* fast path allocations */
++	u64 slow; /* slow-path order 0 allocations */
++	u64 slow_high_order; /* slow-path high order allocations */
++	u64 empty; /* failed refills due to empty ptr ring, forcing
++		    * slow path allocation
++		    */
++	u64 refill; /* allocations via successful refill */
++	u64 waive;  /* failed refills due to numa zone mismatch */
++};
++#endif
++
+ struct page_pool {
+ 	struct page_pool_params p;
+ 
+@@ -132,6 +145,11 @@ struct page_pool {
+ 	refcount_t user_cnt;
+ 
+ 	u64 destroy_cnt;
++
++#ifdef CONFIG_PAGE_POOL_STATS
++	/* these stats are incremented while in softirq context */
++	struct page_pool_alloc_stats alloc_stats;
++#endif
+ };
+ 
+ struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp);
+--- a/net/Kconfig
++++ b/net/Kconfig
+@@ -434,6 +434,19 @@ config NET_DEVLINK
+ config PAGE_POOL
+ 	bool
+ 
++config PAGE_POOL_STATS
++	default n
++	bool "Page pool stats"
++	depends on PAGE_POOL
++	help
++	  Enable page pool statistics to track page allocation and recycling
++	  in page pools. This option incurs additional CPU cost in allocation
++	  and recycle paths and additional memory cost to store the statistics.
++	  These statistics are only available if this option is enabled and if
++	  the driver using the page pool supports exporting this data.
++
++	  If unsure, say N.
++
+ config FAILOVER
+ 	tristate "Generic failover module"
+ 	help
+--- a/net/core/page_pool.c
++++ b/net/core/page_pool.c
+@@ -26,6 +26,13 @@
+ 
+ #define BIAS_MAX	LONG_MAX
+ 
++#ifdef CONFIG_PAGE_POOL_STATS
++/* alloc_stat_inc is intended to be used in softirq context */
++#define alloc_stat_inc(pool, __stat)	(pool->alloc_stats.__stat++)
++#else
++#define alloc_stat_inc(pool, __stat)
++#endif
++
+ static int page_pool_init(struct page_pool *pool,
+ 			  const struct page_pool_params *params)
+ {
+@@ -117,8 +124,10 @@ static struct page *page_pool_refill_all
+ 	int pref_nid; /* preferred NUMA node */
+ 
+ 	/* Quicker fallback, avoid locks when ring is empty */
+-	if (__ptr_ring_empty(r))
++	if (__ptr_ring_empty(r)) {
++		alloc_stat_inc(pool, empty);
+ 		return NULL;
++	}
+ 
+ 	/* Softirq guarantee CPU and thus NUMA node is stable. This,
+ 	 * assumes CPU refilling driver RX-ring will also run RX-NAPI.
+@@ -148,14 +157,17 @@ static struct page *page_pool_refill_all
+ 			 * This limit stress on page buddy alloactor.
+ 			 */
+ 			page_pool_return_page(pool, page);
++			alloc_stat_inc(pool, waive);
+ 			page = NULL;
+ 			break;
+ 		}
+ 	} while (pool->alloc.count < PP_ALLOC_CACHE_REFILL);
+ 
+ 	/* Return last page */
+-	if (likely(pool->alloc.count > 0))
++	if (likely(pool->alloc.count > 0)) {
+ 		page = pool->alloc.cache[--pool->alloc.count];
++		alloc_stat_inc(pool, refill);
++	}
+ 
+ 	spin_unlock(&r->consumer_lock);
+ 	return page;
+@@ -170,6 +182,7 @@ static struct page *__page_pool_get_cach
+ 	if (likely(pool->alloc.count)) {
+ 		/* Fast-path */
+ 		page = pool->alloc.cache[--pool->alloc.count];
++		alloc_stat_inc(pool, fast);
+ 	} else {
+ 		page = page_pool_refill_alloc_cache(pool);
+ 	}
+@@ -241,6 +254,7 @@ static struct page *__page_pool_alloc_pa
+ 		return NULL;
+ 	}
+ 
++	alloc_stat_inc(pool, slow_high_order);
+ 	page_pool_set_pp_info(pool, page);
+ 
+ 	/* Track how many pages are held 'in-flight' */
+@@ -295,10 +309,12 @@ static struct page *__page_pool_alloc_pa
+ 	}
+ 
+ 	/* Return last page */
+-	if (likely(pool->alloc.count > 0))
++	if (likely(pool->alloc.count > 0)) {
+ 		page = pool->alloc.cache[--pool->alloc.count];
+-	else
++		alloc_stat_inc(pool, slow);
++	} else {
+ 		page = NULL;
++	}
+ 
+ 	/* When page just alloc'ed is should/must have refcnt 1. */
+ 	return page;
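
A note on enabling this: the option defaults to n, so a build that wants these counters has to switch it on in the kernel config. For an OpenWrt-style target that is a single symbol, e.g. (path illustrative):

    # target/linux/<target>/config-6.1
    CONFIG_PAGE_POOL_STATS=y

Without it, alloc_stat_inc() compiles away to nothing and struct page_pool carries no extra fields.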

+ 140 - 0
target/linux/generic/backport-6.1/601-v5.18-page_pool-Add-recycle-stats.patch

@@ -0,0 +1,140 @@
+From ad6fa1e1ab1b8164f1ba296b1b4dc556a483bcad Mon Sep 17 00:00:00 2001
+From: Joe Damato <[email protected]>
+Date: Tue, 1 Mar 2022 23:55:48 -0800
+Subject: [PATCH 2/3] page_pool: Add recycle stats
+
+Add per-cpu stats tracking page pool recycling events:
+	- cached: recycling placed page in the page pool cache
+	- cache_full: page pool cache was full
+	- ring: page placed into the ptr ring
+	- ring_full: page released from page pool because the ptr ring was full
+	- released_refcnt: page released (and not recycled) because refcnt > 1
+
+Signed-off-by: Joe Damato <[email protected]>
+Acked-by: Jesper Dangaard Brouer <[email protected]>
+Reviewed-by: Ilias Apalodimas <[email protected]>
+Signed-off-by: David S. Miller <[email protected]>
+---
+ include/net/page_pool.h | 16 ++++++++++++++++
+ net/core/page_pool.c    | 30 ++++++++++++++++++++++++++++--
+ 2 files changed, 44 insertions(+), 2 deletions(-)
+
+--- a/include/net/page_pool.h
++++ b/include/net/page_pool.h
+@@ -93,6 +93,18 @@ struct page_pool_alloc_stats {
+ 	u64 refill; /* allocations via successful refill */
+ 	u64 waive;  /* failed refills due to numa zone mismatch */
+ };
++
++struct page_pool_recycle_stats {
++	u64 cached;	/* recycling placed page in the cache. */
++	u64 cache_full; /* cache was full */
++	u64 ring;	/* recycling placed page back into ptr ring */
++	u64 ring_full;	/* page was released from page-pool because
++			 * PTR ring was full.
++			 */
++	u64 released_refcnt; /* page released because of elevated
++			      * refcnt
++			      */
++};
+ #endif
+ 
+ struct page_pool {
+@@ -136,6 +148,10 @@ struct page_pool {
+ 	 */
+ 	struct ptr_ring ring;
+ 
++#ifdef CONFIG_PAGE_POOL_STATS
++	/* recycle stats are per-cpu to avoid locking */
++	struct page_pool_recycle_stats __percpu *recycle_stats;
++#endif
+ 	atomic_t pages_state_release_cnt;
+ 
+ 	/* A page_pool is strictly tied to a single RX-queue being
+--- a/net/core/page_pool.c
++++ b/net/core/page_pool.c
+@@ -29,8 +29,15 @@
+ #ifdef CONFIG_PAGE_POOL_STATS
+ /* alloc_stat_inc is intended to be used in softirq context */
+ #define alloc_stat_inc(pool, __stat)	(pool->alloc_stats.__stat++)
++/* recycle_stat_inc is safe to use when preemption is possible. */
++#define recycle_stat_inc(pool, __stat)							\
++	do {										\
++		struct page_pool_recycle_stats __percpu *s = pool->recycle_stats;	\
++		this_cpu_inc(s->__stat);						\
++	} while (0)
+ #else
+ #define alloc_stat_inc(pool, __stat)
++#define recycle_stat_inc(pool, __stat)
+ #endif
+ 
+ static int page_pool_init(struct page_pool *pool,
+@@ -80,6 +87,12 @@ static int page_pool_init(struct page_po
+ 	    pool->p.flags & PP_FLAG_PAGE_FRAG)
+ 		return -EINVAL;
+ 
++#ifdef CONFIG_PAGE_POOL_STATS
++	pool->recycle_stats = alloc_percpu(struct page_pool_recycle_stats);
++	if (!pool->recycle_stats)
++		return -ENOMEM;
++#endif
++
+ 	if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0)
+ 		return -ENOMEM;
+ 
+@@ -412,7 +425,12 @@ static bool page_pool_recycle_in_ring(st
+ 	else
+ 		ret = ptr_ring_produce_bh(&pool->ring, page);
+ 
+-	return (ret == 0) ? true : false;
++	if (!ret) {
++		recycle_stat_inc(pool, ring);
++		return true;
++	}
++
++	return false;
+ }
+ 
+ /* Only allow direct recycling in special circumstances, into the
+@@ -423,11 +441,14 @@ static bool page_pool_recycle_in_ring(st
+ static bool page_pool_recycle_in_cache(struct page *page,
+ 				       struct page_pool *pool)
+ {
+-	if (unlikely(pool->alloc.count == PP_ALLOC_CACHE_SIZE))
++	if (unlikely(pool->alloc.count == PP_ALLOC_CACHE_SIZE)) {
++		recycle_stat_inc(pool, cache_full);
+ 		return false;
++	}
+ 
+ 	/* Caller MUST have verified/know (page_ref_count(page) == 1) */
+ 	pool->alloc.cache[pool->alloc.count++] = page;
++	recycle_stat_inc(pool, cached);
+ 	return true;
+ }
+ 
+@@ -482,6 +503,7 @@ __page_pool_put_page(struct page_pool *p
+ 	 * doing refcnt based recycle tricks, meaning another process
+ 	 * will be invoking put_page.
+ 	 */
++	recycle_stat_inc(pool, released_refcnt);
+ 	/* Do not replace this with page_pool_return_page() */
+ 	page_pool_release_page(pool, page);
+ 	put_page(page);
+@@ -495,6 +517,7 @@ void page_pool_put_page(struct page_pool
+ 	page = __page_pool_put_page(pool, page, dma_sync_size, allow_direct);
+ 	if (page && !page_pool_recycle_in_ring(pool, page)) {
+ 		/* Cache full, fallback to free pages */
++		recycle_stat_inc(pool, ring_full);
+ 		page_pool_return_page(pool, page);
+ 	}
+ }
+@@ -641,6 +664,9 @@ static void page_pool_free(struct page_p
+ 	if (pool->p.flags & PP_FLAG_DMA_MAP)
+ 		put_device(pool->p.dev);
+ 
++#ifdef CONFIG_PAGE_POOL_STATS
++	free_percpu(pool->recycle_stats);
++#endif
+ 	kfree(pool);
+ }
+ 

+ 77 - 0
target/linux/generic/backport-6.1/602-v5.18-page_pool-Add-function-to-batch-and-return-stats.patch

@@ -0,0 +1,77 @@
+From 6b95e3388b1ea0ca63500c5a6e39162dbf828433 Mon Sep 17 00:00:00 2001
+From: Joe Damato <[email protected]>
+Date: Tue, 1 Mar 2022 23:55:49 -0800
+Subject: [PATCH 3/3] page_pool: Add function to batch and return stats
+
+Adds a function page_pool_get_stats which can be used by drivers to obtain
+stats for a specified page_pool.
+
+Signed-off-by: Joe Damato <[email protected]>
+Acked-by: Jesper Dangaard Brouer <[email protected]>
+Reviewed-by: Ilias Apalodimas <[email protected]>
+Signed-off-by: David S. Miller <[email protected]>
+---
+ include/net/page_pool.h | 17 +++++++++++++++++
+ net/core/page_pool.c    | 25 +++++++++++++++++++++++++
+ 2 files changed, 42 insertions(+)
+
+--- a/include/net/page_pool.h
++++ b/include/net/page_pool.h
+@@ -105,6 +105,23 @@ struct page_pool_recycle_stats {
+ 			      * refcnt
+ 			      */
+ };
++
++/* This struct wraps the above stats structs so users of the
++ * page_pool_get_stats API can pass a single argument when requesting the
++ * stats for the page pool.
++ */
++struct page_pool_stats {
++	struct page_pool_alloc_stats alloc_stats;
++	struct page_pool_recycle_stats recycle_stats;
++};
++
++/*
++ * Drivers that wish to harvest page pool stats and report them to users
++ * (perhaps via ethtool, debugfs, or another mechanism) can allocate a
++ * struct page_pool_stats call page_pool_get_stats to get stats for the specified pool.
++ */
++bool page_pool_get_stats(struct page_pool *pool,
++			 struct page_pool_stats *stats);
+ #endif
+ 
+ struct page_pool {
+--- a/net/core/page_pool.c
++++ b/net/core/page_pool.c
+@@ -35,6 +35,31 @@
+ 		struct page_pool_recycle_stats __percpu *s = pool->recycle_stats;	\
+ 		this_cpu_inc(s->__stat);						\
+ 	} while (0)
++
++bool page_pool_get_stats(struct page_pool *pool,
++			 struct page_pool_stats *stats)
++{
++	int cpu = 0;
++
++	if (!stats)
++		return false;
++
++	memcpy(&stats->alloc_stats, &pool->alloc_stats, sizeof(pool->alloc_stats));
++
++	for_each_possible_cpu(cpu) {
++		const struct page_pool_recycle_stats *pcpu =
++			per_cpu_ptr(pool->recycle_stats, cpu);
++
++		stats->recycle_stats.cached += pcpu->cached;
++		stats->recycle_stats.cache_full += pcpu->cache_full;
++		stats->recycle_stats.ring += pcpu->ring;
++		stats->recycle_stats.ring_full += pcpu->ring_full;
++		stats->recycle_stats.released_refcnt += pcpu->released_refcnt;
++	}
++
++	return true;
++}
++EXPORT_SYMBOL(page_pool_get_stats);
+ #else
+ #define alloc_stat_inc(pool, __stat)
+ #define recycle_stat_inc(pool, __stat)
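
To illustrate the intended use (this snippet is not part of the patch): a driver that owns a single pool and is built with CONFIG_PAGE_POOL_STATS could snapshot the counters like this, where pool and dev are assumed to be the driver's page pool and net_device:

	struct page_pool_stats stats = {};

	if (page_pool_get_stats(pool, &stats))
		netdev_info(dev, "pp: fast=%llu slow=%llu cached=%llu ring=%llu\n",
			    stats.alloc_stats.fast, stats.alloc_stats.slow,
			    stats.recycle_stats.cached, stats.recycle_stats.ring);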

+ 55 - 0
target/linux/generic/backport-6.1/603-v5.19-page_pool-Add-recycle-stats-to-page_pool_put_page_bu.patch

@@ -0,0 +1,55 @@
+From 590032a4d2133ecc10d3078a8db1d85a4842f12c Mon Sep 17 00:00:00 2001
+From: Lorenzo Bianconi <[email protected]>
+Date: Mon, 11 Apr 2022 16:05:26 +0200
+Subject: [PATCH] page_pool: Add recycle stats to page_pool_put_page_bulk
+
+Add missing recycle stats to page_pool_put_page_bulk routine.
+
+Reviewed-by: Joe Damato <[email protected]>
+Signed-off-by: Lorenzo Bianconi <[email protected]>
+Reviewed-by: Ilias Apalodimas <[email protected]>
+Link: https://lore.kernel.org/r/3712178b51c007cfaed910ea80e68f00c916b1fa.1649685634.git.lorenzo@kernel.org
+Signed-off-by: Paolo Abeni <[email protected]>
+---
+ net/core/page_pool.c | 15 +++++++++++++--
+ 1 file changed, 13 insertions(+), 2 deletions(-)
+
+--- a/net/core/page_pool.c
++++ b/net/core/page_pool.c
+@@ -36,6 +36,12 @@
+ 		this_cpu_inc(s->__stat);						\
+ 	} while (0)
+ 
++#define recycle_stat_add(pool, __stat, val)						\
++	do {										\
++		struct page_pool_recycle_stats __percpu *s = pool->recycle_stats;	\
++		this_cpu_add(s->__stat, val);						\
++	} while (0)
++
+ bool page_pool_get_stats(struct page_pool *pool,
+ 			 struct page_pool_stats *stats)
+ {
+@@ -63,6 +69,7 @@ EXPORT_SYMBOL(page_pool_get_stats);
+ #else
+ #define alloc_stat_inc(pool, __stat)
+ #define recycle_stat_inc(pool, __stat)
++#define recycle_stat_add(pool, __stat, val)
+ #endif
+ 
+ static int page_pool_init(struct page_pool *pool,
+@@ -569,9 +576,13 @@ void page_pool_put_page_bulk(struct page
+ 	/* Bulk producer into ptr_ring page_pool cache */
+ 	page_pool_ring_lock(pool);
+ 	for (i = 0; i < bulk_len; i++) {
+-		if (__ptr_ring_produce(&pool->ring, data[i]))
+-			break; /* ring full */
++		if (__ptr_ring_produce(&pool->ring, data[i])) {
++			/* ring full */
++			recycle_stat_inc(pool, ring_full);
++			break;
++		}
+ 	}
++	recycle_stat_add(pool, ring, i);
+ 	page_pool_ring_unlock(pool);
+ 
+ 	/* Hopefully all pages was return into ptr_ring */

+ 147 - 0
target/linux/generic/backport-6.1/604-v5.19-net-page_pool-introduce-ethtool-stats.patch

@@ -0,0 +1,147 @@
+From f3c5264f452a5b0ac1de1f2f657efbabdea3c76a Mon Sep 17 00:00:00 2001
+From: Lorenzo Bianconi <[email protected]>
+Date: Tue, 12 Apr 2022 18:31:58 +0200
+Subject: [PATCH] net: page_pool: introduce ethtool stats
+
+Introduce page_pool APIs to report stats through ethtool and reduce
+duplicated code in each driver.
+
+Signed-off-by: Lorenzo Bianconi <[email protected]>
+Reviewed-by: Jakub Kicinski <[email protected]>
+Reviewed-by: Ilias Apalodimas <[email protected]>
+Signed-off-by: David S. Miller <[email protected]>
+---
+ include/net/page_pool.h | 21 ++++++++++++++
+ net/core/page_pool.c    | 63 ++++++++++++++++++++++++++++++++++++++++-
+ 2 files changed, 83 insertions(+), 1 deletion(-)
+
+--- a/include/net/page_pool.h
++++ b/include/net/page_pool.h
+@@ -115,6 +115,10 @@ struct page_pool_stats {
+ 	struct page_pool_recycle_stats recycle_stats;
+ };
+ 
++int page_pool_ethtool_stats_get_count(void);
++u8 *page_pool_ethtool_stats_get_strings(u8 *data);
++u64 *page_pool_ethtool_stats_get(u64 *data, void *stats);
++
+ /*
+  * Drivers that wish to harvest page pool stats and report them to users
+  * (perhaps via ethtool, debugfs, or another mechanism) can allocate a
+@@ -122,6 +126,23 @@ struct page_pool_stats {
+  */
+ bool page_pool_get_stats(struct page_pool *pool,
+ 			 struct page_pool_stats *stats);
++#else
++
++static inline int page_pool_ethtool_stats_get_count(void)
++{
++	return 0;
++}
++
++static inline u8 *page_pool_ethtool_stats_get_strings(u8 *data)
++{
++	return data;
++}
++
++static inline u64 *page_pool_ethtool_stats_get(u64 *data, void *stats)
++{
++	return data;
++}
++
+ #endif
+ 
+ struct page_pool {
+--- a/net/core/page_pool.c
++++ b/net/core/page_pool.c
+@@ -18,6 +18,7 @@
+ #include <linux/page-flags.h>
+ #include <linux/mm.h> /* for __put_page() */
+ #include <linux/poison.h>
++#include <linux/ethtool.h>
+ 
+ #include <trace/events/page_pool.h>
+ 
+@@ -42,6 +43,20 @@
+ 		this_cpu_add(s->__stat, val);						\
+ 	} while (0)
+ 
++static const char pp_stats[][ETH_GSTRING_LEN] = {
++	"rx_pp_alloc_fast",
++	"rx_pp_alloc_slow",
++	"rx_pp_alloc_slow_ho",
++	"rx_pp_alloc_empty",
++	"rx_pp_alloc_refill",
++	"rx_pp_alloc_waive",
++	"rx_pp_recycle_cached",
++	"rx_pp_recycle_cache_full",
++	"rx_pp_recycle_ring",
++	"rx_pp_recycle_ring_full",
++	"rx_pp_recycle_released_ref",
++};
++
+ bool page_pool_get_stats(struct page_pool *pool,
+ 			 struct page_pool_stats *stats)
+ {
+@@ -50,7 +65,13 @@ bool page_pool_get_stats(struct page_poo
+ 	if (!stats)
+ 		return false;
+ 
+-	memcpy(&stats->alloc_stats, &pool->alloc_stats, sizeof(pool->alloc_stats));
++	/* The caller is responsible to initialize stats. */
++	stats->alloc_stats.fast += pool->alloc_stats.fast;
++	stats->alloc_stats.slow += pool->alloc_stats.slow;
++	stats->alloc_stats.slow_high_order += pool->alloc_stats.slow_high_order;
++	stats->alloc_stats.empty += pool->alloc_stats.empty;
++	stats->alloc_stats.refill += pool->alloc_stats.refill;
++	stats->alloc_stats.waive += pool->alloc_stats.waive;
+ 
+ 	for_each_possible_cpu(cpu) {
+ 		const struct page_pool_recycle_stats *pcpu =
+@@ -66,6 +87,46 @@ bool page_pool_get_stats(struct page_poo
+ 	return true;
+ }
+ EXPORT_SYMBOL(page_pool_get_stats);
++
++u8 *page_pool_ethtool_stats_get_strings(u8 *data)
++{
++	int i;
++
++	for (i = 0; i < ARRAY_SIZE(pp_stats); i++) {
++		memcpy(data, pp_stats[i], ETH_GSTRING_LEN);
++		data += ETH_GSTRING_LEN;
++	}
++
++	return data;
++}
++EXPORT_SYMBOL(page_pool_ethtool_stats_get_strings);
++
++int page_pool_ethtool_stats_get_count(void)
++{
++	return ARRAY_SIZE(pp_stats);
++}
++EXPORT_SYMBOL(page_pool_ethtool_stats_get_count);
++
++u64 *page_pool_ethtool_stats_get(u64 *data, void *stats)
++{
++	struct page_pool_stats *pool_stats = stats;
++
++	*data++ = pool_stats->alloc_stats.fast;
++	*data++ = pool_stats->alloc_stats.slow;
++	*data++ = pool_stats->alloc_stats.slow_high_order;
++	*data++ = pool_stats->alloc_stats.empty;
++	*data++ = pool_stats->alloc_stats.refill;
++	*data++ = pool_stats->alloc_stats.waive;
++	*data++ = pool_stats->recycle_stats.cached;
++	*data++ = pool_stats->recycle_stats.cache_full;
++	*data++ = pool_stats->recycle_stats.ring;
++	*data++ = pool_stats->recycle_stats.ring_full;
++	*data++ = pool_stats->recycle_stats.released_refcnt;
++
++	return data;
++}
++EXPORT_SYMBOL(page_pool_ethtool_stats_get);
++
+ #else
+ #define alloc_stat_inc(pool, __stat)
+ #define recycle_stat_inc(pool, __stat)
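
For reference, a rough sketch of how a driver could hook these three helpers into its ethtool_ops. Everything prefixed mydrv_, plus priv->rxq[] and priv->num_rx_rings, is made up for the example; the driver is assumed to select PAGE_POOL_STATS so that struct page_pool_stats and page_pool_get_stats() are available:

static int mydrv_get_sset_count(struct net_device *dev, int sset)
{
	if (sset == ETH_SS_STATS)
		return page_pool_ethtool_stats_get_count();

	return -EOPNOTSUPP;
}

static void mydrv_get_strings(struct net_device *dev, u32 sset, u8 *data)
{
	if (sset == ETH_SS_STATS)
		page_pool_ethtool_stats_get_strings(data);
}

static void mydrv_get_ethtool_stats(struct net_device *dev,
				    struct ethtool_stats *stats, u64 *data)
{
	struct mydrv_priv *priv = netdev_priv(dev);
	struct page_pool_stats pp_stats = {};
	int i;

	/* page_pool_get_stats() accumulates, so one struct covers all rings */
	for (i = 0; i < priv->num_rx_rings; i++)
		page_pool_get_stats(priv->rxq[i].page_pool, &pp_stats);

	data = page_pool_ethtool_stats_get(data, &pp_stats);
}

static const struct ethtool_ops mydrv_ethtool_ops = {
	.get_sset_count		= mydrv_get_sset_count,
	.get_strings		= mydrv_get_strings,
	.get_ethtool_stats	= mydrv_get_ethtool_stats,
};

If the driver also exports its own counters, the page pool values are simply appended after them; the strings and stats helpers return the advanced data pointer for exactly that reason.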

+ 99 - 0
target/linux/generic/backport-6.1/605-v5.18-xdp-introduce-flags-field-in-xdp_buff-xdp_frame.patch

@@ -0,0 +1,99 @@
+From 2e88d4ff03013937028f5397268b21e10cf68713 Mon Sep 17 00:00:00 2001
+From: Lorenzo Bianconi <[email protected]>
+Date: Fri, 21 Jan 2022 11:09:45 +0100
+Subject: [PATCH] xdp: introduce flags field in xdp_buff/xdp_frame
+
+Introduce flags field in xdp_frame and xdp_buffer data structures
+to define additional buffer features. At the moment the only
+supported buffer feature is frags bit (XDP_FLAGS_HAS_FRAGS).
+frags bit is used to specify if this is a linear buffer
+(XDP_FLAGS_HAS_FRAGS not set) or a frags frame (XDP_FLAGS_HAS_FRAGS
+set). In the latter case the driver is expected to initialize the
+skb_shared_info structure at the end of the first buffer to link together
+subsequent buffers belonging to the same frame.
+
+Acked-by: Toke Hoiland-Jorgensen <[email protected]>
+Acked-by: John Fastabend <[email protected]>
+Acked-by: Jesper Dangaard Brouer <[email protected]>
+Signed-off-by: Lorenzo Bianconi <[email protected]>
+Link: https://lore.kernel.org/r/e389f14f3a162c0a5bc6a2e1aa8dd01a90be117d.1642758637.git.lorenzo@kernel.org
+Signed-off-by: Alexei Starovoitov <[email protected]>
+---
+ include/net/xdp.h | 29 +++++++++++++++++++++++++++++
+ 1 file changed, 29 insertions(+)
+
+--- a/include/net/xdp.h
++++ b/include/net/xdp.h
+@@ -66,6 +66,10 @@ struct xdp_txq_info {
+ 	struct net_device *dev;
+ };
+ 
++enum xdp_buff_flags {
++	XDP_FLAGS_HAS_FRAGS	= BIT(0), /* non-linear xdp buff */
++};
++
+ struct xdp_buff {
+ 	void *data;
+ 	void *data_end;
+@@ -74,13 +78,30 @@ struct xdp_buff {
+ 	struct xdp_rxq_info *rxq;
+ 	struct xdp_txq_info *txq;
+ 	u32 frame_sz; /* frame size to deduce data_hard_end/reserved tailroom*/
++	u32 flags; /* supported values defined in xdp_buff_flags */
+ };
+ 
++static __always_inline bool xdp_buff_has_frags(struct xdp_buff *xdp)
++{
++	return !!(xdp->flags & XDP_FLAGS_HAS_FRAGS);
++}
++
++static __always_inline void xdp_buff_set_frags_flag(struct xdp_buff *xdp)
++{
++	xdp->flags |= XDP_FLAGS_HAS_FRAGS;
++}
++
++static __always_inline void xdp_buff_clear_frags_flag(struct xdp_buff *xdp)
++{
++	xdp->flags &= ~XDP_FLAGS_HAS_FRAGS;
++}
++
+ static __always_inline void
+ xdp_init_buff(struct xdp_buff *xdp, u32 frame_sz, struct xdp_rxq_info *rxq)
+ {
+ 	xdp->frame_sz = frame_sz;
+ 	xdp->rxq = rxq;
++	xdp->flags = 0;
+ }
+ 
+ static __always_inline void
+@@ -122,8 +143,14 @@ struct xdp_frame {
+ 	 */
+ 	struct xdp_mem_info mem;
+ 	struct net_device *dev_rx; /* used by cpumap */
++	u32 flags; /* supported values defined in xdp_buff_flags */
+ };
+ 
++static __always_inline bool xdp_frame_has_frags(struct xdp_frame *frame)
++{
++	return !!(frame->flags & XDP_FLAGS_HAS_FRAGS);
++}
++
+ #define XDP_BULK_QUEUE_SIZE	16
+ struct xdp_frame_bulk {
+ 	int count;
+@@ -180,6 +207,7 @@ void xdp_convert_frame_to_buff(struct xd
+ 	xdp->data_end = frame->data + frame->len;
+ 	xdp->data_meta = frame->data - frame->metasize;
+ 	xdp->frame_sz = frame->frame_sz;
++	xdp->flags = frame->flags;
+ }
+ 
+ static inline
+@@ -206,6 +234,7 @@ int xdp_update_frame_from_buff(struct xd
+ 	xdp_frame->headroom = headroom - sizeof(*xdp_frame);
+ 	xdp_frame->metasize = metasize;
+ 	xdp_frame->frame_sz = xdp->frame_sz;
++	xdp_frame->flags = xdp->flags;
+ 
+ 	return 0;
+ }
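
A hedged driver-side sketch (not from the patch) of when the new flag gets set: after the usual xdp_init_buff()/xdp_prepare_buff() setup, the buffer is marked as multi-buffer once fragments have been placed in the skb_shared_info area. frame_sz, hard_start, headroom, len, nr_frags and rxq are assumed to come from the driver's RX descriptor parsing:

	struct xdp_buff xdp;

	xdp_init_buff(&xdp, frame_sz, &rxq->xdp_rxq);
	xdp_prepare_buff(&xdp, hard_start, headroom, len, true);

	if (nr_frags) {
		struct skb_shared_info *sinfo;

		sinfo = xdp_get_shared_info_from_buff(&xdp);
		sinfo->nr_frags = nr_frags;
		xdp_buff_set_frags_flag(&xdp);
	}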

+ 137 - 0
target/linux/generic/backport-6.1/606-v5.18-xdp-add-frags-support-to-xdp_return_-buff-frame.patch

@@ -0,0 +1,137 @@
+From 7c48cb0176c6d6d3b55029f7ff4ffa05faee6446 Mon Sep 17 00:00:00 2001
+From: Lorenzo Bianconi <[email protected]>
+Date: Fri, 21 Jan 2022 11:09:50 +0100
+Subject: [PATCH] xdp: add frags support to xdp_return_{buff/frame}
+
+Take into account if the received xdp_buff/xdp_frame is non-linear
+recycling/returning the frame memory to the allocator or into
+xdp_frame_bulk.
+
+Acked-by: Toke Hoiland-Jorgensen <[email protected]>
+Acked-by: John Fastabend <[email protected]>
+Signed-off-by: Lorenzo Bianconi <[email protected]>
+Link: https://lore.kernel.org/r/a961069febc868508ce1bdf5e53a343eb4e57cb2.1642758637.git.lorenzo@kernel.org
+Signed-off-by: Alexei Starovoitov <[email protected]>
+---
+ include/net/xdp.h | 18 ++++++++++++++--
+ net/core/xdp.c    | 54 ++++++++++++++++++++++++++++++++++++++++++++++-
+ 2 files changed, 69 insertions(+), 3 deletions(-)
+
+--- a/include/net/xdp.h
++++ b/include/net/xdp.h
+@@ -275,10 +275,24 @@ void __xdp_release_frame(void *data, str
+ static inline void xdp_release_frame(struct xdp_frame *xdpf)
+ {
+ 	struct xdp_mem_info *mem = &xdpf->mem;
++	struct skb_shared_info *sinfo;
++	int i;
+ 
+ 	/* Curr only page_pool needs this */
+-	if (mem->type == MEM_TYPE_PAGE_POOL)
+-		__xdp_release_frame(xdpf->data, mem);
++	if (mem->type != MEM_TYPE_PAGE_POOL)
++		return;
++
++	if (likely(!xdp_frame_has_frags(xdpf)))
++		goto out;
++
++	sinfo = xdp_get_shared_info_from_frame(xdpf);
++	for (i = 0; i < sinfo->nr_frags; i++) {
++		struct page *page = skb_frag_page(&sinfo->frags[i]);
++
++		__xdp_release_frame(page_address(page), mem);
++	}
++out:
++	__xdp_release_frame(xdpf->data, mem);
+ }
+ 
+ int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
+--- a/net/core/xdp.c
++++ b/net/core/xdp.c
+@@ -376,12 +376,38 @@ static void __xdp_return(void *data, str
+ 
+ void xdp_return_frame(struct xdp_frame *xdpf)
+ {
++	struct skb_shared_info *sinfo;
++	int i;
++
++	if (likely(!xdp_frame_has_frags(xdpf)))
++		goto out;
++
++	sinfo = xdp_get_shared_info_from_frame(xdpf);
++	for (i = 0; i < sinfo->nr_frags; i++) {
++		struct page *page = skb_frag_page(&sinfo->frags[i]);
++
++		__xdp_return(page_address(page), &xdpf->mem, false, NULL);
++	}
++out:
+ 	__xdp_return(xdpf->data, &xdpf->mem, false, NULL);
+ }
+ EXPORT_SYMBOL_GPL(xdp_return_frame);
+ 
+ void xdp_return_frame_rx_napi(struct xdp_frame *xdpf)
+ {
++	struct skb_shared_info *sinfo;
++	int i;
++
++	if (likely(!xdp_frame_has_frags(xdpf)))
++		goto out;
++
++	sinfo = xdp_get_shared_info_from_frame(xdpf);
++	for (i = 0; i < sinfo->nr_frags; i++) {
++		struct page *page = skb_frag_page(&sinfo->frags[i]);
++
++		__xdp_return(page_address(page), &xdpf->mem, true, NULL);
++	}
++out:
+ 	__xdp_return(xdpf->data, &xdpf->mem, true, NULL);
+ }
+ EXPORT_SYMBOL_GPL(xdp_return_frame_rx_napi);
+@@ -417,7 +443,7 @@ void xdp_return_frame_bulk(struct xdp_fr
+ 	struct xdp_mem_allocator *xa;
+ 
+ 	if (mem->type != MEM_TYPE_PAGE_POOL) {
+-		__xdp_return(xdpf->data, &xdpf->mem, false, NULL);
++		xdp_return_frame(xdpf);
+ 		return;
+ 	}
+ 
+@@ -436,12 +462,38 @@ void xdp_return_frame_bulk(struct xdp_fr
+ 		bq->xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
+ 	}
+ 
++	if (unlikely(xdp_frame_has_frags(xdpf))) {
++		struct skb_shared_info *sinfo;
++		int i;
++
++		sinfo = xdp_get_shared_info_from_frame(xdpf);
++		for (i = 0; i < sinfo->nr_frags; i++) {
++			skb_frag_t *frag = &sinfo->frags[i];
++
++			bq->q[bq->count++] = skb_frag_address(frag);
++			if (bq->count == XDP_BULK_QUEUE_SIZE)
++				xdp_flush_frame_bulk(bq);
++		}
++	}
+ 	bq->q[bq->count++] = xdpf->data;
+ }
+ EXPORT_SYMBOL_GPL(xdp_return_frame_bulk);
+ 
+ void xdp_return_buff(struct xdp_buff *xdp)
+ {
++	struct skb_shared_info *sinfo;
++	int i;
++
++	if (likely(!xdp_buff_has_frags(xdp)))
++		goto out;
++
++	sinfo = xdp_get_shared_info_from_buff(xdp);
++	for (i = 0; i < sinfo->nr_frags; i++) {
++		struct page *page = skb_frag_page(&sinfo->frags[i]);
++
++		__xdp_return(page_address(page), &xdp->rxq->mem, true, xdp);
++	}
++out:
+ 	__xdp_return(xdp->data, &xdp->rxq->mem, true, xdp);
+ }
+ 
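
In practice this means (illustrative, not part of the patch) that a driver's verdict handling can keep calling the generic helper for the drop case and does not have to walk the fragment list itself:

	switch (act) {
	case XDP_DROP:
	default:
		/* releases the head page and, with this change, all frags */
		xdp_return_buff(&xdp);
		break;
	}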

+ 31 - 0
target/linux/generic/backport-6.1/607-v5.18-net-skbuff-add-size-metadata-to-skb_shared_info-for-.patch

@@ -0,0 +1,31 @@
+From d16697cb6261d4cc23422e6b1cb2759df8aa76d0 Mon Sep 17 00:00:00 2001
+From: Lorenzo Bianconi <[email protected]>
+Date: Fri, 21 Jan 2022 11:09:44 +0100
+Subject: [PATCH] net: skbuff: add size metadata to skb_shared_info for xdp
+
+Introduce xdp_frags_size field in skb_shared_info data structure
+to store xdp_buff/xdp_frame frame paged size (xdp_frags_size will
+be used in xdp frags support). In order to not increase
+skb_shared_info size we will use a hole due to skb_shared_info
+alignment.
+
+Acked-by: Toke Hoiland-Jorgensen <[email protected]>
+Acked-by: John Fastabend <[email protected]>
+Acked-by: Jesper Dangaard Brouer <[email protected]>
+Signed-off-by: Lorenzo Bianconi <[email protected]>
+Link: https://lore.kernel.org/r/8a849819a3e0a143d540f78a3a5add76e17e980d.1642758637.git.lorenzo@kernel.org
+Signed-off-by: Alexei Starovoitov <[email protected]>
+---
+ include/linux/skbuff.h | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/include/linux/skbuff.h
++++ b/include/linux/skbuff.h
+@@ -568,6 +568,7 @@ struct skb_shared_info {
+ 	 * Warning : all fields before dataref are cleared in __alloc_skb()
+ 	 */
+ 	atomic_t	dataref;
++	unsigned int	xdp_frags_size;
+ 
+ 	/* Intermediate layers must ensure that destructor_arg
+ 	 * remains valid until skb destructor */
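
Sketch of how a multi-buffer driver is expected to maintain the new field when it attaches a fragment to an xdp_buff (xdp, page, offset and frag_len are assumed to exist in the RX path):

	struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(&xdp);
	skb_frag_t *frag = &sinfo->frags[sinfo->nr_frags++];

	__skb_frag_set_page(frag, page);
	skb_frag_off_set(frag, offset);
	skb_frag_size_set(frag, frag_len);
	sinfo->xdp_frags_size += frag_len;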

+ 65 - 0
target/linux/generic/backport-6.1/608-v5.18-net-veth-Account-total-xdp_frame-len-running-ndo_xdp.patch

@@ -0,0 +1,65 @@
+From 5142239a22219921a7863cf00c9ab853c00689d8 Mon Sep 17 00:00:00 2001
+From: Lorenzo Bianconi <[email protected]>
+Date: Fri, 11 Mar 2022 10:14:18 +0100
+Subject: [PATCH] net: veth: Account total xdp_frame len running ndo_xdp_xmit
+
+Even if this is a theoretical issue since it is not possible to perform
+XDP_REDIRECT on a non-linear xdp_frame, veth driver does not account
+paged area in ndo_xdp_xmit function pointer.
+Introduce xdp_get_frame_len utility routine to get the xdp_frame full
+length and account total frame size running XDP_REDIRECT of a
+non-linear xdp frame into a veth device.
+
+Signed-off-by: Lorenzo Bianconi <[email protected]>
+Signed-off-by: Daniel Borkmann <[email protected]>
+Acked-by: Toke Hoiland-Jorgensen <[email protected]>
+Acked-by: John Fastabend <[email protected]>
+Link: https://lore.kernel.org/bpf/54f9fd3bb65d190daf2c0bbae2f852ff16cfbaa0.1646989407.git.lorenzo@kernel.org
+---
+ drivers/net/veth.c |  4 ++--
+ include/net/xdp.h  | 14 ++++++++++++++
+ 2 files changed, 16 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/veth.c
++++ b/drivers/net/veth.c
+@@ -501,7 +501,7 @@ static int veth_xdp_xmit(struct net_devi
+ 		struct xdp_frame *frame = frames[i];
+ 		void *ptr = veth_xdp_to_ptr(frame);
+ 
+-		if (unlikely(frame->len > max_len ||
++		if (unlikely(xdp_get_frame_len(frame) > max_len ||
+ 			     __ptr_ring_produce(&rq->xdp_ring, ptr)))
+ 			break;
+ 		nxmit++;
+@@ -862,7 +862,7 @@ static int veth_xdp_rcv(struct veth_rq *
+ 			/* ndo_xdp_xmit */
+ 			struct xdp_frame *frame = veth_ptr_to_xdp(ptr);
+ 
+-			stats->xdp_bytes += frame->len;
++			stats->xdp_bytes += xdp_get_frame_len(frame);
+ 			frame = veth_xdp_rcv_one(rq, frame, bq, stats);
+ 			if (frame) {
+ 				/* XDP_PASS */
+--- a/include/net/xdp.h
++++ b/include/net/xdp.h
+@@ -295,6 +295,20 @@ out:
+ 	__xdp_release_frame(xdpf->data, mem);
+ }
+ 
++static __always_inline unsigned int xdp_get_frame_len(struct xdp_frame *xdpf)
++{
++	struct skb_shared_info *sinfo;
++	unsigned int len = xdpf->len;
++
++	if (likely(!xdp_frame_has_frags(xdpf)))
++		goto out;
++
++	sinfo = xdp_get_shared_info_from_frame(xdpf);
++	len += sinfo->xdp_frags_size;
++out:
++	return len;
++}
++
+ int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
+ 		     struct net_device *dev, u32 queue_index, unsigned int napi_id);
+ void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq);
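
The same helper is useful for any driver whose TX ring has a hard per-descriptor length limit; a rough ndo_xdp_xmit() loop with made-up mydrv_* names:

	for (i = 0; i < n; i++) {
		struct xdp_frame *xdpf = frames[i];

		/* xdp_get_frame_len() also counts the paged area of frags */
		if (xdp_get_frame_len(xdpf) > MYDRV_MAX_XDP_LEN ||
		    mydrv_queue_xdp_frame(txq, xdpf))
			break;

		nxmit++;
	}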

+ 40 - 0
target/linux/generic/backport-6.1/609-v5.18-veth-Allow-jumbo-frames-in-xdp-mode.patch

@@ -0,0 +1,40 @@
+From 7cda76d858a4e71ac4a04066c093679a12e1312c Mon Sep 17 00:00:00 2001
+From: Lorenzo Bianconi <[email protected]>
+Date: Fri, 11 Mar 2022 10:14:20 +0100
+Subject: [PATCH] veth: Allow jumbo frames in xdp mode
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Allow increasing the MTU over page boundaries on veth devices
+if the attached xdp program declares to support xdp fragments.
+
+Signed-off-by: Lorenzo Bianconi <[email protected]>
+Signed-off-by: Daniel Borkmann <[email protected]>
+Acked-by: Toke Høiland-Jørgensen <[email protected]>
+Acked-by: John Fastabend <[email protected]>
+Link: https://lore.kernel.org/bpf/d5dc039c3d4123426e7023a488c449181a7bc57f.1646989407.git.lorenzo@kernel.org
+---
+ drivers/net/veth.c | 11 ++++++++---
+ 1 file changed, 8 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/veth.c
++++ b/drivers/net/veth.c
+@@ -1471,9 +1471,14 @@ static int veth_xdp_set(struct net_devic
+ 			goto err;
+ 		}
+ 
+-		max_mtu = PAGE_SIZE - VETH_XDP_HEADROOM -
+-			  peer->hard_header_len -
+-			  SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
++		max_mtu = SKB_WITH_OVERHEAD(PAGE_SIZE - VETH_XDP_HEADROOM) -
++			  peer->hard_header_len;
++		/* Allow increasing the max_mtu if the program supports
++		 * XDP fragments.
++		 */
++		//if (prog->aux->xdp_has_frags)
++		max_mtu += PAGE_SIZE * MAX_SKB_FRAGS;
++
+ 		if (peer->mtu > max_mtu) {
+ 			NL_SET_ERR_MSG_MOD(extack, "Peer MTU is too large to set XDP");
+ 			err = -ERANGE;

+ 56 - 0
target/linux/generic/backport-6.1/610-v6.3-net-page_pool-use-in_softirq-instead.patch

@@ -0,0 +1,56 @@
+From: Qingfang DENG <[email protected]>
+Date: Fri, 3 Feb 2023 09:16:11 +0800
+Subject: [PATCH] net: page_pool: use in_softirq() instead
+
+We use BH context only for synchronization, so we don't care if it's
+actually serving softirq or not.
+
+As a side node, in case of threaded NAPI, in_serving_softirq() will
+return false because it's in process context with BH off, making
+page_pool_recycle_in_cache() unreachable.
+
+Signed-off-by: Qingfang DENG <[email protected]>
+---
+
+--- a/include/net/page_pool.h
++++ b/include/net/page_pool.h
+@@ -357,7 +357,7 @@ static inline void page_pool_nid_changed
+ static inline void page_pool_ring_lock(struct page_pool *pool)
+ 	__acquires(&pool->ring.producer_lock)
+ {
+-	if (in_serving_softirq())
++	if (in_softirq())
+ 		spin_lock(&pool->ring.producer_lock);
+ 	else
+ 		spin_lock_bh(&pool->ring.producer_lock);
+@@ -366,7 +366,7 @@ static inline void page_pool_ring_lock(s
+ static inline void page_pool_ring_unlock(struct page_pool *pool)
+ 	__releases(&pool->ring.producer_lock)
+ {
+-	if (in_serving_softirq())
++	if (in_softirq())
+ 		spin_unlock(&pool->ring.producer_lock);
+ 	else
+ 		spin_unlock_bh(&pool->ring.producer_lock);
+--- a/net/core/page_pool.c
++++ b/net/core/page_pool.c
+@@ -512,8 +512,8 @@ static void page_pool_return_page(struct
+ static bool page_pool_recycle_in_ring(struct page_pool *pool, struct page *page)
+ {
+ 	int ret;
+-	/* BH protection not needed if current is serving softirq */
+-	if (in_serving_softirq())
++	/* BH protection not needed if current is softirq */
++	if (in_softirq())
+ 		ret = ptr_ring_produce(&pool->ring, page);
+ 	else
+ 		ret = ptr_ring_produce_bh(&pool->ring, page);
+@@ -576,7 +576,7 @@ __page_pool_put_page(struct page_pool *p
+ 			page_pool_dma_sync_for_device(pool, page,
+ 						      dma_sync_size);
+ 
+-		if (allow_direct && in_serving_softirq() &&
++		if (allow_direct && in_softirq() &&
+ 		    page_pool_recycle_in_cache(page, pool))
+ 			return NULL;
+ 

+ 41 - 0
target/linux/generic/backport-6.1/611-v6.3-net-add-helper-eth_addr_add.patch

@@ -0,0 +1,41 @@
+From 7390609b0121a1b982c5ecdfcd72dc328e5784ee Mon Sep 17 00:00:00 2001
+From: Michael Walle <[email protected]>
+Date: Mon, 6 Feb 2023 13:43:42 +0000
+Subject: [PATCH] net: add helper eth_addr_add()
+
+Add a helper to add an offset to a ethernet address. This comes in handy
+if you have a base ethernet address for multiple interfaces.
+
+Signed-off-by: Michael Walle <[email protected]>
+Reviewed-by: Andrew Lunn <[email protected]>
+Acked-by: Jakub Kicinski <[email protected]>
+Signed-off-by: Srinivas Kandagatla <[email protected]>
+Link: https://lore.kernel.org/r/[email protected]
+Signed-off-by: Greg Kroah-Hartman <[email protected]>
+---
+ include/linux/etherdevice.h | 14 ++++++++++++++
+ 1 file changed, 14 insertions(+)
+
+--- a/include/linux/etherdevice.h
++++ b/include/linux/etherdevice.h
+@@ -478,6 +478,20 @@ static inline void eth_addr_inc(u8 *addr
+ }
+ 
+ /**
++ * eth_addr_add() - Add (or subtract) an offset to/from the given MAC address.
++ *
++ * @offset: Offset to add.
++ * @addr: Pointer to a six-byte array containing Ethernet address to increment.
++ */
++static inline void eth_addr_add(u8 *addr, long offset)
++{
++	u64 u = ether_addr_to_u64(addr);
++
++	u += offset;
++	u64_to_ether_addr(u, addr);
++}
++
++/**
+  * is_etherdev_addr - Tell if given Ethernet address belongs to the device.
+  * @dev: Pointer to a device structure
+  * @addr: Pointer to a six-byte array containing the Ethernet address
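
Typical use, sketched with assumed variable names: deriving the MAC address of the n-th interface from a base address, for example one read from NVMEM or OTP:

	u8 addr[ETH_ALEN];

	ether_addr_copy(addr, base_mac);
	eth_addr_add(addr, port_index);		/* base_mac + port_index */
	eth_hw_addr_set(netdev, addr);

The offset may also be negative, since the helper converts the address to a u64, adds, and converts back.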

+ 279 - 0
target/linux/generic/backport-6.1/700-v5.17-net-dsa-introduce-tagger-owned-storage-for-private.patch

@@ -0,0 +1,279 @@
+From dc452a471dbae8aca8257c565174212620880093 Mon Sep 17 00:00:00 2001
+From: Vladimir Oltean <[email protected]>
+Date: Fri, 10 Dec 2021 01:34:37 +0200
+Subject: net: dsa: introduce tagger-owned storage for private and shared data
+
+Ansuel is working on register access over Ethernet for the qca8k switch
+family. This requires the qca8k tagging protocol driver to receive
+frames which aren't intended for the network stack, but instead for the
+qca8k switch driver itself.
+
+The dp->priv is currently the prevailing method for passing data back
+and forth between the tagging protocol driver and the switch driver.
+However, this method is riddled with caveats.
+
+The DSA design allows in principle for any switch driver to return any
+protocol it desires in ->get_tag_protocol(). The dsa_loop driver can be
+modified to do just that. But in the current design, the memory behind
+dp->priv has to be allocated by the switch driver, so if the tagging
+protocol is paired to an unexpected switch driver, we may end up in NULL
+pointer dereferences inside the kernel, or worse (a switch driver may
+allocate dp->priv according to the expectations of a different tagger).
+
+The latter possibility is even more plausible considering that DSA
+switches can dynamically change tagging protocols in certain cases
+(dsa <-> edsa, ocelot <-> ocelot-8021q), and the current design lends
+itself to mistakes that are all too easy to make.
+
+This patch proposes that the tagging protocol driver should manage its
+own memory, instead of relying on the switch driver to do so.
+After analyzing the different in-tree needs, it can be observed that the
+required tagger storage is per switch, therefore a ds->tagger_data
+pointer is introduced. In principle, per-port storage could also be
+introduced, although there is no need for it at the moment. Future
+changes will replace the current usage of dp->priv with ds->tagger_data.
+
+We define a "binding" event between the DSA switch tree and the tagging
+protocol. During this binding event, the tagging protocol's ->connect()
+method is called first, and this may allocate some memory for each
+switch of the tree. Then a cross-chip notifier is emitted for the
+switches within that tree, and they are given the opportunity to fix up
+the tagger's memory (for example, they might set up some function
+pointers that represent virtual methods for consuming packets).
+Because the memory is owned by the tagger, there exists a ->disconnect()
+method for the tagger (which is the place to free the resources), but
+there doesn't exist a ->disconnect() method for the switch driver.
+This is part of the design. The switch driver should make minimal use of
+the public part of the tagger data, and only after type-checking it
+using the supplied "proto" argument.
+
+In the code there are in fact two binding events, one is the initial
+event in dsa_switch_setup_tag_protocol(). At this stage, the cross chip
+notifier chains aren't initialized, so we call each switch's connect()
+method by hand. Then there is dsa_tree_bind_tag_proto() during
+dsa_tree_change_tag_proto(), and here we have an old protocol and a new
+one. We first connect to the new one before disconnecting from the old
+one, to simplify error handling a bit and to ensure we remain in a valid
+state at all times.
+
+Co-developed-by: Ansuel Smith <[email protected]>
+Signed-off-by: Ansuel Smith <[email protected]>
+Signed-off-by: Vladimir Oltean <[email protected]>
+Signed-off-by: David S. Miller <[email protected]>
+---
+ include/net/dsa.h  | 12 +++++++++
+ net/dsa/dsa2.c     | 73 +++++++++++++++++++++++++++++++++++++++++++++++++++---
+ net/dsa/dsa_priv.h |  1 +
+ net/dsa/switch.c   | 14 +++++++++++
+ 4 files changed, 96 insertions(+), 4 deletions(-)
+
+--- a/include/net/dsa.h
++++ b/include/net/dsa.h
+@@ -80,12 +80,15 @@ enum dsa_tag_protocol {
+ };
+ 
+ struct dsa_switch;
++struct dsa_switch_tree;
+ 
+ struct dsa_device_ops {
+ 	struct sk_buff *(*xmit)(struct sk_buff *skb, struct net_device *dev);
+ 	struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev);
+ 	void (*flow_dissect)(const struct sk_buff *skb, __be16 *proto,
+ 			     int *offset);
++	int (*connect)(struct dsa_switch_tree *dst);
++	void (*disconnect)(struct dsa_switch_tree *dst);
+ 	unsigned int needed_headroom;
+ 	unsigned int needed_tailroom;
+ 	const char *name;
+@@ -329,6 +332,8 @@ struct dsa_switch {
+ 	 */
+ 	void *priv;
+ 
++	void *tagger_data;
++
+ 	/*
+ 	 * Configuration data for this switch.
+ 	 */
+@@ -584,6 +589,13 @@ struct dsa_switch_ops {
+ 						  enum dsa_tag_protocol mprot);
+ 	int	(*change_tag_protocol)(struct dsa_switch *ds, int port,
+ 				       enum dsa_tag_protocol proto);
++	/*
++	 * Method for switch drivers to connect to the tagging protocol driver
++	 * in current use. The switch driver can provide handlers for certain
++	 * types of packets for switch management.
++	 */
++	int	(*connect_tag_protocol)(struct dsa_switch *ds,
++					enum dsa_tag_protocol proto);
+ 
+ 	/* Optional switch-wide initialization and destruction methods */
+ 	int	(*setup)(struct dsa_switch *ds);
+--- a/net/dsa/dsa2.c
++++ b/net/dsa/dsa2.c
+@@ -230,8 +230,12 @@ static struct dsa_switch_tree *dsa_tree_
+ 
+ static void dsa_tree_free(struct dsa_switch_tree *dst)
+ {
+-	if (dst->tag_ops)
++	if (dst->tag_ops) {
++		if (dst->tag_ops->disconnect)
++			dst->tag_ops->disconnect(dst);
++
+ 		dsa_tag_driver_put(dst->tag_ops);
++	}
+ 	list_del(&dst->list);
+ 	kfree(dst);
+ }
+@@ -805,7 +809,7 @@ static int dsa_switch_setup_tag_protocol
+ 	int port, err;
+ 
+ 	if (tag_ops->proto == dst->default_proto)
+-		return 0;
++		goto connect;
+ 
+ 	for (port = 0; port < ds->num_ports; port++) {
+ 		if (!dsa_is_cpu_port(ds, port))
+@@ -821,6 +825,17 @@ static int dsa_switch_setup_tag_protocol
+ 		}
+ 	}
+ 
++connect:
++	if (ds->ops->connect_tag_protocol) {
++		err = ds->ops->connect_tag_protocol(ds, tag_ops->proto);
++		if (err) {
++			dev_err(ds->dev,
++				"Unable to connect to tag protocol \"%s\": %pe\n",
++				tag_ops->name, ERR_PTR(err));
++			return err;
++		}
++	}
++
+ 	return 0;
+ }
+ 
+@@ -1132,6 +1147,46 @@ static void dsa_tree_teardown(struct dsa
+ 	dst->setup = false;
+ }
+ 
++static int dsa_tree_bind_tag_proto(struct dsa_switch_tree *dst,
++				   const struct dsa_device_ops *tag_ops)
++{
++	const struct dsa_device_ops *old_tag_ops = dst->tag_ops;
++	struct dsa_notifier_tag_proto_info info;
++	int err;
++
++	dst->tag_ops = tag_ops;
++
++	/* Notify the new tagger about the connection to this tree */
++	if (tag_ops->connect) {
++		err = tag_ops->connect(dst);
++		if (err)
++			goto out_revert;
++	}
++
++	/* Notify the switches from this tree about the connection
++	 * to the new tagger
++	 */
++	info.tag_ops = tag_ops;
++	err = dsa_tree_notify(dst, DSA_NOTIFIER_TAG_PROTO_CONNECT, &info);
++	if (err && err != -EOPNOTSUPP)
++		goto out_disconnect;
++
++	/* Notify the old tagger about the disconnection from this tree */
++	if (old_tag_ops->disconnect)
++		old_tag_ops->disconnect(dst);
++
++	return 0;
++
++out_disconnect:
++	/* Revert the new tagger's connection to this tree */
++	if (tag_ops->disconnect)
++		tag_ops->disconnect(dst);
++out_revert:
++	dst->tag_ops = old_tag_ops;
++
++	return err;
++}
++
+ /* Since the dsa/tagging sysfs device attribute is per master, the assumption
+  * is that all DSA switches within a tree share the same tagger, otherwise
+  * they would have formed disjoint trees (different "dsa,member" values).
+@@ -1164,12 +1219,15 @@ int dsa_tree_change_tag_proto(struct dsa
+ 			goto out_unlock;
+ 	}
+ 
++	/* Notify the tag protocol change */
+ 	info.tag_ops = tag_ops;
+ 	err = dsa_tree_notify(dst, DSA_NOTIFIER_TAG_PROTO, &info);
+ 	if (err)
+-		goto out_unwind_tagger;
++		return err;
+ 
+-	dst->tag_ops = tag_ops;
++	err = dsa_tree_bind_tag_proto(dst, tag_ops);
++	if (err)
++		goto out_unwind_tagger;
+ 
+ 	rtnl_unlock();
+ 
+@@ -1257,6 +1315,7 @@ static int dsa_port_parse_cpu(struct dsa
+ 	struct dsa_switch *ds = dp->ds;
+ 	struct dsa_switch_tree *dst = ds->dst;
+ 	enum dsa_tag_protocol default_proto;
++	int err;
+ 
+ 	/* Find out which protocol the switch would prefer. */
+ 	default_proto = dsa_get_tag_protocol(dp, master);
+@@ -1311,6 +1370,12 @@ static int dsa_port_parse_cpu(struct dsa
+ 		 */
+ 		dsa_tag_driver_put(tag_ops);
+ 	} else {
++		if (tag_ops->connect) {
++			err = tag_ops->connect(dst);
++			if (err)
++				return err;
++		}
++
+ 		dst->tag_ops = tag_ops;
+ 	}
+ 
+--- a/net/dsa/dsa_priv.h
++++ b/net/dsa/dsa_priv.h
+@@ -37,6 +37,7 @@ enum {
+ 	DSA_NOTIFIER_VLAN_DEL,
+ 	DSA_NOTIFIER_MTU,
+ 	DSA_NOTIFIER_TAG_PROTO,
++	DSA_NOTIFIER_TAG_PROTO_CONNECT,
+ 	DSA_NOTIFIER_MRP_ADD,
+ 	DSA_NOTIFIER_MRP_DEL,
+ 	DSA_NOTIFIER_MRP_ADD_RING_ROLE,
+--- a/net/dsa/switch.c
++++ b/net/dsa/switch.c
+@@ -616,6 +616,17 @@ static int dsa_switch_change_tag_proto(s
+ 	return 0;
+ }
+ 
++static int dsa_switch_connect_tag_proto(struct dsa_switch *ds,
++					struct dsa_notifier_tag_proto_info *info)
++{
++	const struct dsa_device_ops *tag_ops = info->tag_ops;
++
++	if (!ds->ops->connect_tag_protocol)
++		return -EOPNOTSUPP;
++
++	return ds->ops->connect_tag_protocol(ds, tag_ops->proto);
++}
++
+ static int dsa_switch_mrp_add(struct dsa_switch *ds,
+ 			      struct dsa_notifier_mrp_info *info)
+ {
+@@ -735,6 +746,9 @@ static int dsa_switch_event(struct notif
+ 	case DSA_NOTIFIER_TAG_PROTO:
+ 		err = dsa_switch_change_tag_proto(ds, info);
+ 		break;
++	case DSA_NOTIFIER_TAG_PROTO_CONNECT:
++		err = dsa_switch_connect_tag_proto(ds, info);
++		break;
+ 	case DSA_NOTIFIER_MRP_ADD:
+ 		err = dsa_switch_mrp_add(ds, info);
+ 		break;
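
A hedged sketch of the switch-driver side of this contract; struct mytag_data, the handler and the chosen protocol are all placeholders, not taken from the patch. The idea is to type-check proto first and only then touch the tagger-owned storage:

/* Normally shared between tagger and switch driver via a header. */
struct mytag_data {
	void (*rx_mgmt)(struct dsa_switch *ds, struct sk_buff *skb);
};

static void mydrv_handle_mgmt_frame(struct dsa_switch *ds, struct sk_buff *skb)
{
	/* consume switch management traffic here */
	kfree_skb(skb);
}

static int mydrv_connect_tag_protocol(struct dsa_switch *ds,
				      enum dsa_tag_protocol proto)
{
	struct mytag_data *td = ds->tagger_data;

	if (proto != DSA_TAG_PROTO_OCELOT_8021Q)	/* whichever proto the driver expects */
		return -EOPNOTSUPP;

	td->rx_mgmt = mydrv_handle_mgmt_frame;

	return 0;
}

The function is then plugged into the driver's dsa_switch_ops as .connect_tag_protocol.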

+ 274 - 0
target/linux/generic/backport-6.1/701-v5.17-dsa-make-tagging-protocols-connect-to-individual-switches.patch

@@ -0,0 +1,274 @@
+From 7f2973149c22e7a6fee4c0c9fa6b8e4108e9c208 Mon Sep 17 00:00:00 2001
+From: Vladimir Oltean <[email protected]>
+Date: Tue, 14 Dec 2021 03:45:36 +0200
+Subject: net: dsa: make tagging protocols connect to individual switches from
+ a tree
+
+On the NXP Bluebox 3 board which uses a multi-switch setup with sja1105,
+the mechanism through which the tagger connects to the switch tree is
+broken, due to improper DSA code design. At the time when tag_ops->connect()
+is called in dsa_port_parse_cpu(), DSA hasn't finished "touching" all
+the ports, so it doesn't know how large the tree is and how many ports
+it has. It has just seen the first CPU port by this time. As a result,
+this function will call the tagger's ->connect method too early, and the
+tagger will connect only to the first switch from the tree.
+
+This could be perhaps addressed a bit more simply by just moving the
+tag_ops->connect(dst) call a bit later (for example in dsa_tree_setup),
+but there is already a design inconsistency at present: on the switch
+side, the notification is on a per-switch basis, but on the tagger side,
+it is on a per-tree basis. Furthermore, the persistent storage itself is
+per switch (ds->tagger_data). And the tagger connect and disconnect
+procedures (at least the ones that exist currently) could see a fair bit
+of simplification if they didn't have to iterate through the switches of
+a tree.
+
+To fix the issue, this change transforms tag_ops->connect(dst) into
+tag_ops->connect(ds) and moves it somewhere where we already iterate
+over all switches of a tree. That is in dsa_switch_setup_tag_protocol(),
+which is a good placement because we already have there the connection
+call to the switch side of things.
+
+As for the dsa_tree_bind_tag_proto() method (called from the code path
+that changes the tag protocol), things are a bit more complicated
+because we receive the tree as argument, yet when we unwind on errors,
+it would be nice to not call tag_ops->disconnect(ds) where we didn't
+previously call tag_ops->connect(ds). We didn't have this problem before
+because the tag_ops connection operations passed the entire dst before,
+and this is more fine grained now. To solve the error rewind case using
+the new API, we have to create yet one more cross-chip notifier for
+disconnection, and stay connected with the old tag protocol to all the
+switches in the tree until we've succeeded to connect with the new one
+as well. So if something fails half way, the whole tree is still
+connected to the old tagger. But there may still be leaks if the tagger
+fails to connect to the 2nd out of 3 switches in a tree: somebody needs
+to tell the tagger to disconnect from the first switch. Nothing comes
+for free, and this was previously handled privately by the tagging
+protocol driver before, but now we need to emit a disconnect cross-chip
+notifier for that, because DSA has to take care of the unwind path. We
+assume that the tagging protocol has connected to a switch if it has set
+ds->tagger_data to something, otherwise we avoid calling its
+disconnection method in the error rewind path.
+
+The rest of the changes are in the tagging protocol drivers, and have to
+do with the replacement of dst with ds. The iteration is removed and the
+error unwind path is simplified, as mentioned above.
+
+Signed-off-by: Vladimir Oltean <[email protected]>
+Signed-off-by: David S. Miller <[email protected]>
+---
+ include/net/dsa.h          |  5 ++--
+ net/dsa/dsa2.c             | 44 +++++++++++++-----------------
+ net/dsa/dsa_priv.h         |  1 +
+ net/dsa/switch.c           | 52 ++++++++++++++++++++++++++++++++---
+ net/dsa/tag_ocelot_8021q.c | 53 +++++++++++-------------------------
+ net/dsa/tag_sja1105.c      | 67 ++++++++++++++++------------------------------
+ 6 files changed, 109 insertions(+), 113 deletions(-)
+
+--- a/include/net/dsa.h
++++ b/include/net/dsa.h
+@@ -80,15 +80,14 @@ enum dsa_tag_protocol {
+ };
+ 
+ struct dsa_switch;
+-struct dsa_switch_tree;
+ 
+ struct dsa_device_ops {
+ 	struct sk_buff *(*xmit)(struct sk_buff *skb, struct net_device *dev);
+ 	struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev);
+ 	void (*flow_dissect)(const struct sk_buff *skb, __be16 *proto,
+ 			     int *offset);
+-	int (*connect)(struct dsa_switch_tree *dst);
+-	void (*disconnect)(struct dsa_switch_tree *dst);
++	int (*connect)(struct dsa_switch *ds);
++	void (*disconnect)(struct dsa_switch *ds);
+ 	unsigned int needed_headroom;
+ 	unsigned int needed_tailroom;
+ 	const char *name;
+--- a/net/dsa/dsa2.c
++++ b/net/dsa/dsa2.c
+@@ -230,12 +230,8 @@ static struct dsa_switch_tree *dsa_tree_
+ 
+ static void dsa_tree_free(struct dsa_switch_tree *dst)
+ {
+-	if (dst->tag_ops) {
+-		if (dst->tag_ops->disconnect)
+-			dst->tag_ops->disconnect(dst);
+-
++	if (dst->tag_ops)
+ 		dsa_tag_driver_put(dst->tag_ops);
+-	}
+ 	list_del(&dst->list);
+ 	kfree(dst);
+ }
+@@ -826,17 +822,29 @@ static int dsa_switch_setup_tag_protocol
+ 	}
+ 
+ connect:
++	if (tag_ops->connect) {
++		err = tag_ops->connect(ds);
++		if (err)
++			return err;
++	}
++
+ 	if (ds->ops->connect_tag_protocol) {
+ 		err = ds->ops->connect_tag_protocol(ds, tag_ops->proto);
+ 		if (err) {
+ 			dev_err(ds->dev,
+ 				"Unable to connect to tag protocol \"%s\": %pe\n",
+ 				tag_ops->name, ERR_PTR(err));
+-			return err;
++			goto disconnect;
+ 		}
+ 	}
+ 
+ 	return 0;
++
++disconnect:
++	if (tag_ops->disconnect)
++		tag_ops->disconnect(ds);
++
++	return err;
+ }
+ 
+ static int dsa_switch_setup(struct dsa_switch *ds)
+@@ -1156,13 +1164,6 @@ static int dsa_tree_bind_tag_proto(struc
+ 
+ 	dst->tag_ops = tag_ops;
+ 
+-	/* Notify the new tagger about the connection to this tree */
+-	if (tag_ops->connect) {
+-		err = tag_ops->connect(dst);
+-		if (err)
+-			goto out_revert;
+-	}
+-
+ 	/* Notify the switches from this tree about the connection
+ 	 * to the new tagger
+ 	 */
+@@ -1172,16 +1173,14 @@ static int dsa_tree_bind_tag_proto(struc
+ 		goto out_disconnect;
+ 
+ 	/* Notify the old tagger about the disconnection from this tree */
+-	if (old_tag_ops->disconnect)
+-		old_tag_ops->disconnect(dst);
++	info.tag_ops = old_tag_ops;
++	dsa_tree_notify(dst, DSA_NOTIFIER_TAG_PROTO_DISCONNECT, &info);
+ 
+ 	return 0;
+ 
+ out_disconnect:
+-	/* Revert the new tagger's connection to this tree */
+-	if (tag_ops->disconnect)
+-		tag_ops->disconnect(dst);
+-out_revert:
++	info.tag_ops = tag_ops;
++	dsa_tree_notify(dst, DSA_NOTIFIER_TAG_PROTO_DISCONNECT, &info);
+ 	dst->tag_ops = old_tag_ops;
+ 
+ 	return err;
+@@ -1315,7 +1314,6 @@ static int dsa_port_parse_cpu(struct dsa
+ 	struct dsa_switch *ds = dp->ds;
+ 	struct dsa_switch_tree *dst = ds->dst;
+ 	enum dsa_tag_protocol default_proto;
+-	int err;
+ 
+ 	/* Find out which protocol the switch would prefer. */
+ 	default_proto = dsa_get_tag_protocol(dp, master);
+@@ -1370,12 +1368,6 @@ static int dsa_port_parse_cpu(struct dsa
+ 		 */
+ 		dsa_tag_driver_put(tag_ops);
+ 	} else {
+-		if (tag_ops->connect) {
+-			err = tag_ops->connect(dst);
+-			if (err)
+-				return err;
+-		}
+-
+ 		dst->tag_ops = tag_ops;
+ 	}
+ 
+--- a/net/dsa/dsa_priv.h
++++ b/net/dsa/dsa_priv.h
+@@ -38,6 +38,7 @@ enum {
+ 	DSA_NOTIFIER_MTU,
+ 	DSA_NOTIFIER_TAG_PROTO,
+ 	DSA_NOTIFIER_TAG_PROTO_CONNECT,
++	DSA_NOTIFIER_TAG_PROTO_DISCONNECT,
+ 	DSA_NOTIFIER_MRP_ADD,
+ 	DSA_NOTIFIER_MRP_DEL,
+ 	DSA_NOTIFIER_MRP_ADD_RING_ROLE,
+--- a/net/dsa/switch.c
++++ b/net/dsa/switch.c
+@@ -616,15 +616,58 @@ static int dsa_switch_change_tag_proto(s
+ 	return 0;
+ }
+ 
+-static int dsa_switch_connect_tag_proto(struct dsa_switch *ds,
+-					struct dsa_notifier_tag_proto_info *info)
++/* We use the same cross-chip notifiers to inform both the tagger side, as well
++ * as the switch side, of connection and disconnection events.
++ * Since ds->tagger_data is owned by the tagger, it isn't a hard error if the
++ * switch side doesn't support connecting to this tagger, and therefore, the
++ * fact that we don't disconnect the tagger side doesn't constitute a memory
++ * leak: the tagger will still operate with persistent per-switch memory, just
++ * with the switch side unconnected to it. What does constitute a hard error is
++ * when the switch side supports connecting but fails.
++ */
++static int
++dsa_switch_connect_tag_proto(struct dsa_switch *ds,
++			     struct dsa_notifier_tag_proto_info *info)
+ {
+ 	const struct dsa_device_ops *tag_ops = info->tag_ops;
++	int err;
++
++	/* Notify the new tagger about the connection to this switch */
++	if (tag_ops->connect) {
++		err = tag_ops->connect(ds);
++		if (err)
++			return err;
++	}
+ 
+ 	if (!ds->ops->connect_tag_protocol)
+ 		return -EOPNOTSUPP;
+ 
+-	return ds->ops->connect_tag_protocol(ds, tag_ops->proto);
++	/* Notify the switch about the connection to the new tagger */
++	err = ds->ops->connect_tag_protocol(ds, tag_ops->proto);
++	if (err) {
++		/* Revert the new tagger's connection to this tree */
++		if (tag_ops->disconnect)
++			tag_ops->disconnect(ds);
++		return err;
++	}
++
++	return 0;
++}
++
++static int
++dsa_switch_disconnect_tag_proto(struct dsa_switch *ds,
++				struct dsa_notifier_tag_proto_info *info)
++{
++	const struct dsa_device_ops *tag_ops = info->tag_ops;
++
++	/* Notify the tagger about the disconnection from this switch */
++	if (tag_ops->disconnect && ds->tagger_data)
++		tag_ops->disconnect(ds);
++
++	/* No need to notify the switch, since it shouldn't have any
++	 * resources to tear down
++	 */
++	return 0;
+ }
+ 
+ static int dsa_switch_mrp_add(struct dsa_switch *ds,
+@@ -749,6 +792,9 @@ static int dsa_switch_event(struct notif
+ 	case DSA_NOTIFIER_TAG_PROTO_CONNECT:
+ 		err = dsa_switch_connect_tag_proto(ds, info);
+ 		break;
++	case DSA_NOTIFIER_TAG_PROTO_DISCONNECT:
++		err = dsa_switch_disconnect_tag_proto(ds, info);
++		break;
+ 	case DSA_NOTIFIER_MRP_ADD:
+ 		err = dsa_switch_mrp_add(ds, info);
+ 		break;
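
And the matching tagger-side hooks, which after this change receive a single switch rather than the whole tree; a minimal sketch reusing the illustrative struct mytag_data from the note above:

static int mytag_connect(struct dsa_switch *ds)
{
	struct mytag_data *priv;

	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
	if (!priv)
		return -ENOMEM;

	ds->tagger_data = priv;

	return 0;
}

static void mytag_disconnect(struct dsa_switch *ds)
{
	kfree(ds->tagger_data);
	ds->tagger_data = NULL;
}

These go into the tagger's struct dsa_device_ops as .connect and .disconnect, next to .xmit and .rcv.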

+ 327 - 0
target/linux/generic/backport-6.1/702-v5.19-00-net-ethernet-mtk_eth_soc-add-support-for-coherent-DM.patch

@@ -0,0 +1,327 @@
+From: Felix Fietkau <[email protected]>
+Date: Sat, 5 Feb 2022 17:59:07 +0100
+Subject: [PATCH] net: ethernet: mtk_eth_soc: add support for coherent
+ DMA
+
+It improves performance by eliminating the need for a cache flush on rx and tx
+In preparation for supporting WED (Wireless Ethernet Dispatch), also add a
+function for disabling coherent DMA at runtime.
+
+Signed-off-by: Felix Fietkau <[email protected]>
+---
+
+--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+@@ -9,6 +9,7 @@
+ #include <linux/of_device.h>
+ #include <linux/of_mdio.h>
+ #include <linux/of_net.h>
++#include <linux/of_address.h>
+ #include <linux/mfd/syscon.h>
+ #include <linux/regmap.h>
+ #include <linux/clk.h>
+@@ -840,7 +841,7 @@ static int mtk_init_fq_dma(struct mtk_et
+ 	dma_addr_t dma_addr;
+ 	int i;
+ 
+-	eth->scratch_ring = dma_alloc_coherent(eth->dev,
++	eth->scratch_ring = dma_alloc_coherent(eth->dma_dev,
+ 					       cnt * sizeof(struct mtk_tx_dma),
+ 					       &eth->phy_scratch_ring,
+ 					       GFP_ATOMIC);
+@@ -852,10 +853,10 @@ static int mtk_init_fq_dma(struct mtk_et
+ 	if (unlikely(!eth->scratch_head))
+ 		return -ENOMEM;
+ 
+-	dma_addr = dma_map_single(eth->dev,
++	dma_addr = dma_map_single(eth->dma_dev,
+ 				  eth->scratch_head, cnt * MTK_QDMA_PAGE_SIZE,
+ 				  DMA_FROM_DEVICE);
+-	if (unlikely(dma_mapping_error(eth->dev, dma_addr)))
++	if (unlikely(dma_mapping_error(eth->dma_dev, dma_addr)))
+ 		return -ENOMEM;
+ 
+ 	phy_ring_tail = eth->phy_scratch_ring +
+@@ -909,26 +910,26 @@ static void mtk_tx_unmap(struct mtk_eth
+ {
+ 	if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) {
+ 		if (tx_buf->flags & MTK_TX_FLAGS_SINGLE0) {
+-			dma_unmap_single(eth->dev,
++			dma_unmap_single(eth->dma_dev,
+ 					 dma_unmap_addr(tx_buf, dma_addr0),
+ 					 dma_unmap_len(tx_buf, dma_len0),
+ 					 DMA_TO_DEVICE);
+ 		} else if (tx_buf->flags & MTK_TX_FLAGS_PAGE0) {
+-			dma_unmap_page(eth->dev,
++			dma_unmap_page(eth->dma_dev,
+ 				       dma_unmap_addr(tx_buf, dma_addr0),
+ 				       dma_unmap_len(tx_buf, dma_len0),
+ 				       DMA_TO_DEVICE);
+ 		}
+ 	} else {
+ 		if (dma_unmap_len(tx_buf, dma_len0)) {
+-			dma_unmap_page(eth->dev,
++			dma_unmap_page(eth->dma_dev,
+ 				       dma_unmap_addr(tx_buf, dma_addr0),
+ 				       dma_unmap_len(tx_buf, dma_len0),
+ 				       DMA_TO_DEVICE);
+ 		}
+ 
+ 		if (dma_unmap_len(tx_buf, dma_len1)) {
+-			dma_unmap_page(eth->dev,
++			dma_unmap_page(eth->dma_dev,
+ 				       dma_unmap_addr(tx_buf, dma_addr1),
+ 				       dma_unmap_len(tx_buf, dma_len1),
+ 				       DMA_TO_DEVICE);
+@@ -1006,9 +1007,9 @@ static int mtk_tx_map(struct sk_buff *sk
+ 	if (skb_vlan_tag_present(skb))
+ 		txd4 |= TX_DMA_INS_VLAN | skb_vlan_tag_get(skb);
+ 
+-	mapped_addr = dma_map_single(eth->dev, skb->data,
++	mapped_addr = dma_map_single(eth->dma_dev, skb->data,
+ 				     skb_headlen(skb), DMA_TO_DEVICE);
+-	if (unlikely(dma_mapping_error(eth->dev, mapped_addr)))
++	if (unlikely(dma_mapping_error(eth->dma_dev, mapped_addr)))
+ 		return -ENOMEM;
+ 
+ 	WRITE_ONCE(itxd->txd1, mapped_addr);
+@@ -1047,10 +1048,10 @@ static int mtk_tx_map(struct sk_buff *sk
+ 
+ 
+ 			frag_map_size = min(frag_size, MTK_TX_DMA_BUF_LEN);
+-			mapped_addr = skb_frag_dma_map(eth->dev, frag, offset,
++			mapped_addr = skb_frag_dma_map(eth->dma_dev, frag, offset,
+ 						       frag_map_size,
+ 						       DMA_TO_DEVICE);
+-			if (unlikely(dma_mapping_error(eth->dev, mapped_addr)))
++			if (unlikely(dma_mapping_error(eth->dma_dev, mapped_addr)))
+ 				goto err_dma;
+ 
+ 			if (i == nr_frags - 1 &&
+@@ -1331,18 +1332,18 @@ static int mtk_poll_rx(struct napi_struc
+ 			netdev->stats.rx_dropped++;
+ 			goto release_desc;
+ 		}
+-		dma_addr = dma_map_single(eth->dev,
++		dma_addr = dma_map_single(eth->dma_dev,
+ 					  new_data + NET_SKB_PAD +
+ 					  eth->ip_align,
+ 					  ring->buf_size,
+ 					  DMA_FROM_DEVICE);
+-		if (unlikely(dma_mapping_error(eth->dev, dma_addr))) {
++		if (unlikely(dma_mapping_error(eth->dma_dev, dma_addr))) {
+ 			skb_free_frag(new_data);
+ 			netdev->stats.rx_dropped++;
+ 			goto release_desc;
+ 		}
+ 
+-		dma_unmap_single(eth->dev, trxd.rxd1,
++		dma_unmap_single(eth->dma_dev, trxd.rxd1,
+ 				 ring->buf_size, DMA_FROM_DEVICE);
+ 
+ 		/* receive data */
+@@ -1615,7 +1616,7 @@ static int mtk_tx_alloc(struct mtk_eth *
+ 	if (!ring->buf)
+ 		goto no_tx_mem;
+ 
+-	ring->dma = dma_alloc_coherent(eth->dev, MTK_DMA_SIZE * sz,
++	ring->dma = dma_alloc_coherent(eth->dma_dev, MTK_DMA_SIZE * sz,
+ 				       &ring->phys, GFP_ATOMIC);
+ 	if (!ring->dma)
+ 		goto no_tx_mem;
+@@ -1633,7 +1634,7 @@ static int mtk_tx_alloc(struct mtk_eth *
+ 	 * descriptors in ring->dma_pdma.
+ 	 */
+ 	if (!MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) {
+-		ring->dma_pdma = dma_alloc_coherent(eth->dev, MTK_DMA_SIZE * sz,
++		ring->dma_pdma = dma_alloc_coherent(eth->dma_dev, MTK_DMA_SIZE * sz,
+ 						    &ring->phys_pdma,
+ 						    GFP_ATOMIC);
+ 		if (!ring->dma_pdma)
+@@ -1692,7 +1693,7 @@ static void mtk_tx_clean(struct mtk_eth
+ 	}
+ 
+ 	if (ring->dma) {
+-		dma_free_coherent(eth->dev,
++		dma_free_coherent(eth->dma_dev,
+ 				  MTK_DMA_SIZE * sizeof(*ring->dma),
+ 				  ring->dma,
+ 				  ring->phys);
+@@ -1700,7 +1701,7 @@ static void mtk_tx_clean(struct mtk_eth
+ 	}
+ 
+ 	if (ring->dma_pdma) {
+-		dma_free_coherent(eth->dev,
++		dma_free_coherent(eth->dma_dev,
+ 				  MTK_DMA_SIZE * sizeof(*ring->dma_pdma),
+ 				  ring->dma_pdma,
+ 				  ring->phys_pdma);
+@@ -1748,18 +1749,18 @@ static int mtk_rx_alloc(struct mtk_eth *
+ 			return -ENOMEM;
+ 	}
+ 
+-	ring->dma = dma_alloc_coherent(eth->dev,
++	ring->dma = dma_alloc_coherent(eth->dma_dev,
+ 				       rx_dma_size * sizeof(*ring->dma),
+ 				       &ring->phys, GFP_ATOMIC);
+ 	if (!ring->dma)
+ 		return -ENOMEM;
+ 
+ 	for (i = 0; i < rx_dma_size; i++) {
+-		dma_addr_t dma_addr = dma_map_single(eth->dev,
++		dma_addr_t dma_addr = dma_map_single(eth->dma_dev,
+ 				ring->data[i] + NET_SKB_PAD + eth->ip_align,
+ 				ring->buf_size,
+ 				DMA_FROM_DEVICE);
+-		if (unlikely(dma_mapping_error(eth->dev, dma_addr)))
++		if (unlikely(dma_mapping_error(eth->dma_dev, dma_addr)))
+ 			return -ENOMEM;
+ 		ring->dma[i].rxd1 = (unsigned int)dma_addr;
+ 
+@@ -1795,7 +1796,7 @@ static void mtk_rx_clean(struct mtk_eth
+ 				continue;
+ 			if (!ring->dma[i].rxd1)
+ 				continue;
+-			dma_unmap_single(eth->dev,
++			dma_unmap_single(eth->dma_dev,
+ 					 ring->dma[i].rxd1,
+ 					 ring->buf_size,
+ 					 DMA_FROM_DEVICE);
+@@ -1806,7 +1807,7 @@ static void mtk_rx_clean(struct mtk_eth
+ 	}
+ 
+ 	if (ring->dma) {
+-		dma_free_coherent(eth->dev,
++		dma_free_coherent(eth->dma_dev,
+ 				  ring->dma_size * sizeof(*ring->dma),
+ 				  ring->dma,
+ 				  ring->phys);
+@@ -2162,7 +2163,7 @@ static void mtk_dma_free(struct mtk_eth
+ 		if (eth->netdev[i])
+ 			netdev_reset_queue(eth->netdev[i]);
+ 	if (eth->scratch_ring) {
+-		dma_free_coherent(eth->dev,
++		dma_free_coherent(eth->dma_dev,
+ 				  MTK_DMA_SIZE * sizeof(struct mtk_tx_dma),
+ 				  eth->scratch_ring,
+ 				  eth->phy_scratch_ring);
+@@ -2514,6 +2515,8 @@ static void mtk_dim_tx(struct work_struc
+ 
+ static int mtk_hw_init(struct mtk_eth *eth)
+ {
++	u32 dma_mask = ETHSYS_DMA_AG_MAP_PDMA | ETHSYS_DMA_AG_MAP_QDMA |
++		       ETHSYS_DMA_AG_MAP_PPE;
+ 	int i, val, ret;
+ 
+ 	if (test_and_set_bit(MTK_HW_INIT, &eth->state))
+@@ -2526,6 +2529,10 @@ static int mtk_hw_init(struct mtk_eth *e
+ 	if (ret)
+ 		goto err_disable_pm;
+ 
++	if (eth->ethsys)
++		regmap_update_bits(eth->ethsys, ETHSYS_DMA_AG_MAP, dma_mask,
++				   of_dma_is_coherent(eth->dma_dev->of_node) * dma_mask);
++
+ 	if (MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628)) {
+ 		ret = device_reset(eth->dev);
+ 		if (ret) {
+@@ -3079,6 +3086,35 @@ free_netdev:
+ 	return err;
+ }
+ 
++void mtk_eth_set_dma_device(struct mtk_eth *eth, struct device *dma_dev)
++{
++	struct net_device *dev, *tmp;
++	LIST_HEAD(dev_list);
++	int i;
++
++	rtnl_lock();
++
++	for (i = 0; i < MTK_MAC_COUNT; i++) {
++		dev = eth->netdev[i];
++
++		if (!dev || !(dev->flags & IFF_UP))
++			continue;
++
++		list_add_tail(&dev->close_list, &dev_list);
++	}
++
++	dev_close_many(&dev_list, false);
++
++	eth->dma_dev = dma_dev;
++
++	list_for_each_entry_safe(dev, tmp, &dev_list, close_list) {
++		list_del_init(&dev->close_list);
++		dev_open(dev, NULL);
++	}
++
++	rtnl_unlock();
++}
++
+ static int mtk_probe(struct platform_device *pdev)
+ {
+ 	struct device_node *mac_np;
+@@ -3092,6 +3128,7 @@ static int mtk_probe(struct platform_dev
+ 	eth->soc = of_device_get_match_data(&pdev->dev);
+ 
+ 	eth->dev = &pdev->dev;
++	eth->dma_dev = &pdev->dev;
+ 	eth->base = devm_platform_ioremap_resource(pdev, 0);
+ 	if (IS_ERR(eth->base))
+ 		return PTR_ERR(eth->base);
+@@ -3140,6 +3177,16 @@ static int mtk_probe(struct platform_dev
+ 		}
+ 	}
+ 
++	if (of_dma_is_coherent(pdev->dev.of_node)) {
++		struct regmap *cci;
++
++		cci = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
++						      "mediatek,cci-control");
++		/* enable CPU/bus coherency */
++		if (!IS_ERR(cci))
++			regmap_write(cci, 0, 3);
++	}
++
+ 	if (MTK_HAS_CAPS(eth->soc->caps, MTK_SGMII)) {
+ 		eth->sgmii = devm_kzalloc(eth->dev, sizeof(*eth->sgmii),
+ 					  GFP_KERNEL);
+--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+@@ -463,6 +463,12 @@
+ #define RSTCTRL_FE		BIT(6)
+ #define RSTCTRL_PPE		BIT(31)
+ 
++/* ethernet dma channel agent map */
++#define ETHSYS_DMA_AG_MAP	0x408
++#define ETHSYS_DMA_AG_MAP_PDMA	BIT(0)
++#define ETHSYS_DMA_AG_MAP_QDMA	BIT(1)
++#define ETHSYS_DMA_AG_MAP_PPE	BIT(2)
++
+ /* SGMII subsystem config registers */
+ /* Register to auto-negotiation restart */
+ #define SGMSYS_PCS_CONTROL_1	0x0
+@@ -880,6 +886,7 @@ struct mtk_sgmii {
+ /* struct mtk_eth -	This is the main datasructure for holding the state
+  *			of the driver
+  * @dev:		The device pointer
++ * @dma_dev:	The device pointer used for dma mapping/alloc
+  * @base:		The mapped register i/o base
+  * @page_lock:		Make sure that register operations are atomic
+  * @tx_irq__lock:	Make sure that IRQ register operations are atomic
+@@ -923,6 +930,7 @@ struct mtk_sgmii {
+ 
+ struct mtk_eth {
+ 	struct device			*dev;
++	struct device			*dma_dev;
+ 	void __iomem			*base;
+ 	spinlock_t			page_lock;
+ 	spinlock_t			tx_irq_lock;
+@@ -1021,6 +1029,7 @@ int mtk_gmac_rgmii_path_setup(struct mtk
+ int mtk_eth_offload_init(struct mtk_eth *eth);
+ int mtk_eth_setup_tc(struct net_device *dev, enum tc_setup_type type,
+ 		     void *type_data);
++void mtk_eth_set_dma_device(struct mtk_eth *eth, struct device *dma_dev);
+ 
+ 
+ #endif /* MTK_ETH_H */
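
Not part of the patch above, just orientation: eth->dma_dev starts out identical to &pdev->dev, and every dma_alloc_coherent()/dma_map_*() call in the driver now goes through it instead of eth->dev. A subsystem that takes over the DMA rings (such as the WED driver added further down) is expected to repoint it via mtk_eth_set_dma_device(), which closes the affected netdevs, swaps the pointer and reopens them under rtnl_lock. A minimal sketch of such a caller, with a hypothetical helper name, mirroring what the WED attach path does later:

#include <linux/of_address.h>	/* of_dma_is_coherent() */
#include "mtk_eth_soc.h"

/* Hypothetical helper: hand DMA mapping over to offload_dev (e.g. the WED
 * platform device) when the ethernet node is marked dma-coherent, while
 * eth->dev keeps handling MMIO, clocks and interrupts. */
static void example_redirect_dma(struct mtk_eth *eth, struct device *offload_dev)
{
	if (eth->dma_dev == eth->dev &&
	    of_dma_is_coherent(eth->dev->of_node))
		mtk_eth_set_dma_device(eth, offload_dev);
}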

+ 30 - 0
target/linux/generic/backport-6.1/702-v5.19-01-arm64-dts-mediatek-mt7622-add-support-for-coherent-D.patch

@@ -0,0 +1,30 @@
+From: Felix Fietkau <[email protected]>
+Date: Mon, 7 Feb 2022 10:27:22 +0100
+Subject: [PATCH] arm64: dts: mediatek: mt7622: add support for coherent
+ DMA
+
+It improves performance by eliminating the need for a cache flush on rx and tx
+
+Signed-off-by: Felix Fietkau <[email protected]>
+---
+
+--- a/arch/arm64/boot/dts/mediatek/mt7622.dtsi
++++ b/arch/arm64/boot/dts/mediatek/mt7622.dtsi
+@@ -357,7 +357,7 @@
+ 		};
+ 
+ 		cci_control2: slave-if@5000 {
+-			compatible = "arm,cci-400-ctrl-if";
++			compatible = "arm,cci-400-ctrl-if", "syscon";
+ 			interface-type = "ace";
+ 			reg = <0x5000 0x1000>;
+ 		};
+@@ -938,6 +938,8 @@
+ 		power-domains = <&scpsys MT7622_POWER_DOMAIN_ETHSYS>;
+ 		mediatek,ethsys = <&ethsys>;
+ 		mediatek,sgmiisys = <&sgmiisys>;
++		mediatek,cci-control = <&cci_control2>;
++		dma-coherent;
+ 		#address-cells = <1>;
+ 		#size-cells = <0>;
+ 		status = "disabled";

+ 1679 - 0
target/linux/generic/backport-6.1/702-v5.19-02-net-ethernet-mtk_eth_soc-add-support-for-Wireless-Et.patch

@@ -0,0 +1,1679 @@
+From: Felix Fietkau <[email protected]>
+Date: Sat, 5 Feb 2022 17:56:08 +0100
+Subject: [PATCH] net: ethernet: mtk_eth_soc: add support for Wireless
+ Ethernet Dispatch (WED)
+
+The Wireless Ethernet Dispatch subsystem on the MT7622 SoC can be
+configured to intercept and handle access to the DMA queues and
+PCIe interrupts for a MT7615/MT7915 wireless card.
+It can manage the internal WDMA (Wireless DMA) controller, which allows
+ethernet packets to be passed from the packet switch engine (PSE) to the
+wireless card, bypassing the CPU entirely.
+This can be used to implement hardware flow offloading from ethernet to
+WLAN.
+
+Signed-off-by: Felix Fietkau <[email protected]>
+---
+ create mode 100644 drivers/net/ethernet/mediatek/mtk_wed.c
+ create mode 100644 drivers/net/ethernet/mediatek/mtk_wed.h
+ create mode 100644 drivers/net/ethernet/mediatek/mtk_wed_debugfs.c
+ create mode 100644 drivers/net/ethernet/mediatek/mtk_wed_ops.c
+ create mode 100644 drivers/net/ethernet/mediatek/mtk_wed_regs.h
+ create mode 100644 include/linux/soc/mediatek/mtk_wed.h
+
+--- a/drivers/net/ethernet/mediatek/Kconfig
++++ b/drivers/net/ethernet/mediatek/Kconfig
+@@ -7,6 +7,10 @@ config NET_VENDOR_MEDIATEK
+ 
+ if NET_VENDOR_MEDIATEK
+ 
++config NET_MEDIATEK_SOC_WED
++	depends on ARCH_MEDIATEK || COMPILE_TEST
++	def_bool NET_MEDIATEK_SOC != n
++
+ config NET_MEDIATEK_SOC
+ 	tristate "MediaTek SoC Gigabit Ethernet support"
+ 	depends on NET_DSA || !NET_DSA
+--- a/drivers/net/ethernet/mediatek/Makefile
++++ b/drivers/net/ethernet/mediatek/Makefile
+@@ -5,4 +5,9 @@
+ 
+ obj-$(CONFIG_NET_MEDIATEK_SOC) += mtk_eth.o
+ mtk_eth-y := mtk_eth_soc.o mtk_sgmii.o mtk_eth_path.o mtk_ppe.o mtk_ppe_debugfs.o mtk_ppe_offload.o
++mtk_eth-$(CONFIG_NET_MEDIATEK_SOC_WED) += mtk_wed.o
++ifdef CONFIG_DEBUG_FS
++mtk_eth-$(CONFIG_NET_MEDIATEK_SOC_WED) += mtk_wed_debugfs.o
++endif
++obj-$(CONFIG_NET_MEDIATEK_SOC_WED) += mtk_wed_ops.o
+ obj-$(CONFIG_NET_MEDIATEK_STAR_EMAC) += mtk_star_emac.o
+--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+@@ -24,6 +24,7 @@
+ #include <net/dsa.h>
+ 
+ #include "mtk_eth_soc.h"
++#include "mtk_wed.h"
+ 
+ static int mtk_msg_level = -1;
+ module_param_named(msg_level, mtk_msg_level, int, 0);
+@@ -3209,6 +3210,22 @@ static int mtk_probe(struct platform_dev
+ 		}
+ 	}
+ 
++	for (i = 0;; i++) {
++		struct device_node *np = of_parse_phandle(pdev->dev.of_node,
++							  "mediatek,wed", i);
++		static const u32 wdma_regs[] = {
++			MTK_WDMA0_BASE,
++			MTK_WDMA1_BASE
++		};
++		void __iomem *wdma;
++
++		if (!np || i >= ARRAY_SIZE(wdma_regs))
++			break;
++
++		wdma = eth->base + wdma_regs[i];
++		mtk_wed_add_hw(np, eth, wdma, i);
++	}
++
+ 	for (i = 0; i < 3; i++) {
+ 		if (MTK_HAS_CAPS(eth->soc->caps, MTK_SHARED_INT) && i > 0)
+ 			eth->irq[i] = eth->irq[0];
+--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+@@ -295,6 +295,9 @@
+ #define MTK_GDM1_TX_GPCNT	0x2438
+ #define MTK_STAT_OFFSET		0x40
+ 
++#define MTK_WDMA0_BASE		0x2800
++#define MTK_WDMA1_BASE		0x2c00
++
+ /* QDMA descriptor txd4 */
+ #define TX_DMA_CHKSUM		(0x7 << 29)
+ #define TX_DMA_TSO		BIT(28)
+--- /dev/null
++++ b/drivers/net/ethernet/mediatek/mtk_wed.c
+@@ -0,0 +1,875 @@
++// SPDX-License-Identifier: GPL-2.0-only
++/* Copyright (C) 2021 Felix Fietkau <[email protected]> */
++
++#include <linux/kernel.h>
++#include <linux/slab.h>
++#include <linux/module.h>
++#include <linux/bitfield.h>
++#include <linux/dma-mapping.h>
++#include <linux/skbuff.h>
++#include <linux/of_platform.h>
++#include <linux/of_address.h>
++#include <linux/mfd/syscon.h>
++#include <linux/debugfs.h>
++#include <linux/soc/mediatek/mtk_wed.h>
++#include "mtk_eth_soc.h"
++#include "mtk_wed_regs.h"
++#include "mtk_wed.h"
++#include "mtk_ppe.h"
++
++#define MTK_PCIE_BASE(n)		(0x1a143000 + (n) * 0x2000)
++
++#define MTK_WED_PKT_SIZE		1900
++#define MTK_WED_BUF_SIZE		2048
++#define MTK_WED_BUF_PER_PAGE		(PAGE_SIZE / 2048)
++
++#define MTK_WED_TX_RING_SIZE		2048
++#define MTK_WED_WDMA_RING_SIZE		1024
++
++static struct mtk_wed_hw *hw_list[2];
++static DEFINE_MUTEX(hw_lock);
++
++static void
++wed_m32(struct mtk_wed_device *dev, u32 reg, u32 mask, u32 val)
++{
++	regmap_update_bits(dev->hw->regs, reg, mask | val, val);
++}
++
++static void
++wed_set(struct mtk_wed_device *dev, u32 reg, u32 mask)
++{
++	return wed_m32(dev, reg, 0, mask);
++}
++
++static void
++wed_clr(struct mtk_wed_device *dev, u32 reg, u32 mask)
++{
++	return wed_m32(dev, reg, mask, 0);
++}
++
++static void
++wdma_m32(struct mtk_wed_device *dev, u32 reg, u32 mask, u32 val)
++{
++	wdma_w32(dev, reg, (wdma_r32(dev, reg) & ~mask) | val);
++}
++
++static void
++wdma_set(struct mtk_wed_device *dev, u32 reg, u32 mask)
++{
++	wdma_m32(dev, reg, 0, mask);
++}
++
++static u32
++mtk_wed_read_reset(struct mtk_wed_device *dev)
++{
++	return wed_r32(dev, MTK_WED_RESET);
++}
++
++static void
++mtk_wed_reset(struct mtk_wed_device *dev, u32 mask)
++{
++	u32 status;
++
++	wed_w32(dev, MTK_WED_RESET, mask);
++	if (readx_poll_timeout(mtk_wed_read_reset, dev, status,
++			       !(status & mask), 0, 1000))
++		WARN_ON_ONCE(1);
++}
++
++static struct mtk_wed_hw *
++mtk_wed_assign(struct mtk_wed_device *dev)
++{
++	struct mtk_wed_hw *hw;
++
++	hw = hw_list[pci_domain_nr(dev->wlan.pci_dev->bus)];
++	if (!hw || hw->wed_dev)
++		return NULL;
++
++	hw->wed_dev = dev;
++	return hw;
++}
++
++static int
++mtk_wed_buffer_alloc(struct mtk_wed_device *dev)
++{
++	struct mtk_wdma_desc *desc;
++	dma_addr_t desc_phys;
++	void **page_list;
++	int token = dev->wlan.token_start;
++	int ring_size;
++	int n_pages;
++	int i, page_idx;
++
++	ring_size = dev->wlan.nbuf & ~(MTK_WED_BUF_PER_PAGE - 1);
++	n_pages = ring_size / MTK_WED_BUF_PER_PAGE;
++
++	page_list = kcalloc(n_pages, sizeof(*page_list), GFP_KERNEL);
++	if (!page_list)
++		return -ENOMEM;
++
++	dev->buf_ring.size = ring_size;
++	dev->buf_ring.pages = page_list;
++
++	desc = dma_alloc_coherent(dev->hw->dev, ring_size * sizeof(*desc),
++				  &desc_phys, GFP_KERNEL);
++	if (!desc)
++		return -ENOMEM;
++
++	dev->buf_ring.desc = desc;
++	dev->buf_ring.desc_phys = desc_phys;
++
++	for (i = 0, page_idx = 0; i < ring_size; i += MTK_WED_BUF_PER_PAGE) {
++		dma_addr_t page_phys, buf_phys;
++		struct page *page;
++		void *buf;
++		int s;
++
++		page = __dev_alloc_pages(GFP_KERNEL, 0);
++		if (!page)
++			return -ENOMEM;
++
++		page_phys = dma_map_page(dev->hw->dev, page, 0, PAGE_SIZE,
++					 DMA_BIDIRECTIONAL);
++		if (dma_mapping_error(dev->hw->dev, page_phys)) {
++			__free_page(page);
++			return -ENOMEM;
++		}
++
++		page_list[page_idx++] = page;
++		dma_sync_single_for_cpu(dev->hw->dev, page_phys, PAGE_SIZE,
++					DMA_BIDIRECTIONAL);
++
++		buf = page_to_virt(page);
++		buf_phys = page_phys;
++
++		for (s = 0; s < MTK_WED_BUF_PER_PAGE; s++) {
++			u32 txd_size;
++
++			txd_size = dev->wlan.init_buf(buf, buf_phys, token++);
++
++			desc->buf0 = buf_phys;
++			desc->buf1 = buf_phys + txd_size;
++			desc->ctrl = FIELD_PREP(MTK_WDMA_DESC_CTRL_LEN0,
++						txd_size) |
++				     FIELD_PREP(MTK_WDMA_DESC_CTRL_LEN1,
++						MTK_WED_BUF_SIZE - txd_size) |
++				     MTK_WDMA_DESC_CTRL_LAST_SEG1;
++			desc->info = 0;
++			desc++;
++
++			buf += MTK_WED_BUF_SIZE;
++			buf_phys += MTK_WED_BUF_SIZE;
++		}
++
++		dma_sync_single_for_device(dev->hw->dev, page_phys, PAGE_SIZE,
++					   DMA_BIDIRECTIONAL);
++	}
++
++	return 0;
++}
++
++static void
++mtk_wed_free_buffer(struct mtk_wed_device *dev)
++{
++	struct mtk_wdma_desc *desc = dev->buf_ring.desc;
++	void **page_list = dev->buf_ring.pages;
++	int page_idx;
++	int i;
++
++	if (!page_list)
++		return;
++
++	if (!desc)
++		goto free_pagelist;
++
++	for (i = 0, page_idx = 0; i < dev->buf_ring.size; i += MTK_WED_BUF_PER_PAGE) {
++		void *page = page_list[page_idx++];
++
++		if (!page)
++			break;
++
++		dma_unmap_page(dev->hw->dev, desc[i].buf0,
++			       PAGE_SIZE, DMA_BIDIRECTIONAL);
++		__free_page(page);
++	}
++
++	dma_free_coherent(dev->hw->dev, dev->buf_ring.size * sizeof(*desc),
++			  desc, dev->buf_ring.desc_phys);
++
++free_pagelist:
++	kfree(page_list);
++}
++
++static void
++mtk_wed_free_ring(struct mtk_wed_device *dev, struct mtk_wed_ring *ring)
++{
++	if (!ring->desc)
++		return;
++
++	dma_free_coherent(dev->hw->dev, ring->size * sizeof(*ring->desc),
++			  ring->desc, ring->desc_phys);
++}
++
++static void
++mtk_wed_free_tx_rings(struct mtk_wed_device *dev)
++{
++	int i;
++
++	for (i = 0; i < ARRAY_SIZE(dev->tx_ring); i++)
++		mtk_wed_free_ring(dev, &dev->tx_ring[i]);
++	for (i = 0; i < ARRAY_SIZE(dev->tx_wdma); i++)
++		mtk_wed_free_ring(dev, &dev->tx_wdma[i]);
++}
++
++static void
++mtk_wed_set_ext_int(struct mtk_wed_device *dev, bool en)
++{
++	u32 mask = MTK_WED_EXT_INT_STATUS_ERROR_MASK;
++
++	if (!dev->hw->num_flows)
++		mask &= ~MTK_WED_EXT_INT_STATUS_TKID_WO_PYLD;
++
++	wed_w32(dev, MTK_WED_EXT_INT_MASK, en ? mask : 0);
++	wed_r32(dev, MTK_WED_EXT_INT_MASK);
++}
++
++static void
++mtk_wed_stop(struct mtk_wed_device *dev)
++{
++	regmap_write(dev->hw->mirror, dev->hw->index * 4, 0);
++	mtk_wed_set_ext_int(dev, false);
++
++	wed_clr(dev, MTK_WED_CTRL,
++		MTK_WED_CTRL_WDMA_INT_AGENT_EN |
++		MTK_WED_CTRL_WPDMA_INT_AGENT_EN |
++		MTK_WED_CTRL_WED_TX_BM_EN |
++		MTK_WED_CTRL_WED_TX_FREE_AGENT_EN);
++	wed_w32(dev, MTK_WED_WPDMA_INT_TRIGGER, 0);
++	wed_w32(dev, MTK_WED_WDMA_INT_TRIGGER, 0);
++	wdma_w32(dev, MTK_WDMA_INT_MASK, 0);
++	wdma_w32(dev, MTK_WDMA_INT_GRP2, 0);
++	wed_w32(dev, MTK_WED_WPDMA_INT_MASK, 0);
++
++	wed_clr(dev, MTK_WED_GLO_CFG,
++		MTK_WED_GLO_CFG_TX_DMA_EN |
++		MTK_WED_GLO_CFG_RX_DMA_EN);
++	wed_clr(dev, MTK_WED_WPDMA_GLO_CFG,
++		MTK_WED_WPDMA_GLO_CFG_TX_DRV_EN |
++		MTK_WED_WPDMA_GLO_CFG_RX_DRV_EN);
++	wed_clr(dev, MTK_WED_WDMA_GLO_CFG,
++		MTK_WED_WDMA_GLO_CFG_RX_DRV_EN);
++}
++
++static void
++mtk_wed_detach(struct mtk_wed_device *dev)
++{
++	struct device_node *wlan_node = dev->wlan.pci_dev->dev.of_node;
++	struct mtk_wed_hw *hw = dev->hw;
++
++	mutex_lock(&hw_lock);
++
++	mtk_wed_stop(dev);
++
++	wdma_w32(dev, MTK_WDMA_RESET_IDX, MTK_WDMA_RESET_IDX_RX);
++	wdma_w32(dev, MTK_WDMA_RESET_IDX, 0);
++
++	mtk_wed_reset(dev, MTK_WED_RESET_WED);
++
++	mtk_wed_free_buffer(dev);
++	mtk_wed_free_tx_rings(dev);
++
++	if (of_dma_is_coherent(wlan_node))
++		regmap_update_bits(hw->hifsys, HIFSYS_DMA_AG_MAP,
++				   BIT(hw->index), BIT(hw->index));
++
++	if (!hw_list[!hw->index]->wed_dev &&
++	    hw->eth->dma_dev != hw->eth->dev)
++		mtk_eth_set_dma_device(hw->eth, hw->eth->dev);
++
++	memset(dev, 0, sizeof(*dev));
++	module_put(THIS_MODULE);
++
++	hw->wed_dev = NULL;
++	mutex_unlock(&hw_lock);
++}
++
++static void
++mtk_wed_hw_init_early(struct mtk_wed_device *dev)
++{
++	u32 mask, set;
++	u32 offset;
++
++	mtk_wed_stop(dev);
++	mtk_wed_reset(dev, MTK_WED_RESET_WED);
++
++	mask = MTK_WED_WDMA_GLO_CFG_BT_SIZE |
++	       MTK_WED_WDMA_GLO_CFG_DYNAMIC_DMAD_RECYCLE |
++	       MTK_WED_WDMA_GLO_CFG_RX_DIS_FSM_AUTO_IDLE;
++	set = FIELD_PREP(MTK_WED_WDMA_GLO_CFG_BT_SIZE, 2) |
++	      MTK_WED_WDMA_GLO_CFG_DYNAMIC_SKIP_DMAD_PREP |
++	      MTK_WED_WDMA_GLO_CFG_IDLE_DMAD_SUPPLY;
++	wed_m32(dev, MTK_WED_WDMA_GLO_CFG, mask, set);
++
++	wdma_set(dev, MTK_WDMA_GLO_CFG, MTK_WDMA_GLO_CFG_RX_INFO_PRERES);
++
++	offset = dev->hw->index ? 0x04000400 : 0;
++	wed_w32(dev, MTK_WED_WDMA_OFFSET0, 0x2a042a20 + offset);
++	wed_w32(dev, MTK_WED_WDMA_OFFSET1, 0x29002800 + offset);
++
++	wed_w32(dev, MTK_WED_PCIE_CFG_BASE, MTK_PCIE_BASE(dev->hw->index));
++	wed_w32(dev, MTK_WED_WPDMA_CFG_BASE, dev->wlan.wpdma_phys);
++}
++
++static void
++mtk_wed_hw_init(struct mtk_wed_device *dev)
++{
++	if (dev->init_done)
++		return;
++
++	dev->init_done = true;
++	mtk_wed_set_ext_int(dev, false);
++	wed_w32(dev, MTK_WED_TX_BM_CTRL,
++		MTK_WED_TX_BM_CTRL_PAUSE |
++		FIELD_PREP(MTK_WED_TX_BM_CTRL_VLD_GRP_NUM,
++			   dev->buf_ring.size / 128) |
++		FIELD_PREP(MTK_WED_TX_BM_CTRL_RSV_GRP_NUM,
++			   MTK_WED_TX_RING_SIZE / 256));
++
++	wed_w32(dev, MTK_WED_TX_BM_BASE, dev->buf_ring.desc_phys);
++
++	wed_w32(dev, MTK_WED_TX_BM_TKID,
++		FIELD_PREP(MTK_WED_TX_BM_TKID_START,
++			   dev->wlan.token_start) |
++		FIELD_PREP(MTK_WED_TX_BM_TKID_END,
++			   dev->wlan.token_start + dev->wlan.nbuf - 1));
++
++	wed_w32(dev, MTK_WED_TX_BM_BUF_LEN, MTK_WED_PKT_SIZE);
++
++	wed_w32(dev, MTK_WED_TX_BM_DYN_THR,
++		FIELD_PREP(MTK_WED_TX_BM_DYN_THR_LO, 1) |
++		MTK_WED_TX_BM_DYN_THR_HI);
++
++	mtk_wed_reset(dev, MTK_WED_RESET_TX_BM);
++
++	wed_set(dev, MTK_WED_CTRL,
++		MTK_WED_CTRL_WED_TX_BM_EN |
++		MTK_WED_CTRL_WED_TX_FREE_AGENT_EN);
++
++	wed_clr(dev, MTK_WED_TX_BM_CTRL, MTK_WED_TX_BM_CTRL_PAUSE);
++}
++
++static void
++mtk_wed_ring_reset(struct mtk_wdma_desc *desc, int size)
++{
++	int i;
++
++	for (i = 0; i < size; i++) {
++		desc[i].buf0 = 0;
++		desc[i].ctrl = cpu_to_le32(MTK_WDMA_DESC_CTRL_DMA_DONE);
++		desc[i].buf1 = 0;
++		desc[i].info = 0;
++	}
++}
++
++static u32
++mtk_wed_check_busy(struct mtk_wed_device *dev)
++{
++	if (wed_r32(dev, MTK_WED_GLO_CFG) & MTK_WED_GLO_CFG_TX_DMA_BUSY)
++		return true;
++
++	if (wed_r32(dev, MTK_WED_WPDMA_GLO_CFG) &
++	    MTK_WED_WPDMA_GLO_CFG_TX_DRV_BUSY)
++		return true;
++
++	if (wed_r32(dev, MTK_WED_CTRL) & MTK_WED_CTRL_WDMA_INT_AGENT_BUSY)
++		return true;
++
++	if (wed_r32(dev, MTK_WED_WDMA_GLO_CFG) &
++	    MTK_WED_WDMA_GLO_CFG_RX_DRV_BUSY)
++		return true;
++
++	if (wdma_r32(dev, MTK_WDMA_GLO_CFG) &
++	    MTK_WED_WDMA_GLO_CFG_RX_DRV_BUSY)
++		return true;
++
++	if (wed_r32(dev, MTK_WED_CTRL) &
++	    (MTK_WED_CTRL_WED_TX_BM_BUSY | MTK_WED_CTRL_WED_TX_FREE_AGENT_BUSY))
++		return true;
++
++	return false;
++}
++
++static int
++mtk_wed_poll_busy(struct mtk_wed_device *dev)
++{
++	int sleep = 15000;
++	int timeout = 100 * sleep;
++	u32 val;
++
++	return read_poll_timeout(mtk_wed_check_busy, val, !val, sleep,
++				 timeout, false, dev);
++}
++
++static void
++mtk_wed_reset_dma(struct mtk_wed_device *dev)
++{
++	bool busy = false;
++	u32 val;
++	int i;
++
++	for (i = 0; i < ARRAY_SIZE(dev->tx_ring); i++) {
++		struct mtk_wdma_desc *desc = dev->tx_ring[i].desc;
++
++		if (!desc)
++			continue;
++
++		mtk_wed_ring_reset(desc, MTK_WED_TX_RING_SIZE);
++	}
++
++	if (mtk_wed_poll_busy(dev))
++		busy = mtk_wed_check_busy(dev);
++
++	if (busy) {
++		mtk_wed_reset(dev, MTK_WED_RESET_WED_TX_DMA);
++	} else {
++		wed_w32(dev, MTK_WED_RESET_IDX,
++			MTK_WED_RESET_IDX_TX |
++			MTK_WED_RESET_IDX_RX);
++		wed_w32(dev, MTK_WED_RESET_IDX, 0);
++	}
++
++	wdma_w32(dev, MTK_WDMA_RESET_IDX, MTK_WDMA_RESET_IDX_RX);
++	wdma_w32(dev, MTK_WDMA_RESET_IDX, 0);
++
++	if (busy) {
++		mtk_wed_reset(dev, MTK_WED_RESET_WDMA_INT_AGENT);
++		mtk_wed_reset(dev, MTK_WED_RESET_WDMA_RX_DRV);
++	} else {
++		wed_w32(dev, MTK_WED_WDMA_RESET_IDX,
++			MTK_WED_WDMA_RESET_IDX_RX | MTK_WED_WDMA_RESET_IDX_DRV);
++		wed_w32(dev, MTK_WED_WDMA_RESET_IDX, 0);
++
++		wed_set(dev, MTK_WED_WDMA_GLO_CFG,
++			MTK_WED_WDMA_GLO_CFG_RST_INIT_COMPLETE);
++
++		wed_clr(dev, MTK_WED_WDMA_GLO_CFG,
++			MTK_WED_WDMA_GLO_CFG_RST_INIT_COMPLETE);
++	}
++
++	for (i = 0; i < 100; i++) {
++		val = wed_r32(dev, MTK_WED_TX_BM_INTF);
++		if (FIELD_GET(MTK_WED_TX_BM_INTF_TKFIFO_FDEP, val) == 0x40)
++			break;
++	}
++
++	mtk_wed_reset(dev, MTK_WED_RESET_TX_FREE_AGENT);
++	mtk_wed_reset(dev, MTK_WED_RESET_TX_BM);
++
++	if (busy) {
++		mtk_wed_reset(dev, MTK_WED_RESET_WPDMA_INT_AGENT);
++		mtk_wed_reset(dev, MTK_WED_RESET_WPDMA_TX_DRV);
++		mtk_wed_reset(dev, MTK_WED_RESET_WPDMA_RX_DRV);
++	} else {
++		wed_w32(dev, MTK_WED_WPDMA_RESET_IDX,
++			MTK_WED_WPDMA_RESET_IDX_TX |
++			MTK_WED_WPDMA_RESET_IDX_RX);
++		wed_w32(dev, MTK_WED_WPDMA_RESET_IDX, 0);
++	}
++
++}
++
++static int
++mtk_wed_ring_alloc(struct mtk_wed_device *dev, struct mtk_wed_ring *ring,
++		   int size)
++{
++	ring->desc = dma_alloc_coherent(dev->hw->dev,
++					size * sizeof(*ring->desc),
++					&ring->desc_phys, GFP_KERNEL);
++	if (!ring->desc)
++		return -ENOMEM;
++
++	ring->size = size;
++	mtk_wed_ring_reset(ring->desc, size);
++
++	return 0;
++}
++
++static int
++mtk_wed_wdma_ring_setup(struct mtk_wed_device *dev, int idx, int size)
++{
++	struct mtk_wed_ring *wdma = &dev->tx_wdma[idx];
++
++	if (mtk_wed_ring_alloc(dev, wdma, MTK_WED_WDMA_RING_SIZE))
++		return -ENOMEM;
++
++	wdma_w32(dev, MTK_WDMA_RING_RX(idx) + MTK_WED_RING_OFS_BASE,
++		 wdma->desc_phys);
++	wdma_w32(dev, MTK_WDMA_RING_RX(idx) + MTK_WED_RING_OFS_COUNT,
++		 size);
++	wdma_w32(dev, MTK_WDMA_RING_RX(idx) + MTK_WED_RING_OFS_CPU_IDX, 0);
++
++	wed_w32(dev, MTK_WED_WDMA_RING_RX(idx) + MTK_WED_RING_OFS_BASE,
++		wdma->desc_phys);
++	wed_w32(dev, MTK_WED_WDMA_RING_RX(idx) + MTK_WED_RING_OFS_COUNT,
++		size);
++
++	return 0;
++}
++
++static void
++mtk_wed_start(struct mtk_wed_device *dev, u32 irq_mask)
++{
++	u32 wdma_mask;
++	u32 val;
++	int i;
++
++	for (i = 0; i < ARRAY_SIZE(dev->tx_wdma); i++)
++		if (!dev->tx_wdma[i].desc)
++			mtk_wed_wdma_ring_setup(dev, i, 16);
++
++	wdma_mask = FIELD_PREP(MTK_WDMA_INT_MASK_RX_DONE, GENMASK(1, 0));
++
++	mtk_wed_hw_init(dev);
++
++	wed_set(dev, MTK_WED_CTRL,
++		MTK_WED_CTRL_WDMA_INT_AGENT_EN |
++		MTK_WED_CTRL_WPDMA_INT_AGENT_EN |
++		MTK_WED_CTRL_WED_TX_BM_EN |
++		MTK_WED_CTRL_WED_TX_FREE_AGENT_EN);
++
++	wed_w32(dev, MTK_WED_PCIE_INT_TRIGGER, MTK_WED_PCIE_INT_TRIGGER_STATUS);
++
++	wed_w32(dev, MTK_WED_WPDMA_INT_TRIGGER,
++		MTK_WED_WPDMA_INT_TRIGGER_RX_DONE |
++		MTK_WED_WPDMA_INT_TRIGGER_TX_DONE);
++
++	wed_set(dev, MTK_WED_WPDMA_INT_CTRL,
++		MTK_WED_WPDMA_INT_CTRL_SUBRT_ADV);
++
++	wed_w32(dev, MTK_WED_WDMA_INT_TRIGGER, wdma_mask);
++	wed_clr(dev, MTK_WED_WDMA_INT_CTRL, wdma_mask);
++
++	wdma_w32(dev, MTK_WDMA_INT_MASK, wdma_mask);
++	wdma_w32(dev, MTK_WDMA_INT_GRP2, wdma_mask);
++
++	wed_w32(dev, MTK_WED_WPDMA_INT_MASK, irq_mask);
++	wed_w32(dev, MTK_WED_INT_MASK, irq_mask);
++
++	wed_set(dev, MTK_WED_GLO_CFG,
++		MTK_WED_GLO_CFG_TX_DMA_EN |
++		MTK_WED_GLO_CFG_RX_DMA_EN);
++	wed_set(dev, MTK_WED_WPDMA_GLO_CFG,
++		MTK_WED_WPDMA_GLO_CFG_TX_DRV_EN |
++		MTK_WED_WPDMA_GLO_CFG_RX_DRV_EN);
++	wed_set(dev, MTK_WED_WDMA_GLO_CFG,
++		MTK_WED_WDMA_GLO_CFG_RX_DRV_EN);
++
++	mtk_wed_set_ext_int(dev, true);
++	val = dev->wlan.wpdma_phys |
++	      MTK_PCIE_MIRROR_MAP_EN |
++	      FIELD_PREP(MTK_PCIE_MIRROR_MAP_WED_ID, dev->hw->index);
++
++	if (dev->hw->index)
++		val |= BIT(1);
++	val |= BIT(0);
++	regmap_write(dev->hw->mirror, dev->hw->index * 4, val);
++
++	dev->running = true;
++}
++
++static int
++mtk_wed_attach(struct mtk_wed_device *dev)
++	__releases(RCU)
++{
++	struct mtk_wed_hw *hw;
++	int ret = 0;
++
++	RCU_LOCKDEP_WARN(!rcu_read_lock_held(),
++			 "mtk_wed_attach without holding the RCU read lock");
++
++	if (pci_domain_nr(dev->wlan.pci_dev->bus) > 1 ||
++	    !try_module_get(THIS_MODULE))
++		ret = -ENODEV;
++
++	rcu_read_unlock();
++
++	if (ret)
++		return ret;
++
++	mutex_lock(&hw_lock);
++
++	hw = mtk_wed_assign(dev);
++	if (!hw) {
++		module_put(THIS_MODULE);
++		ret = -ENODEV;
++		goto out;
++	}
++
++	dev_info(&dev->wlan.pci_dev->dev, "attaching wed device %d\n", hw->index);
++
++	dev->hw = hw;
++	dev->dev = hw->dev;
++	dev->irq = hw->irq;
++	dev->wdma_idx = hw->index;
++
++	if (hw->eth->dma_dev == hw->eth->dev &&
++	    of_dma_is_coherent(hw->eth->dev->of_node))
++		mtk_eth_set_dma_device(hw->eth, hw->dev);
++
++	ret = mtk_wed_buffer_alloc(dev);
++	if (ret) {
++		mtk_wed_detach(dev);
++		goto out;
++	}
++
++	mtk_wed_hw_init_early(dev);
++	regmap_update_bits(hw->hifsys, HIFSYS_DMA_AG_MAP, BIT(hw->index), 0);
++
++out:
++	mutex_unlock(&hw_lock);
++
++	return ret;
++}
++
++static int
++mtk_wed_tx_ring_setup(struct mtk_wed_device *dev, int idx, void __iomem *regs)
++{
++	struct mtk_wed_ring *ring = &dev->tx_ring[idx];
++
++	/*
++	 * Tx ring redirection:
++	 * Instead of configuring the WLAN PDMA TX ring directly, the WLAN
++	 * driver allocated DMA ring gets configured into WED MTK_WED_RING_TX(n)
++	 * registers.
++	 *
++	 * WED driver posts its own DMA ring as WLAN PDMA TX and configures it
++	 * into MTK_WED_WPDMA_RING_TX(n) registers.
++	 * It gets filled with packets picked up from WED TX ring and from
++	 * WDMA RX.
++	 */
++
++	BUG_ON(idx > ARRAY_SIZE(dev->tx_ring));
++
++	if (mtk_wed_ring_alloc(dev, ring, MTK_WED_TX_RING_SIZE))
++		return -ENOMEM;
++
++	if (mtk_wed_wdma_ring_setup(dev, idx, MTK_WED_WDMA_RING_SIZE))
++		return -ENOMEM;
++
++	ring->reg_base = MTK_WED_RING_TX(idx);
++	ring->wpdma = regs;
++
++	/* WED -> WPDMA */
++	wpdma_tx_w32(dev, idx, MTK_WED_RING_OFS_BASE, ring->desc_phys);
++	wpdma_tx_w32(dev, idx, MTK_WED_RING_OFS_COUNT, MTK_WED_TX_RING_SIZE);
++	wpdma_tx_w32(dev, idx, MTK_WED_RING_OFS_CPU_IDX, 0);
++
++	wed_w32(dev, MTK_WED_WPDMA_RING_TX(idx) + MTK_WED_RING_OFS_BASE,
++		ring->desc_phys);
++	wed_w32(dev, MTK_WED_WPDMA_RING_TX(idx) + MTK_WED_RING_OFS_COUNT,
++		MTK_WED_TX_RING_SIZE);
++	wed_w32(dev, MTK_WED_WPDMA_RING_TX(idx) + MTK_WED_RING_OFS_CPU_IDX, 0);
++
++	return 0;
++}
++
++static int
++mtk_wed_txfree_ring_setup(struct mtk_wed_device *dev, void __iomem *regs)
++{
++	struct mtk_wed_ring *ring = &dev->txfree_ring;
++	int i;
++
++	/*
++	 * For txfree event handling, the same DMA ring is shared between WED
++	 * and WLAN. The WLAN driver accesses the ring index registers through
++	 * WED
++	 */
++	ring->reg_base = MTK_WED_RING_RX(1);
++	ring->wpdma = regs;
++
++	for (i = 0; i < 12; i += 4) {
++		u32 val = readl(regs + i);
++
++		wed_w32(dev, MTK_WED_RING_RX(1) + i, val);
++		wed_w32(dev, MTK_WED_WPDMA_RING_RX(1) + i, val);
++	}
++
++	return 0;
++}
++
++static u32
++mtk_wed_irq_get(struct mtk_wed_device *dev, u32 mask)
++{
++	u32 val;
++
++	val = wed_r32(dev, MTK_WED_EXT_INT_STATUS);
++	wed_w32(dev, MTK_WED_EXT_INT_STATUS, val);
++	val &= MTK_WED_EXT_INT_STATUS_ERROR_MASK;
++	if (!dev->hw->num_flows)
++		val &= ~MTK_WED_EXT_INT_STATUS_TKID_WO_PYLD;
++	if (val && net_ratelimit())
++		pr_err("mtk_wed%d: error status=%08x\n", dev->hw->index, val);
++
++	val = wed_r32(dev, MTK_WED_INT_STATUS);
++	val &= mask;
++	wed_w32(dev, MTK_WED_INT_STATUS, val); /* ACK */
++
++	return val;
++}
++
++static void
++mtk_wed_irq_set_mask(struct mtk_wed_device *dev, u32 mask)
++{
++	if (!dev->running)
++		return;
++
++	mtk_wed_set_ext_int(dev, !!mask);
++	wed_w32(dev, MTK_WED_INT_MASK, mask);
++}
++
++int mtk_wed_flow_add(int index)
++{
++	struct mtk_wed_hw *hw = hw_list[index];
++	int ret;
++
++	if (!hw || !hw->wed_dev)
++		return -ENODEV;
++
++	if (hw->num_flows) {
++		hw->num_flows++;
++		return 0;
++	}
++
++	mutex_lock(&hw_lock);
++	if (!hw->wed_dev) {
++		ret = -ENODEV;
++		goto out;
++	}
++
++	ret = hw->wed_dev->wlan.offload_enable(hw->wed_dev);
++	if (!ret)
++		hw->num_flows++;
++	mtk_wed_set_ext_int(hw->wed_dev, true);
++
++out:
++	mutex_unlock(&hw_lock);
++
++	return ret;
++}
++
++void mtk_wed_flow_remove(int index)
++{
++	struct mtk_wed_hw *hw = hw_list[index];
++
++	if (!hw)
++		return;
++
++	if (--hw->num_flows)
++		return;
++
++	mutex_lock(&hw_lock);
++	if (!hw->wed_dev)
++		goto out;
++
++	hw->wed_dev->wlan.offload_disable(hw->wed_dev);
++	mtk_wed_set_ext_int(hw->wed_dev, true);
++
++out:
++	mutex_unlock(&hw_lock);
++}
++
++void mtk_wed_add_hw(struct device_node *np, struct mtk_eth *eth,
++		    void __iomem *wdma, int index)
++{
++	static const struct mtk_wed_ops wed_ops = {
++		.attach = mtk_wed_attach,
++		.tx_ring_setup = mtk_wed_tx_ring_setup,
++		.txfree_ring_setup = mtk_wed_txfree_ring_setup,
++		.start = mtk_wed_start,
++		.stop = mtk_wed_stop,
++		.reset_dma = mtk_wed_reset_dma,
++		.reg_read = wed_r32,
++		.reg_write = wed_w32,
++		.irq_get = mtk_wed_irq_get,
++		.irq_set_mask = mtk_wed_irq_set_mask,
++		.detach = mtk_wed_detach,
++	};
++	struct device_node *eth_np = eth->dev->of_node;
++	struct platform_device *pdev;
++	struct mtk_wed_hw *hw;
++	struct regmap *regs;
++	int irq;
++
++	if (!np)
++		return;
++
++	pdev = of_find_device_by_node(np);
++	if (!pdev)
++		return;
++
++	get_device(&pdev->dev);
++	irq = platform_get_irq(pdev, 0);
++	if (irq < 0)
++		return;
++
++	regs = syscon_regmap_lookup_by_phandle(np, NULL);
++	if (!regs)
++		return;
++
++	rcu_assign_pointer(mtk_soc_wed_ops, &wed_ops);
++
++	mutex_lock(&hw_lock);
++
++	if (WARN_ON(hw_list[index]))
++		goto unlock;
++
++	hw = kzalloc(sizeof(*hw), GFP_KERNEL);
++	hw->node = np;
++	hw->regs = regs;
++	hw->eth = eth;
++	hw->dev = &pdev->dev;
++	hw->wdma = wdma;
++	hw->index = index;
++	hw->irq = irq;
++	hw->mirror = syscon_regmap_lookup_by_phandle(eth_np,
++						     "mediatek,pcie-mirror");
++	hw->hifsys = syscon_regmap_lookup_by_phandle(eth_np,
++						     "mediatek,hifsys");
++	if (IS_ERR(hw->mirror) || IS_ERR(hw->hifsys)) {
++		kfree(hw);
++		goto unlock;
++	}
++
++	if (!index) {
++		regmap_write(hw->mirror, 0, 0);
++		regmap_write(hw->mirror, 4, 0);
++	}
++	mtk_wed_hw_add_debugfs(hw);
++
++	hw_list[index] = hw;
++
++unlock:
++	mutex_unlock(&hw_lock);
++}
++
++void mtk_wed_exit(void)
++{
++	int i;
++
++	rcu_assign_pointer(mtk_soc_wed_ops, NULL);
++
++	synchronize_rcu();
++
++	for (i = 0; i < ARRAY_SIZE(hw_list); i++) {
++		struct mtk_wed_hw *hw;
++
++		hw = hw_list[i];
++		if (!hw)
++			continue;
++
++		hw_list[i] = NULL;
++		debugfs_remove(hw->debugfs_dir);
++		put_device(hw->dev);
++		kfree(hw);
++	}
++}
+--- /dev/null
++++ b/drivers/net/ethernet/mediatek/mtk_wed.h
+@@ -0,0 +1,128 @@
++// SPDX-License-Identifier: GPL-2.0-only
++/* Copyright (C) 2021 Felix Fietkau <[email protected]> */
++
++#ifndef __MTK_WED_PRIV_H
++#define __MTK_WED_PRIV_H
++
++#include <linux/soc/mediatek/mtk_wed.h>
++#include <linux/debugfs.h>
++#include <linux/regmap.h>
++
++struct mtk_eth;
++
++struct mtk_wed_hw {
++	struct device_node *node;
++	struct mtk_eth *eth;
++	struct regmap *regs;
++	struct regmap *hifsys;
++	struct device *dev;
++	void __iomem *wdma;
++	struct regmap *mirror;
++	struct dentry *debugfs_dir;
++	struct mtk_wed_device *wed_dev;
++	u32 debugfs_reg;
++	u32 num_flows;
++	char dirname[5];
++	int irq;
++	int index;
++};
++
++
++#ifdef CONFIG_NET_MEDIATEK_SOC_WED
++static inline void
++wed_w32(struct mtk_wed_device *dev, u32 reg, u32 val)
++{
++	regmap_write(dev->hw->regs, reg, val);
++}
++
++static inline u32
++wed_r32(struct mtk_wed_device *dev, u32 reg)
++{
++	unsigned int val;
++
++	regmap_read(dev->hw->regs, reg, &val);
++
++	return val;
++}
++
++static inline void
++wdma_w32(struct mtk_wed_device *dev, u32 reg, u32 val)
++{
++	writel(val, dev->hw->wdma + reg);
++}
++
++static inline u32
++wdma_r32(struct mtk_wed_device *dev, u32 reg)
++{
++	return readl(dev->hw->wdma + reg);
++}
++
++static inline u32
++wpdma_tx_r32(struct mtk_wed_device *dev, int ring, u32 reg)
++{
++	if (!dev->tx_ring[ring].wpdma)
++		return 0;
++
++	return readl(dev->tx_ring[ring].wpdma + reg);
++}
++
++static inline void
++wpdma_tx_w32(struct mtk_wed_device *dev, int ring, u32 reg, u32 val)
++{
++	if (!dev->tx_ring[ring].wpdma)
++		return;
++
++	writel(val, dev->tx_ring[ring].wpdma + reg);
++}
++
++static inline u32
++wpdma_txfree_r32(struct mtk_wed_device *dev, u32 reg)
++{
++	if (!dev->txfree_ring.wpdma)
++		return 0;
++
++	return readl(dev->txfree_ring.wpdma + reg);
++}
++
++static inline void
++wpdma_txfree_w32(struct mtk_wed_device *dev, u32 reg, u32 val)
++{
++	if (!dev->txfree_ring.wpdma)
++		return;
++
++	writel(val, dev->txfree_ring.wpdma + reg);
++}
++
++void mtk_wed_add_hw(struct device_node *np, struct mtk_eth *eth,
++		    void __iomem *wdma, int index);
++void mtk_wed_exit(void);
++int mtk_wed_flow_add(int index);
++void mtk_wed_flow_remove(int index);
++#else
++static inline void
++mtk_wed_add_hw(struct device_node *np, struct mtk_eth *eth,
++	       void __iomem *wdma, int index)
++{
++}
++static inline void
++mtk_wed_exit(void)
++{
++}
++static inline int mtk_wed_flow_add(int index)
++{
++	return -EINVAL;
++}
++static inline void mtk_wed_flow_remove(int index)
++{
++}
++#endif
++
++#ifdef CONFIG_DEBUG_FS
++void mtk_wed_hw_add_debugfs(struct mtk_wed_hw *hw);
++#else
++static inline void mtk_wed_hw_add_debugfs(struct mtk_wed_hw *hw)
++{
++}
++#endif
++
++#endif
+--- /dev/null
++++ b/drivers/net/ethernet/mediatek/mtk_wed_debugfs.c
+@@ -0,0 +1,175 @@
++// SPDX-License-Identifier: GPL-2.0-only
++/* Copyright (C) 2021 Felix Fietkau <[email protected]> */
++
++#include <linux/seq_file.h>
++#include "mtk_wed.h"
++#include "mtk_wed_regs.h"
++
++struct reg_dump {
++	const char *name;
++	u16 offset;
++	u8 type;
++	u8 base;
++};
++
++enum {
++	DUMP_TYPE_STRING,
++	DUMP_TYPE_WED,
++	DUMP_TYPE_WDMA,
++	DUMP_TYPE_WPDMA_TX,
++	DUMP_TYPE_WPDMA_TXFREE,
++};
++
++#define DUMP_STR(_str) { _str, 0, DUMP_TYPE_STRING }
++#define DUMP_REG(_reg, ...) { #_reg, MTK_##_reg, __VA_ARGS__ }
++#define DUMP_RING(_prefix, _base, ...)				\
++	{ _prefix " BASE", _base, __VA_ARGS__ },		\
++	{ _prefix " CNT",  _base + 0x4, __VA_ARGS__ },	\
++	{ _prefix " CIDX", _base + 0x8, __VA_ARGS__ },	\
++	{ _prefix " DIDX", _base + 0xc, __VA_ARGS__ }
++
++#define DUMP_WED(_reg) DUMP_REG(_reg, DUMP_TYPE_WED)
++#define DUMP_WED_RING(_base) DUMP_RING(#_base, MTK_##_base, DUMP_TYPE_WED)
++
++#define DUMP_WDMA(_reg) DUMP_REG(_reg, DUMP_TYPE_WDMA)
++#define DUMP_WDMA_RING(_base) DUMP_RING(#_base, MTK_##_base, DUMP_TYPE_WDMA)
++
++#define DUMP_WPDMA_TX_RING(_n) DUMP_RING("WPDMA_TX" #_n, 0, DUMP_TYPE_WPDMA_TX, _n)
++#define DUMP_WPDMA_TXFREE_RING DUMP_RING("WPDMA_RX1", 0, DUMP_TYPE_WPDMA_TXFREE)
++
++static void
++print_reg_val(struct seq_file *s, const char *name, u32 val)
++{
++	seq_printf(s, "%-32s %08x\n", name, val);
++}
++
++static void
++dump_wed_regs(struct seq_file *s, struct mtk_wed_device *dev,
++	      const struct reg_dump *regs, int n_regs)
++{
++	const struct reg_dump *cur;
++	u32 val;
++
++	for (cur = regs; cur < &regs[n_regs]; cur++) {
++		switch (cur->type) {
++		case DUMP_TYPE_STRING:
++			seq_printf(s, "%s======== %s:\n",
++				   cur > regs ? "\n" : "",
++				   cur->name);
++			continue;
++		case DUMP_TYPE_WED:
++			val = wed_r32(dev, cur->offset);
++			break;
++		case DUMP_TYPE_WDMA:
++			val = wdma_r32(dev, cur->offset);
++			break;
++		case DUMP_TYPE_WPDMA_TX:
++			val = wpdma_tx_r32(dev, cur->base, cur->offset);
++			break;
++		case DUMP_TYPE_WPDMA_TXFREE:
++			val = wpdma_txfree_r32(dev, cur->offset);
++			break;
++		}
++		print_reg_val(s, cur->name, val);
++	}
++}
++
++
++static int
++wed_txinfo_show(struct seq_file *s, void *data)
++{
++	static const struct reg_dump regs[] = {
++		DUMP_STR("WED TX"),
++		DUMP_WED(WED_TX_MIB(0)),
++		DUMP_WED_RING(WED_RING_TX(0)),
++
++		DUMP_WED(WED_TX_MIB(1)),
++		DUMP_WED_RING(WED_RING_TX(1)),
++
++		DUMP_STR("WPDMA TX"),
++		DUMP_WED(WED_WPDMA_TX_MIB(0)),
++		DUMP_WED_RING(WED_WPDMA_RING_TX(0)),
++		DUMP_WED(WED_WPDMA_TX_COHERENT_MIB(0)),
++
++		DUMP_WED(WED_WPDMA_TX_MIB(1)),
++		DUMP_WED_RING(WED_WPDMA_RING_TX(1)),
++		DUMP_WED(WED_WPDMA_TX_COHERENT_MIB(1)),
++
++		DUMP_STR("WPDMA TX"),
++		DUMP_WPDMA_TX_RING(0),
++		DUMP_WPDMA_TX_RING(1),
++
++		DUMP_STR("WED WDMA RX"),
++		DUMP_WED(WED_WDMA_RX_MIB(0)),
++		DUMP_WED_RING(WED_WDMA_RING_RX(0)),
++		DUMP_WED(WED_WDMA_RX_THRES(0)),
++		DUMP_WED(WED_WDMA_RX_RECYCLE_MIB(0)),
++		DUMP_WED(WED_WDMA_RX_PROCESSED_MIB(0)),
++
++		DUMP_WED(WED_WDMA_RX_MIB(1)),
++		DUMP_WED_RING(WED_WDMA_RING_RX(1)),
++		DUMP_WED(WED_WDMA_RX_THRES(1)),
++		DUMP_WED(WED_WDMA_RX_RECYCLE_MIB(1)),
++		DUMP_WED(WED_WDMA_RX_PROCESSED_MIB(1)),
++
++		DUMP_STR("WDMA RX"),
++		DUMP_WDMA(WDMA_GLO_CFG),
++		DUMP_WDMA_RING(WDMA_RING_RX(0)),
++		DUMP_WDMA_RING(WDMA_RING_RX(1)),
++	};
++	struct mtk_wed_hw *hw = s->private;
++	struct mtk_wed_device *dev = hw->wed_dev;
++
++	if (!dev)
++		return 0;
++
++	dump_wed_regs(s, dev, regs, ARRAY_SIZE(regs));
++
++	return 0;
++}
++DEFINE_SHOW_ATTRIBUTE(wed_txinfo);
++
++
++static int
++mtk_wed_reg_set(void *data, u64 val)
++{
++	struct mtk_wed_hw *hw = data;
++
++	regmap_write(hw->regs, hw->debugfs_reg, val);
++
++	return 0;
++}
++
++static int
++mtk_wed_reg_get(void *data, u64 *val)
++{
++	struct mtk_wed_hw *hw = data;
++	unsigned int regval;
++	int ret;
++
++	ret = regmap_read(hw->regs, hw->debugfs_reg, &regval);
++	if (ret)
++		return ret;
++
++	*val = regval;
++
++	return 0;
++}
++
++DEFINE_DEBUGFS_ATTRIBUTE(fops_regval, mtk_wed_reg_get, mtk_wed_reg_set,
++             "0x%08llx\n");
++
++void mtk_wed_hw_add_debugfs(struct mtk_wed_hw *hw)
++{
++	struct dentry *dir;
++
++	snprintf(hw->dirname, sizeof(hw->dirname), "wed%d", hw->index);
++	dir = debugfs_create_dir(hw->dirname, NULL);
++	if (!dir)
++		return;
++
++	hw->debugfs_dir = dir;
++	debugfs_create_u32("regidx", 0600, dir, &hw->debugfs_reg);
++	debugfs_create_file_unsafe("regval", 0600, dir, hw, &fops_regval);
++	debugfs_create_file_unsafe("txinfo", 0400, dir, hw, &wed_txinfo_fops);
++}
+--- /dev/null
++++ b/drivers/net/ethernet/mediatek/mtk_wed_ops.c
+@@ -0,0 +1,8 @@
++// SPDX-License-Identifier: GPL-2.0-only
++/* Copyright (C) 2020 Felix Fietkau <[email protected]> */
++
++#include <linux/kernel.h>
++#include <linux/soc/mediatek/mtk_wed.h>
++
++const struct mtk_wed_ops __rcu *mtk_soc_wed_ops;
++EXPORT_SYMBOL_GPL(mtk_soc_wed_ops);
+--- /dev/null
++++ b/drivers/net/ethernet/mediatek/mtk_wed_regs.h
+@@ -0,0 +1,251 @@
++// SPDX-License-Identifier: GPL-2.0-only
++/* Copyright (C) 2020 Felix Fietkau <[email protected]> */
++
++#ifndef __MTK_WED_REGS_H
++#define __MTK_WED_REGS_H
++
++#define MTK_WDMA_DESC_CTRL_LEN1			GENMASK(14, 0)
++#define MTK_WDMA_DESC_CTRL_LAST_SEG1		BIT(15)
++#define MTK_WDMA_DESC_CTRL_BURST		BIT(16)
++#define MTK_WDMA_DESC_CTRL_LEN0			GENMASK(29, 16)
++#define MTK_WDMA_DESC_CTRL_LAST_SEG0		BIT(30)
++#define MTK_WDMA_DESC_CTRL_DMA_DONE		BIT(31)
++
++struct mtk_wdma_desc {
++	__le32 buf0;
++	__le32 ctrl;
++	__le32 buf1;
++	__le32 info;
++} __packed __aligned(4);
++
++#define MTK_WED_RESET					0x008
++#define MTK_WED_RESET_TX_BM				BIT(0)
++#define MTK_WED_RESET_TX_FREE_AGENT			BIT(4)
++#define MTK_WED_RESET_WPDMA_TX_DRV			BIT(8)
++#define MTK_WED_RESET_WPDMA_RX_DRV			BIT(9)
++#define MTK_WED_RESET_WPDMA_INT_AGENT			BIT(11)
++#define MTK_WED_RESET_WED_TX_DMA			BIT(12)
++#define MTK_WED_RESET_WDMA_RX_DRV			BIT(17)
++#define MTK_WED_RESET_WDMA_INT_AGENT			BIT(19)
++#define MTK_WED_RESET_WED				BIT(31)
++
++#define MTK_WED_CTRL					0x00c
++#define MTK_WED_CTRL_WPDMA_INT_AGENT_EN			BIT(0)
++#define MTK_WED_CTRL_WPDMA_INT_AGENT_BUSY		BIT(1)
++#define MTK_WED_CTRL_WDMA_INT_AGENT_EN			BIT(2)
++#define MTK_WED_CTRL_WDMA_INT_AGENT_BUSY		BIT(3)
++#define MTK_WED_CTRL_WED_TX_BM_EN			BIT(8)
++#define MTK_WED_CTRL_WED_TX_BM_BUSY			BIT(9)
++#define MTK_WED_CTRL_WED_TX_FREE_AGENT_EN		BIT(10)
++#define MTK_WED_CTRL_WED_TX_FREE_AGENT_BUSY		BIT(11)
++#define MTK_WED_CTRL_RESERVE_EN				BIT(12)
++#define MTK_WED_CTRL_RESERVE_BUSY			BIT(13)
++#define MTK_WED_CTRL_FINAL_DIDX_READ			BIT(24)
++#define MTK_WED_CTRL_MIB_READ_CLEAR			BIT(28)
++
++#define MTK_WED_EXT_INT_STATUS				0x020
++#define MTK_WED_EXT_INT_STATUS_TF_LEN_ERR		BIT(0)
++#define MTK_WED_EXT_INT_STATUS_TKID_WO_PYLD		BIT(1)
++#define MTK_WED_EXT_INT_STATUS_TKID_TITO_INVALID	BIT(4)
++#define MTK_WED_EXT_INT_STATUS_TX_FBUF_LO_TH		BIT(8)
++#define MTK_WED_EXT_INT_STATUS_TX_FBUF_HI_TH		BIT(9)
++#define MTK_WED_EXT_INT_STATUS_RX_FBUF_LO_TH		BIT(12)
++#define MTK_WED_EXT_INT_STATUS_RX_FBUF_HI_TH		BIT(13)
++#define MTK_WED_EXT_INT_STATUS_RX_DRV_R_RESP_ERR	BIT(16)
++#define MTK_WED_EXT_INT_STATUS_RX_DRV_W_RESP_ERR	BIT(17)
++#define MTK_WED_EXT_INT_STATUS_RX_DRV_COHERENT		BIT(18)
++#define MTK_WED_EXT_INT_STATUS_RX_DRV_INIT_WDMA_EN	BIT(19)
++#define MTK_WED_EXT_INT_STATUS_RX_DRV_BM_DMAD_COHERENT	BIT(20)
++#define MTK_WED_EXT_INT_STATUS_TX_DRV_R_RESP_ERR	BIT(21)
++#define MTK_WED_EXT_INT_STATUS_TX_DRV_W_RESP_ERR	BIT(22)
++#define MTK_WED_EXT_INT_STATUS_RX_DRV_DMA_RECYCLE	BIT(24)
++#define MTK_WED_EXT_INT_STATUS_ERROR_MASK		(MTK_WED_EXT_INT_STATUS_TF_LEN_ERR | \
++							 MTK_WED_EXT_INT_STATUS_TKID_WO_PYLD | \
++							 MTK_WED_EXT_INT_STATUS_TKID_TITO_INVALID | \
++							 MTK_WED_EXT_INT_STATUS_RX_DRV_R_RESP_ERR | \
++							 MTK_WED_EXT_INT_STATUS_RX_DRV_W_RESP_ERR | \
++							 MTK_WED_EXT_INT_STATUS_RX_DRV_INIT_WDMA_EN | \
++							 MTK_WED_EXT_INT_STATUS_TX_DRV_R_RESP_ERR | \
++							 MTK_WED_EXT_INT_STATUS_TX_DRV_W_RESP_ERR)
++
++#define MTK_WED_EXT_INT_MASK				0x028
++
++#define MTK_WED_STATUS					0x060
++#define MTK_WED_STATUS_TX				GENMASK(15, 8)
++
++#define MTK_WED_TX_BM_CTRL				0x080
++#define MTK_WED_TX_BM_CTRL_VLD_GRP_NUM			GENMASK(6, 0)
++#define MTK_WED_TX_BM_CTRL_RSV_GRP_NUM			GENMASK(22, 16)
++#define MTK_WED_TX_BM_CTRL_PAUSE			BIT(28)
++
++#define MTK_WED_TX_BM_BASE				0x084
++
++#define MTK_WED_TX_BM_TKID				0x088
++#define MTK_WED_TX_BM_TKID_START			GENMASK(15, 0)
++#define MTK_WED_TX_BM_TKID_END				GENMASK(31, 16)
++
++#define MTK_WED_TX_BM_BUF_LEN				0x08c
++
++#define MTK_WED_TX_BM_INTF				0x09c
++#define MTK_WED_TX_BM_INTF_TKID				GENMASK(15, 0)
++#define MTK_WED_TX_BM_INTF_TKFIFO_FDEP			GENMASK(23, 16)
++#define MTK_WED_TX_BM_INTF_TKID_VALID			BIT(28)
++#define MTK_WED_TX_BM_INTF_TKID_READ			BIT(29)
++
++#define MTK_WED_TX_BM_DYN_THR				0x0a0
++#define MTK_WED_TX_BM_DYN_THR_LO			GENMASK(6, 0)
++#define MTK_WED_TX_BM_DYN_THR_HI			GENMASK(22, 16)
++
++#define MTK_WED_INT_STATUS				0x200
++#define MTK_WED_INT_MASK				0x204
++
++#define MTK_WED_GLO_CFG					0x208
++#define MTK_WED_GLO_CFG_TX_DMA_EN			BIT(0)
++#define MTK_WED_GLO_CFG_TX_DMA_BUSY			BIT(1)
++#define MTK_WED_GLO_CFG_RX_DMA_EN			BIT(2)
++#define MTK_WED_GLO_CFG_RX_DMA_BUSY			BIT(3)
++#define MTK_WED_GLO_CFG_RX_BT_SIZE			GENMASK(5, 4)
++#define MTK_WED_GLO_CFG_TX_WB_DDONE			BIT(6)
++#define MTK_WED_GLO_CFG_BIG_ENDIAN			BIT(7)
++#define MTK_WED_GLO_CFG_DIS_BT_SIZE_ALIGN		BIT(8)
++#define MTK_WED_GLO_CFG_TX_BT_SIZE_LO			BIT(9)
++#define MTK_WED_GLO_CFG_MULTI_DMA_EN			GENMASK(11, 10)
++#define MTK_WED_GLO_CFG_FIFO_LITTLE_ENDIAN		BIT(12)
++#define MTK_WED_GLO_CFG_MI_DEPTH_RD			GENMASK(21, 13)
++#define MTK_WED_GLO_CFG_TX_BT_SIZE_HI			GENMASK(23, 22)
++#define MTK_WED_GLO_CFG_SW_RESET			BIT(24)
++#define MTK_WED_GLO_CFG_FIRST_TOKEN_ONLY		BIT(26)
++#define MTK_WED_GLO_CFG_OMIT_RX_INFO			BIT(27)
++#define MTK_WED_GLO_CFG_OMIT_TX_INFO			BIT(28)
++#define MTK_WED_GLO_CFG_BYTE_SWAP			BIT(29)
++#define MTK_WED_GLO_CFG_RX_2B_OFFSET			BIT(31)
++
++#define MTK_WED_RESET_IDX				0x20c
++#define MTK_WED_RESET_IDX_TX				GENMASK(3, 0)
++#define MTK_WED_RESET_IDX_RX				GENMASK(17, 16)
++
++#define MTK_WED_TX_MIB(_n)				(0x2a0 + (_n) * 4)
++
++#define MTK_WED_RING_TX(_n)				(0x300 + (_n) * 0x10)
++
++#define MTK_WED_RING_RX(_n)				(0x400 + (_n) * 0x10)
++
++#define MTK_WED_WPDMA_INT_TRIGGER			0x504
++#define MTK_WED_WPDMA_INT_TRIGGER_RX_DONE		BIT(1)
++#define MTK_WED_WPDMA_INT_TRIGGER_TX_DONE		GENMASK(5, 4)
++
++#define MTK_WED_WPDMA_GLO_CFG				0x508
++#define MTK_WED_WPDMA_GLO_CFG_TX_DRV_EN			BIT(0)
++#define MTK_WED_WPDMA_GLO_CFG_TX_DRV_BUSY		BIT(1)
++#define MTK_WED_WPDMA_GLO_CFG_RX_DRV_EN			BIT(2)
++#define MTK_WED_WPDMA_GLO_CFG_RX_DRV_BUSY		BIT(3)
++#define MTK_WED_WPDMA_GLO_CFG_RX_BT_SIZE		GENMASK(5, 4)
++#define MTK_WED_WPDMA_GLO_CFG_TX_WB_DDONE		BIT(6)
++#define MTK_WED_WPDMA_GLO_CFG_BIG_ENDIAN		BIT(7)
++#define MTK_WED_WPDMA_GLO_CFG_DIS_BT_SIZE_ALIGN		BIT(8)
++#define MTK_WED_WPDMA_GLO_CFG_TX_BT_SIZE_LO		BIT(9)
++#define MTK_WED_WPDMA_GLO_CFG_MULTI_DMA_EN		GENMASK(11, 10)
++#define MTK_WED_WPDMA_GLO_CFG_FIFO_LITTLE_ENDIAN	BIT(12)
++#define MTK_WED_WPDMA_GLO_CFG_MI_DEPTH_RD		GENMASK(21, 13)
++#define MTK_WED_WPDMA_GLO_CFG_TX_BT_SIZE_HI		GENMASK(23, 22)
++#define MTK_WED_WPDMA_GLO_CFG_SW_RESET			BIT(24)
++#define MTK_WED_WPDMA_GLO_CFG_FIRST_TOKEN_ONLY		BIT(26)
++#define MTK_WED_WPDMA_GLO_CFG_OMIT_RX_INFO		BIT(27)
++#define MTK_WED_WPDMA_GLO_CFG_OMIT_TX_INFO		BIT(28)
++#define MTK_WED_WPDMA_GLO_CFG_BYTE_SWAP			BIT(29)
++#define MTK_WED_WPDMA_GLO_CFG_RX_2B_OFFSET		BIT(31)
++
++#define MTK_WED_WPDMA_RESET_IDX				0x50c
++#define MTK_WED_WPDMA_RESET_IDX_TX			GENMASK(3, 0)
++#define MTK_WED_WPDMA_RESET_IDX_RX			GENMASK(17, 16)
++
++#define MTK_WED_WPDMA_INT_CTRL				0x520
++#define MTK_WED_WPDMA_INT_CTRL_SUBRT_ADV		BIT(21)
++
++#define MTK_WED_WPDMA_INT_MASK				0x524
++
++#define MTK_WED_PCIE_CFG_BASE				0x560
++
++#define MTK_WED_PCIE_INT_TRIGGER			0x570
++#define MTK_WED_PCIE_INT_TRIGGER_STATUS			BIT(16)
++
++#define MTK_WED_WPDMA_CFG_BASE				0x580
++
++#define MTK_WED_WPDMA_TX_MIB(_n)			(0x5a0 + (_n) * 4)
++#define MTK_WED_WPDMA_TX_COHERENT_MIB(_n)		(0x5d0 + (_n) * 4)
++
++#define MTK_WED_WPDMA_RING_TX(_n)			(0x600 + (_n) * 0x10)
++#define MTK_WED_WPDMA_RING_RX(_n)			(0x700 + (_n) * 0x10)
++#define MTK_WED_WDMA_RING_RX(_n)			(0x900 + (_n) * 0x10)
++#define MTK_WED_WDMA_RX_THRES(_n)			(0x940 + (_n) * 0x4)
++
++#define MTK_WED_WDMA_GLO_CFG				0xa04
++#define MTK_WED_WDMA_GLO_CFG_TX_DRV_EN			BIT(0)
++#define MTK_WED_WDMA_GLO_CFG_RX_DRV_EN			BIT(2)
++#define MTK_WED_WDMA_GLO_CFG_RX_DRV_BUSY		BIT(3)
++#define MTK_WED_WDMA_GLO_CFG_BT_SIZE			GENMASK(5, 4)
++#define MTK_WED_WDMA_GLO_CFG_TX_WB_DDONE		BIT(6)
++#define MTK_WED_WDMA_GLO_CFG_RX_DIS_FSM_AUTO_IDLE	BIT(13)
++#define MTK_WED_WDMA_GLO_CFG_WCOMPLETE_SEL		BIT(16)
++#define MTK_WED_WDMA_GLO_CFG_INIT_PHASE_RXDMA_BYPASS	BIT(17)
++#define MTK_WED_WDMA_GLO_CFG_INIT_PHASE_BYPASS		BIT(18)
++#define MTK_WED_WDMA_GLO_CFG_FSM_RETURN_IDLE		BIT(19)
++#define MTK_WED_WDMA_GLO_CFG_WAIT_COHERENT		BIT(20)
++#define MTK_WED_WDMA_GLO_CFG_AXI_W_AFTER_AW		BIT(21)
++#define MTK_WED_WDMA_GLO_CFG_IDLE_DMAD_SUPPLY_SINGLE_W	BIT(22)
++#define MTK_WED_WDMA_GLO_CFG_IDLE_DMAD_SUPPLY		BIT(23)
++#define MTK_WED_WDMA_GLO_CFG_DYNAMIC_SKIP_DMAD_PREP	BIT(24)
++#define MTK_WED_WDMA_GLO_CFG_DYNAMIC_DMAD_RECYCLE	BIT(25)
++#define MTK_WED_WDMA_GLO_CFG_RST_INIT_COMPLETE		BIT(26)
++#define MTK_WED_WDMA_GLO_CFG_RXDRV_CLKGATE_BYPASS	BIT(30)
++
++#define MTK_WED_WDMA_RESET_IDX				0xa08
++#define MTK_WED_WDMA_RESET_IDX_RX			GENMASK(17, 16)
++#define MTK_WED_WDMA_RESET_IDX_DRV			GENMASK(25, 24)
++
++#define MTK_WED_WDMA_INT_TRIGGER			0xa28
++#define MTK_WED_WDMA_INT_TRIGGER_RX_DONE		GENMASK(17, 16)
++
++#define MTK_WED_WDMA_INT_CTRL				0xa2c
++#define MTK_WED_WDMA_INT_CTRL_POLL_SRC_SEL		GENMASK(17, 16)
++
++#define MTK_WED_WDMA_OFFSET0				0xaa4
++#define MTK_WED_WDMA_OFFSET1				0xaa8
++
++#define MTK_WED_WDMA_RX_MIB(_n)				(0xae0 + (_n) * 4)
++#define MTK_WED_WDMA_RX_RECYCLE_MIB(_n)			(0xae8 + (_n) * 4)
++#define MTK_WED_WDMA_RX_PROCESSED_MIB(_n)		(0xaf0 + (_n) * 4)
++
++#define MTK_WED_RING_OFS_BASE				0x00
++#define MTK_WED_RING_OFS_COUNT				0x04
++#define MTK_WED_RING_OFS_CPU_IDX			0x08
++#define MTK_WED_RING_OFS_DMA_IDX			0x0c
++
++#define MTK_WDMA_RING_RX(_n)				(0x100 + (_n) * 0x10)
++
++#define MTK_WDMA_GLO_CFG				0x204
++#define MTK_WDMA_GLO_CFG_RX_INFO_PRERES			GENMASK(28, 26)
++
++#define MTK_WDMA_RESET_IDX				0x208
++#define MTK_WDMA_RESET_IDX_TX				GENMASK(3, 0)
++#define MTK_WDMA_RESET_IDX_RX				GENMASK(17, 16)
++
++#define MTK_WDMA_INT_MASK				0x228
++#define MTK_WDMA_INT_MASK_TX_DONE			GENMASK(3, 0)
++#define MTK_WDMA_INT_MASK_RX_DONE			GENMASK(17, 16)
++#define MTK_WDMA_INT_MASK_TX_DELAY			BIT(28)
++#define MTK_WDMA_INT_MASK_TX_COHERENT			BIT(29)
++#define MTK_WDMA_INT_MASK_RX_DELAY			BIT(30)
++#define MTK_WDMA_INT_MASK_RX_COHERENT			BIT(31)
++
++#define MTK_WDMA_INT_GRP1				0x250
++#define MTK_WDMA_INT_GRP2				0x254
++
++#define MTK_PCIE_MIRROR_MAP(n)				((n) ? 0x4 : 0x0)
++#define MTK_PCIE_MIRROR_MAP_EN				BIT(0)
++#define MTK_PCIE_MIRROR_MAP_WED_ID			BIT(1)
++
++/* DMA channel mapping */
++#define HIFSYS_DMA_AG_MAP				0x008
++
++#endif
+--- /dev/null
++++ b/include/linux/soc/mediatek/mtk_wed.h
+@@ -0,0 +1,131 @@
++#ifndef __MTK_WED_H
++#define __MTK_WED_H
++
++#include <linux/kernel.h>
++#include <linux/rcupdate.h>
++#include <linux/regmap.h>
++#include <linux/pci.h>
++
++#define MTK_WED_TX_QUEUES		2
++
++struct mtk_wed_hw;
++struct mtk_wdma_desc;
++
++struct mtk_wed_ring {
++	struct mtk_wdma_desc *desc;
++	dma_addr_t desc_phys;
++	int size;
++
++	u32 reg_base;
++	void __iomem *wpdma;
++};
++
++struct mtk_wed_device {
++#ifdef CONFIG_NET_MEDIATEK_SOC_WED
++	const struct mtk_wed_ops *ops;
++	struct device *dev;
++	struct mtk_wed_hw *hw;
++	bool init_done, running;
++	int wdma_idx;
++	int irq;
++
++	struct mtk_wed_ring tx_ring[MTK_WED_TX_QUEUES];
++	struct mtk_wed_ring txfree_ring;
++	struct mtk_wed_ring tx_wdma[MTK_WED_TX_QUEUES];
++
++	struct {
++		int size;
++		void **pages;
++		struct mtk_wdma_desc *desc;
++		dma_addr_t desc_phys;
++	} buf_ring;
++
++	/* filled by driver: */
++	struct {
++		struct pci_dev *pci_dev;
++
++		u32 wpdma_phys;
++
++		u16 token_start;
++		unsigned int nbuf;
++
++		u32 (*init_buf)(void *ptr, dma_addr_t phys, int token_id);
++		int (*offload_enable)(struct mtk_wed_device *wed);
++		void (*offload_disable)(struct mtk_wed_device *wed);
++	} wlan;
++#endif
++};
++
++struct mtk_wed_ops {
++	int (*attach)(struct mtk_wed_device *dev);
++	int (*tx_ring_setup)(struct mtk_wed_device *dev, int ring,
++			     void __iomem *regs);
++	int (*txfree_ring_setup)(struct mtk_wed_device *dev,
++				 void __iomem *regs);
++	void (*detach)(struct mtk_wed_device *dev);
++
++	void (*stop)(struct mtk_wed_device *dev);
++	void (*start)(struct mtk_wed_device *dev, u32 irq_mask);
++	void (*reset_dma)(struct mtk_wed_device *dev);
++
++	u32 (*reg_read)(struct mtk_wed_device *dev, u32 reg);
++	void (*reg_write)(struct mtk_wed_device *dev, u32 reg, u32 val);
++
++	u32 (*irq_get)(struct mtk_wed_device *dev, u32 mask);
++	void (*irq_set_mask)(struct mtk_wed_device *dev, u32 mask);
++};
++
++extern const struct mtk_wed_ops __rcu *mtk_soc_wed_ops;
++
++static inline int
++mtk_wed_device_attach(struct mtk_wed_device *dev)
++{
++	int ret = -ENODEV;
++
++#ifdef CONFIG_NET_MEDIATEK_SOC_WED
++	rcu_read_lock();
++	dev->ops = rcu_dereference(mtk_soc_wed_ops);
++	if (dev->ops)
++		ret = dev->ops->attach(dev);
++	else
++		rcu_read_unlock();
++
++	if (ret)
++		dev->ops = NULL;
++#endif
++
++	return ret;
++}
++
++#ifdef CONFIG_NET_MEDIATEK_SOC_WED
++#define mtk_wed_device_active(_dev) !!(_dev)->ops
++#define mtk_wed_device_detach(_dev) (_dev)->ops->detach(_dev)
++#define mtk_wed_device_start(_dev, _mask) (_dev)->ops->start(_dev, _mask)
++#define mtk_wed_device_tx_ring_setup(_dev, _ring, _regs) \
++	(_dev)->ops->tx_ring_setup(_dev, _ring, _regs)
++#define mtk_wed_device_txfree_ring_setup(_dev, _regs) \
++	(_dev)->ops->txfree_ring_setup(_dev, _regs)
++#define mtk_wed_device_reg_read(_dev, _reg) \
++	(_dev)->ops->reg_read(_dev, _reg)
++#define mtk_wed_device_reg_write(_dev, _reg, _val) \
++	(_dev)->ops->reg_write(_dev, _reg, _val)
++#define mtk_wed_device_irq_get(_dev, _mask) \
++	(_dev)->ops->irq_get(_dev, _mask)
++#define mtk_wed_device_irq_set_mask(_dev, _mask) \
++	(_dev)->ops->irq_set_mask(_dev, _mask)
++#else
++static inline bool mtk_wed_device_active(struct mtk_wed_device *dev)
++{
++	return false;
++}
++#define mtk_wed_device_detach(_dev) do {} while (0)
++#define mtk_wed_device_start(_dev, _mask) do {} while (0)
++#define mtk_wed_device_tx_ring_setup(_dev, _ring, _regs) -ENODEV
++#define mtk_wed_device_txfree_ring_setup(_dev, _ring, _regs) -ENODEV
++#define mtk_wed_device_reg_read(_dev, _reg) 0
++#define mtk_wed_device_reg_write(_dev, _reg, _val) do {} while (0)
++#define mtk_wed_device_irq_get(_dev, _mask) 0
++#define mtk_wed_device_irq_set_mask(_dev, _mask) do {} while (0)
++#endif
++
++#endif
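
For context (not part of the patch): a rough sketch, from the WLAN driver side, of how the helpers exported in include/linux/soc/mediatek/mtk_wed.h above are meant to be called. Everything prefixed example_ is hypothetical, and the token range, buffer count and register offsets are placeholders; the real values come from the MT7615/MT7915 driver.

#include <linux/pci.h>
#include <linux/io.h>
#include <linux/string.h>
#include <linux/soc/mediatek/mtk_wed.h>

/* Placeholder: write the wifi TX descriptor into each WED-owned buffer
 * and return its size; a real driver fills its own TXD format here. */
static u32 example_init_buf(void *ptr, dma_addr_t phys, int token_id)
{
	memset(ptr, 0, 128);
	return 128;
}

static int example_offload_enable(struct mtk_wed_device *wed)
{
	return 0;	/* switch the wifi firmware into offload mode here */
}

static void example_offload_disable(struct mtk_wed_device *wed)
{
}

static int example_wed_init(struct mtk_wed_device *wed, struct pci_dev *pdev,
			    void __iomem *wpdma, u32 wpdma_phys, u32 irq_mask)
{
	int i, ret;

	wed->wlan.pci_dev = pdev;
	wed->wlan.wpdma_phys = wpdma_phys;	/* PCIe view of the WPDMA base */
	wed->wlan.token_start = 0;		/* placeholder token range */
	wed->wlan.nbuf = 16384;			/* placeholder buffer count */
	wed->wlan.init_buf = example_init_buf;
	wed->wlan.offload_enable = example_offload_enable;
	wed->wlan.offload_disable = example_offload_disable;

	ret = mtk_wed_device_attach(wed);
	if (ret)
		return ret;	/* no WED hardware claimed, run without offload */

	/* Hand the per-queue TX rings and the txfree ring over to WED;
	 * the 0x300/0x400 offsets are placeholders for the real WPDMA
	 * ring register blocks of the wifi chip. */
	for (i = 0; i < MTK_WED_TX_QUEUES; i++)
		mtk_wed_device_tx_ring_setup(wed, i, wpdma + 0x300 + i * 0x10);
	mtk_wed_device_txfree_ring_setup(wed, wpdma + 0x400);

	mtk_wed_device_start(wed, irq_mask);
	return 0;
}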

Some files were not shown because too many files changed in this diff