summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Mike Pagano <mpagano@gentoo.org> 2024-04-10 11:09:06 -0400
committer Mike Pagano <mpagano@gentoo.org> 2024-04-10 11:09:06 -0400
commit 55f07c32363e30cdd7d8619be719408a3999d536 (patch)
tree 5a6ae7b94f222c31930b983aa3cc399ef645b16f
parent Linux patch 6.6.25 (diff)
download linux-patches-55f07c32363e30cdd7d8619be719408a3999d536.tar.gz
linux-patches-55f07c32363e30cdd7d8619be719408a3999d536.tar.bz2
linux-patches-55f07c32363e30cdd7d8619be719408a3999d536.zip
Linux patch 6.6.26 (tag: 6.6-32)
Signed-off-by: Mike Pagano <mpagano@gentoo.org>
-rw-r--r-- 0000_README 4
-rw-r--r-- 1025_linux-6.6.26.patch 15825
2 files changed, 15829 insertions, 0 deletions
diff --git a/0000_README b/0000_README
index cfd8c138..7661b44e 100644
--- a/0000_README
+++ b/0000_README
@@ -143,6 +143,10 @@ Patch: 1024_linux-6.6.25.patch
From: https://www.kernel.org
Desc: Linux 6.6.25
+Patch: 1025_linux-6.6.26.patch
+From: https://www.kernel.org
+Desc: Linux 6.6.26
+
Patch: 1510_fs-enable-link-security-restrictions-by-default.patch
From: http://sources.debian.net/src/linux/3.16.7-ckt4-3/debian/patches/debian/fs-enable-link-security-restrictions-by-default.patch/
Desc: Enable link security restrictions by default.
diff --git a/1025_linux-6.6.26.patch b/1025_linux-6.6.26.patch
new file mode 100644
index 00000000..20a79d8e
--- /dev/null
+++ b/1025_linux-6.6.26.patch
@@ -0,0 +1,15825 @@
+diff --git a/Documentation/admin-guide/hw-vuln/spectre.rst b/Documentation/admin-guide/hw-vuln/spectre.rst
+index 32a8893e56177..9edb2860a3e19 100644
+--- a/Documentation/admin-guide/hw-vuln/spectre.rst
++++ b/Documentation/admin-guide/hw-vuln/spectre.rst
+@@ -138,11 +138,10 @@ associated with the source address of the indirect branch. Specifically,
+ the BHB might be shared across privilege levels even in the presence of
+ Enhanced IBRS.
+
+-Currently the only known real-world BHB attack vector is via
+-unprivileged eBPF. Therefore, it's highly recommended to not enable
+-unprivileged eBPF, especially when eIBRS is used (without retpolines).
+-For a full mitigation against BHB attacks, it's recommended to use
+-retpolines (or eIBRS combined with retpolines).
++Previously the only known real-world BHB attack vector was via unprivileged
++eBPF. Further research has found attacks that don't require unprivileged eBPF.
++For a full mitigation against BHB attacks it is recommended to set BHI_DIS_S or
++use the BHB clearing sequence.
+
+ Attack scenarios
+ ----------------
+@@ -430,6 +429,23 @@ The possible values in this file are:
+ 'PBRSB-eIBRS: Not affected' CPU is not affected by PBRSB
+ =========================== =======================================================
+
++ - Branch History Injection (BHI) protection status:
++
++.. list-table::
++
++ * - BHI: Not affected
++ - System is not affected
++ * - BHI: Retpoline
++ - System is protected by retpoline
++ * - BHI: BHI_DIS_S
++ - System is protected by BHI_DIS_S
++ * - BHI: SW loop; KVM SW loop
++ - System is protected by software clearing sequence
++ * - BHI: Syscall hardening
++ - Syscalls are hardened against BHI
++ * - BHI: Syscall hardening; KVM: SW loop
++ - System is protected from userspace attacks by syscall hardening; KVM is protected by software clearing sequence
++
+ Full mitigation might require a microcode update from the CPU
+ vendor. When the necessary microcode is not available, the kernel will
+ report vulnerability.
+@@ -484,7 +500,11 @@ Spectre variant 2
+
+ Systems which support enhanced IBRS (eIBRS) enable IBRS protection once at
+ boot, by setting the IBRS bit, and they're automatically protected against
+- Spectre v2 variant attacks.
++ some Spectre v2 variant attacks. The BHB can still influence the choice of
++ indirect branch predictor entry, and although branch predictor entries are
++ isolated between modes when eIBRS is enabled, the BHB itself is not isolated
++ between modes. Systems which support BHI_DIS_S will set it to protect against
++ BHI attacks.
+
+ On Intel's enhanced IBRS systems, this includes cross-thread branch target
+ injections on SMT systems (STIBP). In other words, Intel eIBRS enables
+@@ -638,6 +658,22 @@ kernel command line.
+ spectre_v2=off. Spectre variant 1 mitigations
+ cannot be disabled.
+
++ spectre_bhi=
++
++ [X86] Control mitigation of Branch History Injection
++ (BHI) vulnerability. Syscalls are hardened against BHI
++ regardless of this setting. This setting affects the deployment
++ of the HW BHI control and the SW BHB clearing sequence.
++
++ on
++ unconditionally enable.
++ off
++ unconditionally disable.
++ auto
++ enable if hardware mitigation
++ control (BHI_DIS_S) is available, otherwise
++ enable alternate mitigation in KVM.
++
+ For spectre_v2_user see Documentation/admin-guide/kernel-parameters.txt
+
+ Mitigation selection guide
+diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
+index 7a36124dde5e5..61199466c0437 100644
+--- a/Documentation/admin-guide/kernel-parameters.txt
++++ b/Documentation/admin-guide/kernel-parameters.txt
+@@ -5920,6 +5920,18 @@
+ sonypi.*= [HW] Sony Programmable I/O Control Device driver
+ See Documentation/admin-guide/laptops/sonypi.rst
+
++ spectre_bhi= [X86] Control mitigation of Branch History Injection
++ (BHI) vulnerability. Syscalls are hardened against BHI
++ regardless of this setting. This setting affects the
++ deployment of the HW BHI control and the SW BHB
++ clearing sequence.
++
++ on - unconditionally enable.
++ off - unconditionally disable.
++ auto - (default) enable hardware mitigation
++ (BHI_DIS_S) if available, otherwise enable
++ alternate mitigation in KVM.
++
+ spectre_v2= [X86] Control mitigation of Spectre variant 2
+ (indirect branch speculation) vulnerability.
+ The default operation protects the kernel from
+diff --git a/Makefile b/Makefile
+index 022af2a9a6d9b..77ad41bd298e0 100644
+--- a/Makefile
++++ b/Makefile
+@@ -1,7 +1,7 @@
+ # SPDX-License-Identifier: GPL-2.0
+ VERSION = 6
+ PATCHLEVEL = 6
+-SUBLEVEL = 25
++SUBLEVEL = 26
+ EXTRAVERSION =
+ NAME = Hurr durr I'ma ninja sloth
+
+diff --git a/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi b/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi
+index 5a33e16a8b677..c2f5e9f6679d6 100644
+--- a/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi
++++ b/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi
+@@ -970,6 +970,8 @@ bluetooth: bluetooth {
+ vddrf-supply = <&pp1300_l2c>;
+ vddch0-supply = <&pp3300_l10c>;
+ max-speed = <3200000>;
++
++ qcom,local-bd-address-broken;
+ };
+ };
+
+diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
+index c94c0f8c9a737..d95416b93a9dd 100644
+--- a/arch/arm64/kernel/ptrace.c
++++ b/arch/arm64/kernel/ptrace.c
+@@ -728,7 +728,6 @@ static void sve_init_header_from_task(struct user_sve_header *header,
+ {
+ unsigned int vq;
+ bool active;
+- bool fpsimd_only;
+ enum vec_type task_type;
+
+ memset(header, 0, sizeof(*header));
+@@ -744,12 +743,10 @@ static void sve_init_header_from_task(struct user_sve_header *header,
+ case ARM64_VEC_SVE:
+ if (test_tsk_thread_flag(target, TIF_SVE_VL_INHERIT))
+ header->flags |= SVE_PT_VL_INHERIT;
+- fpsimd_only = !test_tsk_thread_flag(target, TIF_SVE);
+ break;
+ case ARM64_VEC_SME:
+ if (test_tsk_thread_flag(target, TIF_SME_VL_INHERIT))
+ header->flags |= SVE_PT_VL_INHERIT;
+- fpsimd_only = false;
+ break;
+ default:
+ WARN_ON_ONCE(1);
+@@ -757,7 +754,7 @@ static void sve_init_header_from_task(struct user_sve_header *header,
+ }
+
+ if (active) {
+- if (fpsimd_only) {
++ if (target->thread.fp_type == FP_STATE_FPSIMD) {
+ header->flags |= SVE_PT_REGS_FPSIMD;
+ } else {
+ header->flags |= SVE_PT_REGS_SVE;
+diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
+index f155b8c9e98c7..15aa9bad1c280 100644
+--- a/arch/arm64/kvm/hyp/pgtable.c
++++ b/arch/arm64/kvm/hyp/pgtable.c
+@@ -805,12 +805,15 @@ static bool stage2_try_break_pte(const struct kvm_pgtable_visit_ctx *ctx,
+ * Perform the appropriate TLB invalidation based on the
+ * evicted pte value (if any).
+ */
+- if (kvm_pte_table(ctx->old, ctx->level))
+- kvm_tlb_flush_vmid_range(mmu, ctx->addr,
+- kvm_granule_size(ctx->level));
+- else if (kvm_pte_valid(ctx->old))
++ if (kvm_pte_table(ctx->old, ctx->level)) {
++ u64 size = kvm_granule_size(ctx->level);
++ u64 addr = ALIGN_DOWN(ctx->addr, size);
++
++ kvm_tlb_flush_vmid_range(mmu, addr, size);
++ } else if (kvm_pte_valid(ctx->old)) {
+ kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu,
+ ctx->addr, ctx->level);
++ }
+ }
+
+ if (stage2_pte_is_counted(ctx->old))
+diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
+index 150d1c6543f7f..29196dce9b91d 100644
+--- a/arch/arm64/net/bpf_jit_comp.c
++++ b/arch/arm64/net/bpf_jit_comp.c
+@@ -876,7 +876,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
+ emit(A64_UXTH(is64, dst, dst), ctx);
+ break;
+ case 32:
+- emit(A64_REV32(is64, dst, dst), ctx);
++ emit(A64_REV32(0, dst, dst), ctx);
+ /* upper 32 bits already cleared */
+ break;
+ case 64:
+@@ -1189,7 +1189,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
+ } else {
+ emit_a64_mov_i(1, tmp, off, ctx);
+ if (sign_extend)
+- emit(A64_LDRSW(dst, src_adj, off_adj), ctx);
++ emit(A64_LDRSW(dst, src, tmp), ctx);
+ else
+ emit(A64_LDR32(dst, src, tmp), ctx);
+ }
+diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c
+index 926bec775f41c..9822366dc186e 100644
+--- a/arch/powerpc/mm/book3s64/pgtable.c
++++ b/arch/powerpc/mm/book3s64/pgtable.c
+@@ -130,7 +130,7 @@ void set_pud_at(struct mm_struct *mm, unsigned long addr,
+
+ WARN_ON(pte_hw_valid(pud_pte(*pudp)));
+ assert_spin_locked(pud_lockptr(mm, pudp));
+- WARN_ON(!(pud_large(pud)));
++ WARN_ON(!(pud_leaf(pud)));
+ #endif
+ trace_hugepage_set_pud(addr, pud_val(pud));
+ return set_pte_at(mm, addr, pudp_ptep(pudp), pud_pte(pud));
+diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h
+index ec0cab9fbddd0..72ec1d9bd3f31 100644
+--- a/arch/riscv/include/asm/uaccess.h
++++ b/arch/riscv/include/asm/uaccess.h
+@@ -319,7 +319,7 @@ unsigned long __must_check clear_user(void __user *to, unsigned long n)
+
+ #define __get_kernel_nofault(dst, src, type, err_label) \
+ do { \
+- long __kr_err; \
++ long __kr_err = 0; \
+ \
+ __get_user_nocheck(*((type *)(dst)), (type *)(src), __kr_err); \
+ if (unlikely(__kr_err)) \
+@@ -328,7 +328,7 @@ do { \
+
+ #define __put_kernel_nofault(dst, src, type, err_label) \
+ do { \
+- long __kr_err; \
++ long __kr_err = 0; \
+ \
+ __put_user_nocheck(*((type *)(src)), (type *)(dst), __kr_err); \
+ if (unlikely(__kr_err)) \
+diff --git a/arch/riscv/include/uapi/asm/auxvec.h b/arch/riscv/include/uapi/asm/auxvec.h
+index 10aaa83db89ef..95050ebe9ad00 100644
+--- a/arch/riscv/include/uapi/asm/auxvec.h
++++ b/arch/riscv/include/uapi/asm/auxvec.h
+@@ -34,7 +34,7 @@
+ #define AT_L3_CACHEGEOMETRY 47
+
+ /* entries in ARCH_DLINFO */
+-#define AT_VECTOR_SIZE_ARCH 9
++#define AT_VECTOR_SIZE_ARCH 10
+ #define AT_MINSIGSTKSZ 51
+
+ #endif /* _UAPI_ASM_RISCV_AUXVEC_H */
+diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c
+index 37e87fdcf6a00..30e12b310cab7 100644
+--- a/arch/riscv/kernel/patch.c
++++ b/arch/riscv/kernel/patch.c
+@@ -80,6 +80,8 @@ static int __patch_insn_set(void *addr, u8 c, size_t len)
+ */
+ lockdep_assert_held(&text_mutex);
+
++ preempt_disable();
++
+ if (across_pages)
+ patch_map(addr + PAGE_SIZE, FIX_TEXT_POKE1);
+
+@@ -92,6 +94,8 @@ static int __patch_insn_set(void *addr, u8 c, size_t len)
+ if (across_pages)
+ patch_unmap(FIX_TEXT_POKE1);
+
++ preempt_enable();
++
+ return 0;
+ }
+ NOKPROBE_SYMBOL(__patch_insn_set);
+@@ -122,6 +126,8 @@ static int __patch_insn_write(void *addr, const void *insn, size_t len)
+ if (!riscv_patch_in_stop_machine)
+ lockdep_assert_held(&text_mutex);
+
++ preempt_disable();
++
+ if (across_pages)
+ patch_map(addr + PAGE_SIZE, FIX_TEXT_POKE1);
+
+@@ -134,6 +140,8 @@ static int __patch_insn_write(void *addr, const void *insn, size_t len)
+ if (across_pages)
+ patch_unmap(FIX_TEXT_POKE1);
+
++ preempt_enable();
++
+ return ret;
+ }
+ NOKPROBE_SYMBOL(__patch_insn_write);
+diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c
+index e32d737e039fd..83e223318822a 100644
+--- a/arch/riscv/kernel/process.c
++++ b/arch/riscv/kernel/process.c
+@@ -26,8 +26,6 @@
+ #include <asm/cpuidle.h>
+ #include <asm/vector.h>
+
+-register unsigned long gp_in_global __asm__("gp");
+-
+ #if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_STACKPROTECTOR_PER_TASK)
+ #include <linux/stackprotector.h>
+ unsigned long __stack_chk_guard __read_mostly;
+@@ -186,7 +184,6 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
+ if (unlikely(args->fn)) {
+ /* Kernel thread */
+ memset(childregs, 0, sizeof(struct pt_regs));
+- childregs->gp = gp_in_global;
+ /* Supervisor/Machine, irqs on: */
+ childregs->status = SR_PP | SR_PIE;
+
+diff --git a/arch/riscv/kvm/aia_aplic.c b/arch/riscv/kvm/aia_aplic.c
+index 39e72aa016a4c..b467ba5ed9100 100644
+--- a/arch/riscv/kvm/aia_aplic.c
++++ b/arch/riscv/kvm/aia_aplic.c
+@@ -137,11 +137,21 @@ static void aplic_write_pending(struct aplic *aplic, u32 irq, bool pending)
+ raw_spin_lock_irqsave(&irqd->lock, flags);
+
+ sm = irqd->sourcecfg & APLIC_SOURCECFG_SM_MASK;
+- if (!pending &&
+- ((sm == APLIC_SOURCECFG_SM_LEVEL_HIGH) ||
+- (sm == APLIC_SOURCECFG_SM_LEVEL_LOW)))
++ if (sm == APLIC_SOURCECFG_SM_INACTIVE)
+ goto skip_write_pending;
+
++ if (sm == APLIC_SOURCECFG_SM_LEVEL_HIGH ||
++ sm == APLIC_SOURCECFG_SM_LEVEL_LOW) {
++ if (!pending)
++ goto skip_write_pending;
++ if ((irqd->state & APLIC_IRQ_STATE_INPUT) &&
++ sm == APLIC_SOURCECFG_SM_LEVEL_LOW)
++ goto skip_write_pending;
++ if (!(irqd->state & APLIC_IRQ_STATE_INPUT) &&
++ sm == APLIC_SOURCECFG_SM_LEVEL_HIGH)
++ goto skip_write_pending;
++ }
++
+ if (pending)
+ irqd->state |= APLIC_IRQ_STATE_PENDING;
+ else
+@@ -187,16 +197,31 @@ static void aplic_write_enabled(struct aplic *aplic, u32 irq, bool enabled)
+
+ static bool aplic_read_input(struct aplic *aplic, u32 irq)
+ {
+- bool ret;
+- unsigned long flags;
++ u32 sourcecfg, sm, raw_input, irq_inverted;
+ struct aplic_irq *irqd;
++ unsigned long flags;
++ bool ret = false;
+
+ if (!irq || aplic->nr_irqs <= irq)
+ return false;
+ irqd = &aplic->irqs[irq];
+
+ raw_spin_lock_irqsave(&irqd->lock, flags);
+- ret = (irqd->state & APLIC_IRQ_STATE_INPUT) ? true : false;
++
++ sourcecfg = irqd->sourcecfg;
++ if (sourcecfg & APLIC_SOURCECFG_D)
++ goto skip;
++
++ sm = sourcecfg & APLIC_SOURCECFG_SM_MASK;
++ if (sm == APLIC_SOURCECFG_SM_INACTIVE)
++ goto skip;
++
++ raw_input = (irqd->state & APLIC_IRQ_STATE_INPUT) ? 1 : 0;
++ irq_inverted = (sm == APLIC_SOURCECFG_SM_LEVEL_LOW ||
++ sm == APLIC_SOURCECFG_SM_EDGE_FALL) ? 1 : 0;
++ ret = !!(raw_input ^ irq_inverted);
++
++skip:
+ raw_spin_unlock_irqrestore(&irqd->lock, flags);
+
+ return ret;
+diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c
+index 442a74f113cbf..14e1a73ffcfe6 100644
+--- a/arch/s390/boot/vmem.c
++++ b/arch/s390/boot/vmem.c
+@@ -360,7 +360,7 @@ static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long e
+ }
+ pmd = boot_crst_alloc(_SEGMENT_ENTRY_EMPTY);
+ pud_populate(&init_mm, pud, pmd);
+- } else if (pud_large(*pud)) {
++ } else if (pud_leaf(*pud)) {
+ continue;
+ }
+ pgtable_pmd_populate(pud, addr, next, mode);
+diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
+index fb3ee7758b765..38290b0078c56 100644
+--- a/arch/s390/include/asm/pgtable.h
++++ b/arch/s390/include/asm/pgtable.h
+@@ -729,7 +729,7 @@ static inline int pud_bad(pud_t pud)
+ {
+ unsigned long type = pud_val(pud) & _REGION_ENTRY_TYPE_MASK;
+
+- if (type > _REGION_ENTRY_TYPE_R3 || pud_large(pud))
++ if (type > _REGION_ENTRY_TYPE_R3 || pud_leaf(pud))
+ return 1;
+ if (type < _REGION_ENTRY_TYPE_R3)
+ return 0;
+@@ -1396,7 +1396,7 @@ static inline unsigned long pud_deref(pud_t pud)
+ unsigned long origin_mask;
+
+ origin_mask = _REGION_ENTRY_ORIGIN;
+- if (pud_large(pud))
++ if (pud_leaf(pud))
+ origin_mask = _REGION3_ENTRY_ORIGIN_LARGE;
+ return (unsigned long)__va(pud_val(pud) & origin_mask);
+ }
+diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
+index 49a11f6dd7ae9..26c08ee877407 100644
+--- a/arch/s390/kernel/entry.S
++++ b/arch/s390/kernel/entry.S
+@@ -653,6 +653,7 @@ SYM_DATA_START_LOCAL(daton_psw)
+ SYM_DATA_END(daton_psw)
+
+ .section .rodata, "a"
++ .balign 8
+ #define SYSCALL(esame,emu) .quad __s390x_ ## esame
+ SYM_DATA_START(sys_call_table)
+ #include "asm/syscall_table.h"
+diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
+index 157e0a8d5157d..d17bb1ef63f41 100644
+--- a/arch/s390/mm/gmap.c
++++ b/arch/s390/mm/gmap.c
+@@ -596,7 +596,7 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
+ pud = pud_offset(p4d, vmaddr);
+ VM_BUG_ON(pud_none(*pud));
+ /* large puds cannot yet be handled */
+- if (pud_large(*pud))
++ if (pud_leaf(*pud))
+ return -EFAULT;
+ pmd = pmd_offset(pud, vmaddr);
+ VM_BUG_ON(pmd_none(*pmd));
+diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
+index 297a6d897d5a0..5f64f3d0fafbb 100644
+--- a/arch/s390/mm/hugetlbpage.c
++++ b/arch/s390/mm/hugetlbpage.c
+@@ -224,7 +224,7 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
+ if (p4d_present(*p4dp)) {
+ pudp = pud_offset(p4dp, addr);
+ if (pud_present(*pudp)) {
+- if (pud_large(*pudp))
++ if (pud_leaf(*pudp))
+ return (pte_t *) pudp;
+ pmdp = pmd_offset(pudp, addr);
+ }
+@@ -240,7 +240,7 @@ int pmd_huge(pmd_t pmd)
+
+ int pud_huge(pud_t pud)
+ {
+- return pud_large(pud);
++ return pud_leaf(pud);
+ }
+
+ bool __init arch_hugetlb_valid_size(unsigned long size)
+diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
+index b87e96c64b61d..441f654d048d2 100644
+--- a/arch/s390/mm/pageattr.c
++++ b/arch/s390/mm/pageattr.c
+@@ -274,7 +274,7 @@ static int walk_pud_level(p4d_t *p4d, unsigned long addr, unsigned long end,
+ if (pud_none(*pudp))
+ return -EINVAL;
+ next = pud_addr_end(addr, end);
+- if (pud_large(*pudp)) {
++ if (pud_leaf(*pudp)) {
+ need_split = !!(flags & SET_MEMORY_4K);
+ need_split |= !!(addr & ~PUD_MASK);
+ need_split |= !!(addr + PUD_SIZE > next);
+diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
+index 5cb92941540b3..5e349869590a8 100644
+--- a/arch/s390/mm/pgtable.c
++++ b/arch/s390/mm/pgtable.c
+@@ -479,7 +479,7 @@ static int pmd_lookup(struct mm_struct *mm, unsigned long addr, pmd_t **pmdp)
+ return -ENOENT;
+
+ /* Large PUDs are not supported yet. */
+- if (pud_large(*pud))
++ if (pud_leaf(*pud))
+ return -EFAULT;
+
+ *pmdp = pmd_offset(pud, addr);
+diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
+index 6d276103c6d58..2d3f65da56eea 100644
+--- a/arch/s390/mm/vmem.c
++++ b/arch/s390/mm/vmem.c
+@@ -322,7 +322,7 @@ static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end,
+ if (!add) {
+ if (pud_none(*pud))
+ continue;
+- if (pud_large(*pud)) {
++ if (pud_leaf(*pud)) {
+ if (IS_ALIGNED(addr, PUD_SIZE) &&
+ IS_ALIGNED(next, PUD_SIZE)) {
+ pud_clear(pud);
+@@ -343,7 +343,7 @@ static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end,
+ if (!pmd)
+ goto out;
+ pud_populate(&init_mm, pud, pmd);
+- } else if (pud_large(*pud)) {
++ } else if (pud_leaf(*pud)) {
+ continue;
+ }
+ ret = modify_pmd_table(pud, addr, next, add, direct);
+@@ -586,7 +586,7 @@ pte_t *vmem_get_alloc_pte(unsigned long addr, bool alloc)
+ if (!pmd)
+ goto out;
+ pud_populate(&init_mm, pud, pmd);
+- } else if (WARN_ON_ONCE(pud_large(*pud))) {
++ } else if (WARN_ON_ONCE(pud_leaf(*pud))) {
+ goto out;
+ }
+ pmd = pmd_offset(pud, addr);
+diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
+index e507692e51e71..8af02176f68bf 100644
+--- a/arch/s390/net/bpf_jit_comp.c
++++ b/arch/s390/net/bpf_jit_comp.c
+@@ -516,11 +516,12 @@ static void bpf_skip(struct bpf_jit *jit, int size)
+ * PLT for hotpatchable calls. The calling convention is the same as for the
+ * ftrace hotpatch trampolines: %r0 is return address, %r1 is clobbered.
+ */
+-extern const char bpf_plt[];
+-extern const char bpf_plt_ret[];
+-extern const char bpf_plt_target[];
+-extern const char bpf_plt_end[];
+-#define BPF_PLT_SIZE 32
++struct bpf_plt {
++ char code[16];
++ void *ret;
++ void *target;
++} __packed;
++extern const struct bpf_plt bpf_plt;
+ asm(
+ ".pushsection .rodata\n"
+ " .balign 8\n"
+@@ -531,15 +532,14 @@ asm(
+ " .balign 8\n"
+ "bpf_plt_ret: .quad 0\n"
+ "bpf_plt_target: .quad 0\n"
+- "bpf_plt_end:\n"
+ " .popsection\n"
+ );
+
+-static void bpf_jit_plt(void *plt, void *ret, void *target)
++static void bpf_jit_plt(struct bpf_plt *plt, void *ret, void *target)
+ {
+- memcpy(plt, bpf_plt, BPF_PLT_SIZE);
+- *(void **)((char *)plt + (bpf_plt_ret - bpf_plt)) = ret;
+- *(void **)((char *)plt + (bpf_plt_target - bpf_plt)) = target ?: ret;
++ memcpy(plt, &bpf_plt, sizeof(*plt));
++ plt->ret = ret;
++ plt->target = target ?: ret;
+ }
+
+ /*
+@@ -662,9 +662,9 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth)
+ jit->prg = ALIGN(jit->prg, 8);
+ jit->prologue_plt = jit->prg;
+ if (jit->prg_buf)
+- bpf_jit_plt(jit->prg_buf + jit->prg,
++ bpf_jit_plt((struct bpf_plt *)(jit->prg_buf + jit->prg),
+ jit->prg_buf + jit->prologue_plt_ret, NULL);
+- jit->prg += BPF_PLT_SIZE;
++ jit->prg += sizeof(struct bpf_plt);
+ }
+
+ static int get_probe_mem_regno(const u8 *insn)
+@@ -1901,9 +1901,6 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
+ struct bpf_jit jit;
+ int pass;
+
+- if (WARN_ON_ONCE(bpf_plt_end - bpf_plt != BPF_PLT_SIZE))
+- return orig_fp;
+-
+ if (!fp->jit_requested)
+ return orig_fp;
+
+@@ -2009,14 +2006,11 @@ bool bpf_jit_supports_far_kfunc_call(void)
+ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
+ void *old_addr, void *new_addr)
+ {
++ struct bpf_plt expected_plt, current_plt, new_plt, *plt;
+ struct {
+ u16 opc;
+ s32 disp;
+ } __packed insn;
+- char expected_plt[BPF_PLT_SIZE];
+- char current_plt[BPF_PLT_SIZE];
+- char new_plt[BPF_PLT_SIZE];
+- char *plt;
+ char *ret;
+ int err;
+
+@@ -2035,18 +2029,18 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
+ */
+ } else {
+ /* Verify the PLT. */
+- plt = (char *)ip + (insn.disp << 1);
+- err = copy_from_kernel_nofault(current_plt, plt, BPF_PLT_SIZE);
++ plt = ip + (insn.disp << 1);
++ err = copy_from_kernel_nofault(&current_plt, plt,
++ sizeof(current_plt));
+ if (err < 0)
+ return err;
+ ret = (char *)ip + 6;
+- bpf_jit_plt(expected_plt, ret, old_addr);
+- if (memcmp(current_plt, expected_plt, BPF_PLT_SIZE))
++ bpf_jit_plt(&expected_plt, ret, old_addr);
++ if (memcmp(&current_plt, &expected_plt, sizeof(current_plt)))
+ return -EINVAL;
+ /* Adjust the call address. */
+- bpf_jit_plt(new_plt, ret, new_addr);
+- s390_kernel_write(plt + (bpf_plt_target - bpf_plt),
+- new_plt + (bpf_plt_target - bpf_plt),
++ bpf_jit_plt(&new_plt, ret, new_addr);
++ s390_kernel_write(&plt->target, &new_plt.target,
+ sizeof(void *));
+ }
+
+diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
+index f83017992eaae..d7db4e737218c 100644
+--- a/arch/sparc/mm/init_64.c
++++ b/arch/sparc/mm/init_64.c
+@@ -1665,7 +1665,7 @@ bool kern_addr_valid(unsigned long addr)
+ if (pud_none(*pud))
+ return false;
+
+- if (pud_large(*pud))
++ if (pud_leaf(*pud))
+ return pfn_valid(pud_pfn(*pud));
+
+ pmd = pmd_offset(pud, addr);
+diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
+index 4b81e884a6147..b4e6859542a39 100644
+--- a/arch/x86/Kconfig
++++ b/arch/x86/Kconfig
+@@ -2566,6 +2566,31 @@ config MITIGATION_RFDS
+ stored in floating point, vector and integer registers.
+ See also <file:Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst>
+
++choice
++ prompt "Clear branch history"
++ depends on CPU_SUP_INTEL
++ default SPECTRE_BHI_ON
++ help
++ Enable BHI mitigations. BHI attacks are a form of Spectre V2 attacks
++ where the branch history buffer is poisoned to speculatively steer
++ indirect branches.
++ See <file:Documentation/admin-guide/hw-vuln/spectre.rst>
++
++config SPECTRE_BHI_ON
++ bool "on"
++ help
++ Equivalent to setting spectre_bhi=on command line parameter.
++config SPECTRE_BHI_OFF
++ bool "off"
++ help
++ Equivalent to setting spectre_bhi=off command line parameter.
++config SPECTRE_BHI_AUTO
++ bool "auto"
++ help
++ Equivalent to setting spectre_bhi=auto command line parameter.
++
++endchoice
++
+ endif
+
+ config ARCH_HAS_ADD_PAGES
+diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
+index 71fc531b95b4e..583c11664c63b 100644
+--- a/arch/x86/boot/compressed/Makefile
++++ b/arch/x86/boot/compressed/Makefile
+@@ -84,7 +84,7 @@ LDFLAGS_vmlinux += -T
+ hostprogs := mkpiggy
+ HOST_EXTRACFLAGS += -I$(srctree)/tools/include
+
+-sed-voffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(_text\|__bss_start\|_end\)$$/\#define VO_\2 _AC(0x\1,UL)/p'
++sed-voffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(_text\|__start_rodata\|__bss_start\|_end\)$$/\#define VO_\2 _AC(0x\1,UL)/p'
+
+ quiet_cmd_voffset = VOFFSET $@
+ cmd_voffset = $(NM) $< | sed -n $(sed-voffset) > $@
+diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
+index f711f2a85862e..b5ecbd32a46fa 100644
+--- a/arch/x86/boot/compressed/misc.c
++++ b/arch/x86/boot/compressed/misc.c
+@@ -330,6 +330,7 @@ static size_t parse_elf(void *output)
+ return ehdr.e_entry - LOAD_PHYSICAL_ADDR;
+ }
+
++const unsigned long kernel_text_size = VO___start_rodata - VO__text;
+ const unsigned long kernel_total_size = VO__end - VO__text;
+
+ static u8 boot_heap[BOOT_HEAP_SIZE] __aligned(4);
+@@ -357,6 +358,19 @@ unsigned long decompress_kernel(unsigned char *outbuf, unsigned long virt_addr,
+ return entry;
+ }
+
++/*
++ * Set the memory encryption xloadflag based on the mem_encrypt= command line
++ * parameter, if provided.
++ */
++static void parse_mem_encrypt(struct setup_header *hdr)
++{
++ int on = cmdline_find_option_bool("mem_encrypt=on");
++ int off = cmdline_find_option_bool("mem_encrypt=off");
++
++ if (on > off)
++ hdr->xloadflags |= XLF_MEM_ENCRYPTION;
++}
++
+ /*
+ * The compressed kernel image (ZO), has been moved so that its position
+ * is against the end of the buffer used to hold the uncompressed kernel
+@@ -387,6 +401,8 @@ asmlinkage __visible void *extract_kernel(void *rmode, unsigned char *output)
+ /* Clear flags intended for solely in-kernel use. */
+ boot_params->hdr.loadflags &= ~KASLR_FLAG;
+
++ parse_mem_encrypt(&boot_params->hdr);
++
+ sanitize_boot_params(boot_params);
+
+ if (boot_params->screen_info.orig_video_mode == 7) {
+diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c
+index 80d76aea1f7bf..0a49218a516a2 100644
+--- a/arch/x86/boot/compressed/sev.c
++++ b/arch/x86/boot/compressed/sev.c
+@@ -116,6 +116,9 @@ static bool fault_in_kernel_space(unsigned long address)
+ #undef __init
+ #define __init
+
++#undef __head
++#define __head
++
+ #define __BOOT_COMPRESSED
+
+ /* Basic instruction decoding support needed */
+diff --git a/arch/x86/coco/core.c b/arch/x86/coco/core.c
+index d07be9d05cd03..ddd4efdc79d66 100644
+--- a/arch/x86/coco/core.c
++++ b/arch/x86/coco/core.c
+@@ -3,13 +3,17 @@
+ * Confidential Computing Platform Capability checks
+ *
+ * Copyright (C) 2021 Advanced Micro Devices, Inc.
++ * Copyright (C) 2024 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ *
+ * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ */
+
+ #include <linux/export.h>
+ #include <linux/cc_platform.h>
++#include <linux/string.h>
++#include <linux/random.h>
+
++#include <asm/archrandom.h>
+ #include <asm/coco.h>
+ #include <asm/processor.h>
+
+@@ -148,3 +152,40 @@ u64 cc_mkdec(u64 val)
+ }
+ }
+ EXPORT_SYMBOL_GPL(cc_mkdec);
++
++__init void cc_random_init(void)
++{
++ /*
++ * The seed is 32 bytes (in units of longs), which is 256 bits, which
++ * is the security level that the RNG is targeting.
++ */
++ unsigned long rng_seed[32 / sizeof(long)];
++ size_t i, longs;
++
++ if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
++ return;
++
++ /*
++ * Since the CoCo threat model includes the host, the only reliable
++ * source of entropy that can be neither observed nor manipulated is
++ * RDRAND. Usually, RDRAND failure is considered tolerable, but since
++ * CoCo guests have no other unobservable source of entropy, it's
++ * important to at least ensure the RNG gets some initial random seeds.
++ */
++ for (i = 0; i < ARRAY_SIZE(rng_seed); i += longs) {
++ longs = arch_get_random_longs(&rng_seed[i], ARRAY_SIZE(rng_seed) - i);
++
++ /*
++ * A zero return value means that the guest doesn't have RDRAND
++ * or the CPU is physically broken, and in both cases that
++ * means most crypto inside of the CoCo instance will be
++ * broken, defeating the purpose of CoCo in the first place. So
++ * just panic here because it's absolutely unsafe to continue
++ * executing.
++ */
++ if (longs == 0)
++ panic("RDRAND is defective.");
++ }
++ add_device_randomness(rng_seed, sizeof(rng_seed));
++ memzero_explicit(rng_seed, sizeof(rng_seed));
++}
+diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
+index 9c0b26ae51069..e72dac092245a 100644
+--- a/arch/x86/entry/common.c
++++ b/arch/x86/entry/common.c
+@@ -48,7 +48,7 @@ static __always_inline bool do_syscall_x64(struct pt_regs *regs, int nr)
+
+ if (likely(unr < NR_syscalls)) {
+ unr = array_index_nospec(unr, NR_syscalls);
+- regs->ax = sys_call_table[unr](regs);
++ regs->ax = x64_sys_call(regs, unr);
+ return true;
+ }
+ return false;
+@@ -65,7 +65,7 @@ static __always_inline bool do_syscall_x32(struct pt_regs *regs, int nr)
+
+ if (IS_ENABLED(CONFIG_X86_X32_ABI) && likely(xnr < X32_NR_syscalls)) {
+ xnr = array_index_nospec(xnr, X32_NR_syscalls);
+- regs->ax = x32_sys_call_table[xnr](regs);
++ regs->ax = x32_sys_call(regs, xnr);
+ return true;
+ }
+ return false;
+@@ -114,7 +114,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs, int nr)
+
+ if (likely(unr < IA32_NR_syscalls)) {
+ unr = array_index_nospec(unr, IA32_NR_syscalls);
+- regs->ax = ia32_sys_call_table[unr](regs);
++ regs->ax = ia32_sys_call(regs, unr);
+ } else if (nr != -1) {
+ regs->ax = __ia32_sys_ni_syscall(regs);
+ }
+@@ -141,7 +141,7 @@ static __always_inline bool int80_is_external(void)
+ }
+
+ /**
+- * int80_emulation - 32-bit legacy syscall entry
++ * do_int80_emulation - 32-bit legacy syscall C entry from asm
+ *
+ * This entry point can be used by 32-bit and 64-bit programs to perform
+ * 32-bit system calls. Instances of INT $0x80 can be found inline in
+@@ -159,7 +159,7 @@ static __always_inline bool int80_is_external(void)
+ * eax: system call number
+ * ebx, ecx, edx, esi, edi, ebp: arg1 - arg 6
+ */
+-DEFINE_IDTENTRY_RAW(int80_emulation)
++__visible noinstr void do_int80_emulation(struct pt_regs *regs)
+ {
+ int nr;
+
+diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
+index 9f97a8bd11e81..5d96561c0d6ad 100644
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -116,6 +116,7 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL)
+ /* clobbers %rax, make sure it is after saving the syscall nr */
+ IBRS_ENTER
+ UNTRAIN_RET
++ CLEAR_BRANCH_HISTORY
+
+ call do_syscall_64 /* returns with IRQs disabled */
+
+@@ -1549,3 +1550,63 @@ SYM_CODE_START_NOALIGN(rewind_stack_and_make_dead)
+ call make_task_dead
+ SYM_CODE_END(rewind_stack_and_make_dead)
+ .popsection
++
++/*
++ * This sequence executes branches in order to remove user branch information
++ * from the branch history tracker in the Branch Predictor, therefore removing
++ * user influence on subsequent BTB lookups.
++ *
++ * It should be used on parts prior to Alder Lake. Newer parts should use the
++ * BHI_DIS_S hardware control instead. If a pre-Alder Lake part is being
++ * virtualized on newer hardware the VMM should protect against BHI attacks by
++ * setting BHI_DIS_S for the guests.
++ *
++ * CALLs/RETs are necessary to prevent Loop Stream Detector (LSD) from engaging
++ * and not clearing the branch history. The call tree looks like:
++ *
++ * call 1
++ * call 2
++ * call 2
++ * call 2
++ * call 2
++ * call 2
++ * ret
++ * ret
++ * ret
++ * ret
++ * ret
++ * ret
++ *
++ * This means that the stack is non-constant and ORC can't unwind it with %rsp
++ * alone. Therefore we unconditionally set up the frame pointer, which allows
++ * ORC to unwind properly.
++ *
++ * The alignment is for performance and not for safety, and may be safely
++ * refactored in the future if needed.
++ */
++SYM_FUNC_START(clear_bhb_loop)
++ push %rbp /* frame pointer is set up unconditionally so ORC can unwind */
++ mov %rsp, %rbp
++ movl $5, %ecx /* %ecx: outer counter, decremented before "jnz 1b" */
++ ANNOTATE_INTRA_FUNCTION_CALL
++ call 1f
++ jmp 5f
++ .align 64, 0xcc
++ ANNOTATE_INTRA_FUNCTION_CALL
++1: call 2f
++ RET
++ .align 64, 0xcc
++2: movl $5, %eax /* %eax: inner counter for the jmp loop at 3:/4: */
++3: jmp 4f
++ nop
++4: sub $1, %eax
++ jnz 3b
++ sub $1, %ecx
++ jnz 1b /* next outer iteration: nest another "call 2" */
++ RET
++5: lfence
++ pop %rbp
++ RET
++SYM_FUNC_END(clear_bhb_loop)
++EXPORT_SYMBOL_GPL(clear_bhb_loop)
++STACK_FRAME_NON_STANDARD(clear_bhb_loop)
+diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
+index 306181e4fcb90..4c1dfc51c56e4 100644
+--- a/arch/x86/entry/entry_64_compat.S
++++ b/arch/x86/entry/entry_64_compat.S
+@@ -92,6 +92,7 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL)
+
+ IBRS_ENTER
+ UNTRAIN_RET
++ CLEAR_BRANCH_HISTORY
+
+ /*
+ * SYSENTER doesn't filter flags, so we need to clear NT and AC
+@@ -209,6 +210,7 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_after_hwframe, SYM_L_GLOBAL)
+
+ IBRS_ENTER
+ UNTRAIN_RET
++ CLEAR_BRANCH_HISTORY
+
+ movq %rsp, %rdi
+ call do_fast_syscall_32
+@@ -277,3 +279,17 @@ SYM_INNER_LABEL(entry_SYSRETL_compat_end, SYM_L_GLOBAL)
+ ANNOTATE_NOENDBR
+ int3
+ SYM_CODE_END(entry_SYSCALL_compat)
++
++/*
++ * int 0x80 is used by 32-bit mode as a system call entry. Normally IDT entries
++ * point to C routines; however, since this is a system call interface, the
++ * branch history needs to be scrubbed to protect against BHI attacks, and that
++ * scrubbing needs to take place in assembly code prior to entering any C
++ * routines.
++ */
++SYM_CODE_START(int80_emulation)
++ ANNOTATE_NOENDBR
++ UNWIND_HINT_FUNC
++ CLEAR_BRANCH_HISTORY
++ jmp do_int80_emulation
++SYM_CODE_END(int80_emulation)
+diff --git a/arch/x86/entry/syscall_32.c b/arch/x86/entry/syscall_32.c
+index 8cfc9bc73e7f8..c2235bae17ef6 100644
+--- a/arch/x86/entry/syscall_32.c
++++ b/arch/x86/entry/syscall_32.c
+@@ -18,8 +18,25 @@
+ #include <asm/syscalls_32.h>
+ #undef __SYSCALL
+
++/*
++ * The sys_call_table[] is no longer used for system calls, but
++ * kernel/trace/trace_syscalls.c still wants to know the system
++ * call address.
++ */
++#ifdef CONFIG_X86_32
+ #define __SYSCALL(nr, sym) __ia32_##sym,
+-
+-__visible const sys_call_ptr_t ia32_sys_call_table[] = {
++const sys_call_ptr_t sys_call_table[] = {
+ #include <asm/syscalls_32.h>
+ };
++#undef __SYSCALL
++#endif
++
++#define __SYSCALL(nr, sym) case nr: return __ia32_##sym(regs);
++
++long ia32_sys_call(const struct pt_regs *regs, unsigned int nr)
++{
++ switch (nr) { /* direct-call dispatch on the syscall number */
++ #include <asm/syscalls_32.h> /* one "case nr: return __ia32_<sym>(regs);" per __SYSCALL() */
++ default: return __ia32_sys_ni_syscall(regs); /* nr has no case above */
++ }
++}
+diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c
+index be120eec1fc9f..33b3f09e6f151 100644
+--- a/arch/x86/entry/syscall_64.c
++++ b/arch/x86/entry/syscall_64.c
+@@ -11,8 +11,23 @@
+ #include <asm/syscalls_64.h>
+ #undef __SYSCALL
+
++/*
++ * The sys_call_table[] is no longer used for system calls, but
++ * kernel/trace/trace_syscalls.c still wants to know the system
++ * call address.
++ */
+ #define __SYSCALL(nr, sym) __x64_##sym,
+-
+-asmlinkage const sys_call_ptr_t sys_call_table[] = {
++const sys_call_ptr_t sys_call_table[] = {
+ #include <asm/syscalls_64.h>
+ };
++#undef __SYSCALL
++
++#define __SYSCALL(nr, sym) case nr: return __x64_##sym(regs);
++
++long x64_sys_call(const struct pt_regs *regs, unsigned int nr)
++{
++ switch (nr) { /* direct-call dispatch on the syscall number */
++ #include <asm/syscalls_64.h> /* one "case nr: return __x64_<sym>(regs);" per __SYSCALL() */
++ default: return __x64_sys_ni_syscall(regs); /* nr has no case above */
++ }
++}
+diff --git a/arch/x86/entry/syscall_x32.c b/arch/x86/entry/syscall_x32.c
+index bdd0e03a1265d..03de4a9321318 100644
+--- a/arch/x86/entry/syscall_x32.c
++++ b/arch/x86/entry/syscall_x32.c
+@@ -11,8 +11,12 @@
+ #include <asm/syscalls_x32.h>
+ #undef __SYSCALL
+
+-#define __SYSCALL(nr, sym) __x64_##sym,
++#define __SYSCALL(nr, sym) case nr: return __x64_##sym(regs);
+
+-asmlinkage const sys_call_ptr_t x32_sys_call_table[] = {
+-#include <asm/syscalls_x32.h>
++long x32_sys_call(const struct pt_regs *regs, unsigned int nr)
++{
++ switch (nr) {
++ #include <asm/syscalls_x32.h>
++ default: return __x64_sys_ni_syscall(regs);
++ }
+ };
+diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
+index 5365d6acbf090..8ed10366c4a27 100644
+--- a/arch/x86/events/amd/core.c
++++ b/arch/x86/events/amd/core.c
+@@ -250,7 +250,7 @@ static const u64 amd_perfmon_event_map[PERF_COUNT_HW_MAX] =
+ /*
+ * AMD Performance Monitor Family 17h and later:
+ */
+-static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] =
++static const u64 amd_zen1_perfmon_event_map[PERF_COUNT_HW_MAX] =
+ {
+ [PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
+ [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
+@@ -262,10 +262,24 @@ static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] =
+ [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x0187,
+ };
+
++static const u64 amd_zen2_perfmon_event_map[PERF_COUNT_HW_MAX] =
++{ /* selected by amd_pmu_event_map() for X86_FEATURE_ZEN2 or family >= 0x19 */
++ [PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
++ [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
++ [PERF_COUNT_HW_CACHE_REFERENCES] = 0xff60,
++ [PERF_COUNT_HW_CACHE_MISSES] = 0x0964,
++ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2,
++ [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3,
++ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00a9, /* no STALLED_CYCLES_BACKEND entry is listed */
++};
++
+ static u64 amd_pmu_event_map(int hw_event)
+ {
+- if (boot_cpu_data.x86 >= 0x17)
+- return amd_f17h_perfmon_event_map[hw_event];
++ if (cpu_feature_enabled(X86_FEATURE_ZEN2) || boot_cpu_data.x86 >= 0x19)
++ return amd_zen2_perfmon_event_map[hw_event];
++
++ if (cpu_feature_enabled(X86_FEATURE_ZEN1))
++ return amd_zen1_perfmon_event_map[hw_event];
+
+ return amd_perfmon_event_map[hw_event];
+ }
+@@ -904,8 +918,8 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
+ if (!status)
+ goto done;
+
+- /* Read branch records before unfreezing */
+- if (status & GLOBAL_STATUS_LBRS_FROZEN) {
++ /* Read branch records */
++ if (x86_pmu.lbr_nr) {
+ amd_pmu_lbr_read();
+ status &= ~GLOBAL_STATUS_LBRS_FROZEN;
+ }
+diff --git a/arch/x86/events/amd/lbr.c b/arch/x86/events/amd/lbr.c
+index eb31f850841a8..110e34c59643a 100644
+--- a/arch/x86/events/amd/lbr.c
++++ b/arch/x86/events/amd/lbr.c
+@@ -400,10 +400,12 @@ void amd_pmu_lbr_enable_all(void)
+ wrmsrl(MSR_AMD64_LBR_SELECT, lbr_select);
+ }
+
+- rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
+- rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
++ if (cpu_feature_enabled(X86_FEATURE_AMD_LBR_PMC_FREEZE)) {
++ rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
++ wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
++ }
+
+- wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
++ rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
+ wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg | DBG_EXTN_CFG_LBRV2EN);
+ }
+
+@@ -416,10 +418,12 @@ void amd_pmu_lbr_disable_all(void)
+ return;
+
+ rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
+- rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
+-
+ wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg & ~DBG_EXTN_CFG_LBRV2EN);
+- wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
++
++ if (cpu_feature_enabled(X86_FEATURE_AMD_LBR_PMC_FREEZE)) {
++ rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
++ wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
++ }
+ }
+
+ __init int amd_pmu_lbr_init(void)
+diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
+index eb8dd8b8a1e86..2b53f696c3c96 100644
+--- a/arch/x86/events/intel/ds.c
++++ b/arch/x86/events/intel/ds.c
+@@ -1236,11 +1236,11 @@ pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc,
+ struct pmu *pmu = event->pmu;
+
+ /*
+- * Make sure we get updated with the first PEBS
+- * event. It will trigger also during removal, but
+- * that does not hurt:
++ * Make sure we get updated with the first PEBS event.
++ * During removal, ->pebs_data_cfg is still valid for
++ * the last PEBS event. Don't clear it.
+ */
+- if (cpuc->n_pebs == 1)
++ if ((cpuc->n_pebs == 1) && add)
+ cpuc->pebs_data_cfg = PEBS_UPDATE_DS_SW;
+
+ if (needed_cb != pebs_needs_sched_cb(cpuc)) {
+diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h
+index b1a98fa38828e..0e82074517f6b 100644
+--- a/arch/x86/include/asm/asm-prototypes.h
++++ b/arch/x86/include/asm/asm-prototypes.h
+@@ -13,6 +13,7 @@
+ #include <asm/preempt.h>
+ #include <asm/asm.h>
+ #include <asm/gsseg.h>
++#include <asm/nospec-branch.h>
+
+ #ifndef CONFIG_X86_CMPXCHG64
+ extern void cmpxchg8b_emu(void);
+diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h
+index b3a7cfb0d99e0..c945c893c52e0 100644
+--- a/arch/x86/include/asm/boot.h
++++ b/arch/x86/include/asm/boot.h
+@@ -81,6 +81,7 @@
+
+ #ifndef __ASSEMBLY__
+ extern unsigned int output_len;
++extern const unsigned long kernel_text_size;
+ extern const unsigned long kernel_total_size;
+
+ unsigned long decompress_kernel(unsigned char *outbuf, unsigned long virt_addr,
+diff --git a/arch/x86/include/asm/coco.h b/arch/x86/include/asm/coco.h
+index 21940ef8d2904..de03537a01823 100644
+--- a/arch/x86/include/asm/coco.h
++++ b/arch/x86/include/asm/coco.h
+@@ -22,6 +22,7 @@ static inline void cc_set_mask(u64 mask)
+
+ u64 cc_mkenc(u64 val);
+ u64 cc_mkdec(u64 val);
++void cc_random_init(void);
+ #else
+ static inline u64 cc_mkenc(u64 val)
+ {
+@@ -32,6 +33,7 @@ static inline u64 cc_mkdec(u64 val)
+ {
+ return val;
+ }
++static inline void cc_random_init(void) { }
+ #endif
+
+ #endif /* _ASM_X86_COCO_H */
+diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
+index a1273698fc430..686e92d2663ee 100644
+--- a/arch/x86/include/asm/cpufeature.h
++++ b/arch/x86/include/asm/cpufeature.h
+@@ -33,6 +33,8 @@ enum cpuid_leafs
+ CPUID_7_EDX,
+ CPUID_8000_001F_EAX,
+ CPUID_8000_0021_EAX,
++ CPUID_LNX_5,
++ NR_CPUID_WORDS,
+ };
+
+ #define X86_CAP_FMT_NUM "%d:%d"
+@@ -91,8 +93,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
+ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 18, feature_bit) || \
+ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 19, feature_bit) || \
+ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 20, feature_bit) || \
++ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 21, feature_bit) || \
+ REQUIRED_MASK_CHECK || \
+- BUILD_BUG_ON_ZERO(NCAPINTS != 21))
++ BUILD_BUG_ON_ZERO(NCAPINTS != 22))
+
+ #define DISABLED_MASK_BIT_SET(feature_bit) \
+ ( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 0, feature_bit) || \
+@@ -116,8 +119,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
+ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 18, feature_bit) || \
+ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 19, feature_bit) || \
+ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 20, feature_bit) || \
++ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 21, feature_bit) || \
+ DISABLED_MASK_CHECK || \
+- BUILD_BUG_ON_ZERO(NCAPINTS != 21))
++ BUILD_BUG_ON_ZERO(NCAPINTS != 22))
+
+ #define cpu_has(c, bit) \
+ (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \
+diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
+index bd33f6366c80d..8c1593dd2c317 100644
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -13,7 +13,7 @@
+ /*
+ * Defines x86 CPU feature bits
+ */
+-#define NCAPINTS 21 /* N 32-bit words worth of info */
++#define NCAPINTS 22 /* N 32-bit words worth of info */
+ #define NBUGINTS 2 /* N 32-bit bug flags */
+
+ /*
+@@ -218,7 +218,7 @@
+ #define X86_FEATURE_IBRS ( 7*32+25) /* Indirect Branch Restricted Speculation */
+ #define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */
+ #define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */
+-#define X86_FEATURE_ZEN (7*32+28) /* "" CPU based on Zen microarchitecture */
++#define X86_FEATURE_ZEN ( 7*32+28) /* "" Generic flag for all Zen and newer */
+ #define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* "" L1TF workaround PTE inversion */
+ #define X86_FEATURE_IBRS_ENHANCED ( 7*32+30) /* Enhanced IBRS */
+ #define X86_FEATURE_MSR_IA32_FEAT_CTL ( 7*32+31) /* "" MSR IA32_FEAT_CTL configured */
+@@ -312,6 +312,10 @@
+ #define X86_FEATURE_SRSO_ALIAS (11*32+25) /* "" AMD BTB untrain RETs through aliasing */
+ #define X86_FEATURE_IBPB_ON_VMEXIT (11*32+26) /* "" Issue an IBPB only on VMEXIT */
+ #define X86_FEATURE_APIC_MSRS_FENCE (11*32+27) /* "" IA32_TSC_DEADLINE and X2APIC MSRs need fencing */
++#define X86_FEATURE_ZEN2 (11*32+28) /* "" CPU based on Zen2 microarchitecture */
++#define X86_FEATURE_ZEN3 (11*32+29) /* "" CPU based on Zen3 microarchitecture */
++#define X86_FEATURE_ZEN4 (11*32+30) /* "" CPU based on Zen4 microarchitecture */
++#define X86_FEATURE_ZEN1 (11*32+31) /* "" CPU based on Zen1 microarchitecture */
+
+ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
+ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */
+@@ -452,6 +456,18 @@
+ #define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* "" MSR_PRED_CMD[IBPB] flushes all branch type predictions */
+ #define X86_FEATURE_SRSO_NO (20*32+29) /* "" CPU is not affected by SRSO */
+
++/*
++ * Extended auxiliary flags: Linux defined - for features scattered in various
++ * CPUID levels like 0x80000022, etc and Linux defined features.
++ *
++ * Reuse free bits when adding new feature flags!
++ */
++#define X86_FEATURE_AMD_LBR_PMC_FREEZE (21*32+ 0) /* AMD LBR and PMC Freeze */
++#define X86_FEATURE_CLEAR_BHB_LOOP (21*32+ 1) /* "" Clear branch history at syscall entry using SW loop */
++#define X86_FEATURE_BHI_CTRL (21*32+ 2) /* "" BHI_DIS_S HW control available */
++#define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* "" BHI_DIS_S HW control enabled */
++#define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* "" Clear branch history at vmexit using SW loop */
++
+ /*
+ * BUG word(s)
+ */
+@@ -499,4 +515,5 @@
+ #define X86_BUG_SRSO X86_BUG(1*32 + 0) /* AMD SRSO bug */
+ #define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* AMD DIV0 speculation bug */
+ #define X86_BUG_RFDS X86_BUG(1*32 + 2) /* CPU is vulnerable to Register File Data Sampling */
++#define X86_BUG_BHI X86_BUG(1*32 + 3) /* CPU is affected by Branch History Injection */
+ #endif /* _ASM_X86_CPUFEATURES_H */
+diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
+index 702d93fdd10e8..88fcf08458d9c 100644
+--- a/arch/x86/include/asm/disabled-features.h
++++ b/arch/x86/include/asm/disabled-features.h
+@@ -143,6 +143,7 @@
+ #define DISABLED_MASK18 (DISABLE_IBT)
+ #define DISABLED_MASK19 0
+ #define DISABLED_MASK20 0
+-#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21)
++#define DISABLED_MASK21 0
++#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 22)
+
+ #endif /* _ASM_X86_DISABLED_FEATURES_H */
+diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h
+index 5f1d3c421f686..cc9ccf61b6bd1 100644
+--- a/arch/x86/include/asm/init.h
++++ b/arch/x86/include/asm/init.h
+@@ -2,6 +2,8 @@
+ #ifndef _ASM_X86_INIT_H
+ #define _ASM_X86_INIT_H
+
++#define __head __section(".head.text")
++
+ struct x86_mapping_info {
+ void *(*alloc_pgt_page)(void *); /* allocate buf for page table */
+ void *context; /* context for alloc_pgt_page */
+diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
+index f4f5269846295..76081a34fc231 100644
+--- a/arch/x86/include/asm/mem_encrypt.h
++++ b/arch/x86/include/asm/mem_encrypt.h
+@@ -46,8 +46,8 @@ void __init sme_unmap_bootdata(char *real_mode_data);
+ void __init sme_early_init(void);
+ void __init sev_setup_arch(void);
+
+-void __init sme_encrypt_kernel(struct boot_params *bp);
+-void __init sme_enable(struct boot_params *bp);
++void sme_encrypt_kernel(struct boot_params *bp);
++void sme_enable(struct boot_params *bp);
+
+ int __init early_set_memory_decrypted(unsigned long vaddr, unsigned long size);
+ int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size);
+@@ -81,8 +81,8 @@ static inline void __init sme_unmap_bootdata(char *real_mode_data) { }
+ static inline void __init sme_early_init(void) { }
+ static inline void __init sev_setup_arch(void) { }
+
+-static inline void __init sme_encrypt_kernel(struct boot_params *bp) { }
+-static inline void __init sme_enable(struct boot_params *bp) { }
++static inline void sme_encrypt_kernel(struct boot_params *bp) { }
++static inline void sme_enable(struct boot_params *bp) { }
+
+ static inline void sev_es_init_vc_handling(void) { }
+
+diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
+index c75cc5610be30..621bac6b74011 100644
+--- a/arch/x86/include/asm/msr-index.h
++++ b/arch/x86/include/asm/msr-index.h
+@@ -50,10 +50,13 @@
+ #define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */
+ #define SPEC_CTRL_RRSBA_DIS_S_SHIFT 6 /* Disable RRSBA behavior */
+ #define SPEC_CTRL_RRSBA_DIS_S BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT)
++#define SPEC_CTRL_BHI_DIS_S_SHIFT 10 /* Disable Branch History Injection behavior */
++#define SPEC_CTRL_BHI_DIS_S BIT(SPEC_CTRL_BHI_DIS_S_SHIFT)
+
+ /* A mask for bits which the kernel toggles when controlling mitigations */
+ #define SPEC_CTRL_MITIGATIONS_MASK (SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD \
+- | SPEC_CTRL_RRSBA_DIS_S)
++ | SPEC_CTRL_RRSBA_DIS_S \
++ | SPEC_CTRL_BHI_DIS_S)
+
+ #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */
+ #define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */
+@@ -152,6 +155,10 @@
+ * are restricted to targets in
+ * kernel.
+ */
++#define ARCH_CAP_BHI_NO BIT(20) /*
++ * CPU is not affected by Branch
++ * History Injection.
++ */
+ #define ARCH_CAP_PBRSB_NO BIT(24) /*
+ * Not susceptible to Post-Barrier
+ * Return Stack Buffer Predictions.
+diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
+index 8ae2cb30ade3d..a8781c8763b44 100644
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -271,11 +271,20 @@
+ .Lskip_rsb_\@:
+ .endm
+
++/*
++ * The CALL to srso_alias_untrain_ret() must be patched in directly at
++ * the spot where untraining must be done, ie., srso_alias_untrain_ret()
++ * must be the target of a CALL instruction instead of indirectly
++ * jumping to a wrapper which then calls it. Therefore, this macro is
++ * called outside of __UNTRAIN_RET below, for the time being, before the
++ * kernel can support nested alternatives with arbitrary nesting.
++ */
++.macro CALL_UNTRAIN_RET
+ #if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_SRSO)
+-#define CALL_UNTRAIN_RET "call entry_untrain_ret"
+-#else
+-#define CALL_UNTRAIN_RET ""
++ ALTERNATIVE_2 "", "call entry_untrain_ret", X86_FEATURE_UNRET, \
++ "call srso_alias_untrain_ret", X86_FEATURE_SRSO_ALIAS
+ #endif
++.endm
+
+ /*
+ * Mitigate RETBleed for AMD/Hygon Zen uarch. Requires KERNEL CR3 because the
+@@ -288,38 +297,24 @@
+ * As such, this must be placed after every *SWITCH_TO_KERNEL_CR3 at a point
+ * where we have a stack but before any RET instruction.
+ */
+-.macro UNTRAIN_RET
+-#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) || \
+- defined(CONFIG_CALL_DEPTH_TRACKING) || defined(CONFIG_CPU_SRSO)
++.macro __UNTRAIN_RET ibpb_feature, call_depth_insns
++#if defined(CONFIG_RETHUNK) || defined(CONFIG_CPU_IBPB_ENTRY)
+ VALIDATE_UNRET_END
+- ALTERNATIVE_3 "", \
+- CALL_UNTRAIN_RET, X86_FEATURE_UNRET, \
+- "call entry_ibpb", X86_FEATURE_ENTRY_IBPB, \
+- __stringify(RESET_CALL_DEPTH), X86_FEATURE_CALL_DEPTH
++ CALL_UNTRAIN_RET
++ ALTERNATIVE_2 "", \
++ "call entry_ibpb", \ibpb_feature, \
++ __stringify(\call_depth_insns), X86_FEATURE_CALL_DEPTH
+ #endif
+ .endm
+
+-.macro UNTRAIN_RET_VM
+-#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) || \
+- defined(CONFIG_CALL_DEPTH_TRACKING) || defined(CONFIG_CPU_SRSO)
+- VALIDATE_UNRET_END
+- ALTERNATIVE_3 "", \
+- CALL_UNTRAIN_RET, X86_FEATURE_UNRET, \
+- "call entry_ibpb", X86_FEATURE_IBPB_ON_VMEXIT, \
+- __stringify(RESET_CALL_DEPTH), X86_FEATURE_CALL_DEPTH
+-#endif
+-.endm
++#define UNTRAIN_RET \
++ __UNTRAIN_RET X86_FEATURE_ENTRY_IBPB, __stringify(RESET_CALL_DEPTH)
+
+-.macro UNTRAIN_RET_FROM_CALL
+-#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) || \
+- defined(CONFIG_CALL_DEPTH_TRACKING) || defined(CONFIG_CPU_SRSO)
+- VALIDATE_UNRET_END
+- ALTERNATIVE_3 "", \
+- CALL_UNTRAIN_RET, X86_FEATURE_UNRET, \
+- "call entry_ibpb", X86_FEATURE_ENTRY_IBPB, \
+- __stringify(RESET_CALL_DEPTH_FROM_CALL), X86_FEATURE_CALL_DEPTH
+-#endif
+-.endm
++#define UNTRAIN_RET_VM \
++ __UNTRAIN_RET X86_FEATURE_IBPB_ON_VMEXIT, __stringify(RESET_CALL_DEPTH)
++
++#define UNTRAIN_RET_FROM_CALL \
++ __UNTRAIN_RET X86_FEATURE_ENTRY_IBPB, __stringify(RESET_CALL_DEPTH_FROM_CALL)
+
+
+ .macro CALL_DEPTH_ACCOUNT
+@@ -340,6 +335,19 @@
+ ALTERNATIVE "", __stringify(verw _ASM_RIP(mds_verw_sel)), X86_FEATURE_CLEAR_CPU_BUF
+ .endm
+
++#ifdef CONFIG_X86_64
++.macro CLEAR_BRANCH_HISTORY
++ ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP
++.endm
++
++.macro CLEAR_BRANCH_HISTORY_VMEXIT
++ ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT
++.endm
++#else
++#define CLEAR_BRANCH_HISTORY
++#define CLEAR_BRANCH_HISTORY_VMEXIT
++#endif
++
+ #else /* __ASSEMBLY__ */
+
+ #define ANNOTATE_RETPOLINE_SAFE \
+@@ -359,6 +367,22 @@ extern void __x86_return_thunk(void);
+ static inline void __x86_return_thunk(void) {}
+ #endif
+
++#ifdef CONFIG_CPU_UNRET_ENTRY
++extern void retbleed_return_thunk(void);
++#else
++static inline void retbleed_return_thunk(void) {}
++#endif
++
++extern void srso_alias_untrain_ret(void);
++
++#ifdef CONFIG_CPU_SRSO
++extern void srso_return_thunk(void);
++extern void srso_alias_return_thunk(void);
++#else
++static inline void srso_return_thunk(void) {}
++static inline void srso_alias_return_thunk(void) {}
++#endif
++
+ extern void retbleed_return_thunk(void);
+ extern void srso_return_thunk(void);
+ extern void srso_alias_return_thunk(void);
+@@ -370,6 +394,10 @@ extern void srso_alias_untrain_ret(void);
+ extern void entry_untrain_ret(void);
+ extern void entry_ibpb(void);
+
++#ifdef CONFIG_X86_64
++extern void clear_bhb_loop(void);
++#endif
++
+ extern void (*x86_return_thunk)(void);
+
+ #ifdef CONFIG_CALL_DEPTH_TRACKING
+diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h
+index 7ba1726b71c7b..e9187ddd3d1fd 100644
+--- a/arch/x86/include/asm/required-features.h
++++ b/arch/x86/include/asm/required-features.h
+@@ -99,6 +99,7 @@
+ #define REQUIRED_MASK18 0
+ #define REQUIRED_MASK19 0
+ #define REQUIRED_MASK20 0
+-#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21)
++#define REQUIRED_MASK21 0
++#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 22)
+
+ #endif /* _ASM_X86_REQUIRED_FEATURES_H */
+diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
+index 36f905797075e..75a5388d40681 100644
+--- a/arch/x86/include/asm/sev.h
++++ b/arch/x86/include/asm/sev.h
+@@ -199,15 +199,15 @@ static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate)
+ struct snp_guest_request_ioctl;
+
+ void setup_ghcb(void);
+-void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr,
+- unsigned long npages);
+-void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr,
+- unsigned long npages);
++void early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr,
++ unsigned long npages);
++void early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr,
++ unsigned long npages);
+ void snp_set_memory_shared(unsigned long vaddr, unsigned long npages);
+ void snp_set_memory_private(unsigned long vaddr, unsigned long npages);
+ void snp_set_wakeup_secondary_cpu(void);
+ bool snp_init(struct boot_params *bp);
+-void __init __noreturn snp_abort(void);
++void __noreturn snp_abort(void);
+ void snp_dmi_setup(void);
+ int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio);
+ void snp_accept_memory(phys_addr_t start, phys_addr_t end);
+diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
+index 4fb36fba4b5a1..03bb950eba690 100644
+--- a/arch/x86/include/asm/syscall.h
++++ b/arch/x86/include/asm/syscall.h
+@@ -16,19 +16,17 @@
+ #include <asm/thread_info.h> /* for TS_COMPAT */
+ #include <asm/unistd.h>
+
++/* This is used purely for kernel/trace/trace_syscalls.c */
+ typedef long (*sys_call_ptr_t)(const struct pt_regs *);
+ extern const sys_call_ptr_t sys_call_table[];
+
+-#if defined(CONFIG_X86_32)
+-#define ia32_sys_call_table sys_call_table
+-#else
+ /*
+ * These may not exist, but still put the prototypes in so we
+ * can use IS_ENABLED().
+ */
+-extern const sys_call_ptr_t ia32_sys_call_table[];
+-extern const sys_call_ptr_t x32_sys_call_table[];
+-#endif
++extern long ia32_sys_call(const struct pt_regs *, unsigned int nr);
++extern long x32_sys_call(const struct pt_regs *, unsigned int nr);
++extern long x64_sys_call(const struct pt_regs *, unsigned int nr);
+
+ /*
+ * Only the low 32 bits of orig_ax are meaningful, so we return int.
+@@ -127,6 +125,7 @@ static inline int syscall_get_arch(struct task_struct *task)
+ }
+
+ void do_syscall_64(struct pt_regs *regs, int nr);
++void do_int80_emulation(struct pt_regs *regs);
+
+ #endif /* CONFIG_X86_32 */
+
+diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h
+index 01d19fc223463..eeea058cf6028 100644
+--- a/arch/x86/include/uapi/asm/bootparam.h
++++ b/arch/x86/include/uapi/asm/bootparam.h
+@@ -38,6 +38,7 @@
+ #define XLF_EFI_KEXEC (1<<4)
+ #define XLF_5LEVEL (1<<5)
+ #define XLF_5LEVEL_ENABLED (1<<6)
++#define XLF_MEM_ENCRYPTION (1<<7)
+
+ #ifndef __ASSEMBLY__
+
+diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
+index 031bca974fbf3..9fd91022d92d0 100644
+--- a/arch/x86/kernel/cpu/amd.c
++++ b/arch/x86/kernel/cpu/amd.c
+@@ -66,20 +66,6 @@ static const int amd_erratum_400[] =
+ static const int amd_erratum_383[] =
+ AMD_OSVW_ERRATUM(3, AMD_MODEL_RANGE(0x10, 0, 0, 0xff, 0xf));
+
+-/* #1054: Instructions Retired Performance Counter May Be Inaccurate */
+-static const int amd_erratum_1054[] =
+- AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0, 0, 0x2f, 0xf));
+-
+-static const int amd_zenbleed[] =
+- AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0x30, 0x0, 0x4f, 0xf),
+- AMD_MODEL_RANGE(0x17, 0x60, 0x0, 0x7f, 0xf),
+- AMD_MODEL_RANGE(0x17, 0x90, 0x0, 0x91, 0xf),
+- AMD_MODEL_RANGE(0x17, 0xa0, 0x0, 0xaf, 0xf));
+-
+-static const int amd_div0[] =
+- AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0x00, 0x0, 0x2f, 0xf),
+- AMD_MODEL_RANGE(0x17, 0x50, 0x0, 0x5f, 0xf));
+-
+ static const int amd_erratum_1485[] =
+ AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x19, 0x10, 0x0, 0x1f, 0xf),
+ AMD_MODEL_RANGE(0x19, 0x60, 0x0, 0xaf, 0xf));
+@@ -620,6 +606,49 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
+ }
+
+ resctrl_cpu_detect(c);
++
++ /* Figure out Zen generations: */
++ switch (c->x86) {
++ case 0x17: {
++ switch (c->x86_model) {
++ case 0x00 ... 0x2f:
++ case 0x50 ... 0x5f:
++ setup_force_cpu_cap(X86_FEATURE_ZEN1);
++ break;
++ case 0x30 ... 0x4f:
++ case 0x60 ... 0x7f:
++ case 0x90 ... 0x91:
++ case 0xa0 ... 0xaf:
++ setup_force_cpu_cap(X86_FEATURE_ZEN2);
++ break;
++ default:
++ goto warn;
++ }
++ break;
++ }
++ case 0x19: {
++ switch (c->x86_model) {
++ case 0x00 ... 0x0f:
++ case 0x20 ... 0x5f:
++ setup_force_cpu_cap(X86_FEATURE_ZEN3);
++ break;
++ case 0x10 ... 0x1f:
++ case 0x60 ... 0xaf:
++ setup_force_cpu_cap(X86_FEATURE_ZEN4);
++ break;
++ default:
++ goto warn;
++ }
++ break;
++ }
++ default:
++ break;
++ }
++
++ return;
++
++warn:
++ WARN_ONCE(1, "Family 0x%x, model: 0x%x??\n", c->x86, c->x86_model);
+ }
+
+ static void early_detect_mem_encrypt(struct cpuinfo_x86 *c)
+@@ -945,6 +974,19 @@ static void init_amd_bd(struct cpuinfo_x86 *c)
+ clear_rdrand_cpuid_bit(c);
+ }
+
++static void fix_erratum_1386(struct cpuinfo_x86 *c)
++{
++ /*
++ * Work around Erratum 1386. The XSAVES instruction malfunctions in
++ * certain circumstances on Zen1/2 uarch, and not all parts have had
++ * updated microcode at the time of writing (March 2023).
++ *
++ * Affected parts all have no supervisor XSAVE states, meaning that
++ * the XSAVEC instruction (which works fine) is equivalent.
++ */
++ clear_cpu_cap(c, X86_FEATURE_XSAVES);
++}
++
+ void init_spectral_chicken(struct cpuinfo_x86 *c)
+ {
+ #ifdef CONFIG_CPU_UNRET_ENTRY
+@@ -965,24 +1007,19 @@ void init_spectral_chicken(struct cpuinfo_x86 *c)
+ }
+ }
+ #endif
+- /*
+- * Work around Erratum 1386. The XSAVES instruction malfunctions in
+- * certain circumstances on Zen1/2 uarch, and not all parts have had
+- * updated microcode at the time of writing (March 2023).
+- *
+- * Affected parts all have no supervisor XSAVE states, meaning that
+- * the XSAVEC instruction (which works fine) is equivalent.
+- */
+- clear_cpu_cap(c, X86_FEATURE_XSAVES);
+ }
+
+ static void init_amd_zn(struct cpuinfo_x86 *c)
+ {
+- set_cpu_cap(c, X86_FEATURE_ZEN);
+-
++ setup_force_cpu_cap(X86_FEATURE_ZEN);
+ #ifdef CONFIG_NUMA
+ node_reclaim_distance = 32;
+ #endif
++}
++
++static void init_amd_zen1(struct cpuinfo_x86 *c)
++{
++ fix_erratum_1386(c);
+
+ /* Fix up CPUID bits, but only if not virtualised. */
+ if (!cpu_has(c, X86_FEATURE_HYPERVISOR)) {
+@@ -999,6 +1036,9 @@ static void init_amd_zn(struct cpuinfo_x86 *c)
+ if (c->x86 == 0x19 && !cpu_has(c, X86_FEATURE_BTC_NO))
+ set_cpu_cap(c, X86_FEATURE_BTC_NO);
+ }
++
++ pr_notice_once("AMD Zen1 DIV0 bug detected. Disable SMT for full protection.\n");
++ setup_force_cpu_bug(X86_BUG_DIV0);
+ }
+
+ static bool cpu_has_zenbleed_microcode(void)
+@@ -1023,11 +1063,8 @@ static bool cpu_has_zenbleed_microcode(void)
+ return true;
+ }
+
+-static void zenbleed_check(struct cpuinfo_x86 *c)
++static void zen2_zenbleed_check(struct cpuinfo_x86 *c)
+ {
+- if (!cpu_has_amd_erratum(c, amd_zenbleed))
+- return;
+-
+ if (cpu_has(c, X86_FEATURE_HYPERVISOR))
+ return;
+
+@@ -1042,6 +1079,20 @@ static void zenbleed_check(struct cpuinfo_x86 *c)
+ }
+ }
+
++static void init_amd_zen2(struct cpuinfo_x86 *c)
++{
++ fix_erratum_1386(c);
++ zen2_zenbleed_check(c);
++}
++
++static void init_amd_zen3(struct cpuinfo_x86 *c)
++{
++}
++
++static void init_amd_zen4(struct cpuinfo_x86 *c)
++{
++}
++
+ static void init_amd(struct cpuinfo_x86 *c)
+ {
+ early_init_amd(c);
+@@ -1080,6 +1131,15 @@ static void init_amd(struct cpuinfo_x86 *c)
+ case 0x19: init_amd_zn(c); break;
+ }
+
++ if (boot_cpu_has(X86_FEATURE_ZEN1))
++ init_amd_zen1(c);
++ else if (boot_cpu_has(X86_FEATURE_ZEN2))
++ init_amd_zen2(c);
++ else if (boot_cpu_has(X86_FEATURE_ZEN3))
++ init_amd_zen3(c);
++ else if (boot_cpu_has(X86_FEATURE_ZEN4))
++ init_amd_zen4(c);
++
+ /*
+ * Enable workaround for FXSAVE leak on CPUs
+ * without a XSaveErPtr feature
+@@ -1131,7 +1191,7 @@ static void init_amd(struct cpuinfo_x86 *c)
+ * Counter May Be Inaccurate".
+ */
+ if (cpu_has(c, X86_FEATURE_IRPERF) &&
+- !cpu_has_amd_erratum(c, amd_erratum_1054))
++ !(boot_cpu_has(X86_FEATURE_ZEN1) && c->x86_model <= 0x2f)) /* #1054: fam 17h models 00h-2fh only */
+ msr_set_bit(MSR_K7_HWCR, MSR_K7_HWCR_IRPERF_EN_BIT);
+
+ check_null_seg_clears_base(c);
+@@ -1147,13 +1207,6 @@ static void init_amd(struct cpuinfo_x86 *c)
+ cpu_has(c, X86_FEATURE_AUTOIBRS))
+ WARN_ON_ONCE(msr_set_bit(MSR_EFER, _EFER_AUTOIBRS));
+
+- zenbleed_check(c);
+-
+- if (cpu_has_amd_erratum(c, amd_div0)) {
+- pr_notice_once("AMD Zen1 DIV0 bug detected. Disable SMT for full protection.\n");
+- setup_force_cpu_bug(X86_BUG_DIV0);
+- }
+-
+ if (!cpu_has(c, X86_FEATURE_HYPERVISOR) &&
+ cpu_has_amd_erratum(c, amd_erratum_1485))
+ msr_set_bit(MSR_ZEN4_BP_CFG, MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT);
+@@ -1313,7 +1366,7 @@ static void zenbleed_check_cpu(void *unused)
+ {
+ struct cpuinfo_x86 *c = &cpu_data(smp_processor_id());
+
+- zenbleed_check(c);
++ zen2_zenbleed_check(c);
+ }
+
+ void amd_check_microcode(void)
+diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
+index 3452f7271d074..3fc2301556271 100644
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -63,7 +63,7 @@ EXPORT_SYMBOL_GPL(x86_pred_cmd);
+
+ static DEFINE_MUTEX(spec_ctrl_mutex);
+
+-void (*x86_return_thunk)(void) __ro_after_init = &__x86_return_thunk;
++void (*x86_return_thunk)(void) __ro_after_init = __x86_return_thunk;
+
+ /* Update SPEC_CTRL MSR and its cached copy unconditionally */
+ static void update_spec_ctrl(u64 val)
+@@ -1108,8 +1108,7 @@ static void __init retbleed_select_mitigation(void)
+ setup_force_cpu_cap(X86_FEATURE_RETHUNK);
+ setup_force_cpu_cap(X86_FEATURE_UNRET);
+
+- if (IS_ENABLED(CONFIG_RETHUNK))
+- x86_return_thunk = retbleed_return_thunk;
++ x86_return_thunk = retbleed_return_thunk;
+
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
+ boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
+@@ -1607,6 +1606,79 @@ static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_
+ dump_stack();
+ }
+
++/*
++ * Set BHI_DIS_S to prevent indirect branches in kernel to be influenced by
++ * branch history in userspace. Not needed if BHI_NO is set.
++ */
++static bool __init spec_ctrl_bhi_dis(void)
++{
++ if (!boot_cpu_has(X86_FEATURE_BHI_CTRL))
++ return false;
++
++ x86_spec_ctrl_base |= SPEC_CTRL_BHI_DIS_S;
++ update_spec_ctrl(x86_spec_ctrl_base);
++ setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_HW);
++
++ return true;
++}
++
++enum bhi_mitigations {
++ BHI_MITIGATION_OFF,
++ BHI_MITIGATION_ON,
++ BHI_MITIGATION_AUTO,
++};
++
++static enum bhi_mitigations bhi_mitigation __ro_after_init =
++ IS_ENABLED(CONFIG_SPECTRE_BHI_ON) ? BHI_MITIGATION_ON :
++ IS_ENABLED(CONFIG_SPECTRE_BHI_OFF) ? BHI_MITIGATION_OFF :
++ BHI_MITIGATION_AUTO;
++
++static int __init spectre_bhi_parse_cmdline(char *str)
++{
++ if (!str)
++ return -EINVAL;
++
++ if (!strcmp(str, "off"))
++ bhi_mitigation = BHI_MITIGATION_OFF;
++ else if (!strcmp(str, "on"))
++ bhi_mitigation = BHI_MITIGATION_ON;
++ else if (!strcmp(str, "auto"))
++ bhi_mitigation = BHI_MITIGATION_AUTO;
++ else
++ pr_err("Ignoring unknown spectre_bhi option (%s)", str);
++
++ return 0;
++}
++early_param("spectre_bhi", spectre_bhi_parse_cmdline);
++
++static void __init bhi_select_mitigation(void)
++{
++ if (bhi_mitigation == BHI_MITIGATION_OFF)
++ return;
++
++ /* Retpoline mitigates against BHI unless the CPU has RRSBA behavior */
++ if (cpu_feature_enabled(X86_FEATURE_RETPOLINE) &&
++ !(x86_read_arch_cap_msr() & ARCH_CAP_RRSBA))
++ return;
++
++ if (spec_ctrl_bhi_dis())
++ return;
++
++ if (!IS_ENABLED(CONFIG_X86_64))
++ return;
++
++ /* Mitigate KVM by default */
++ setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT);
++ pr_info("Spectre BHI mitigation: SW BHB clearing on vm exit\n");
++
++ if (bhi_mitigation == BHI_MITIGATION_AUTO)
++ return;
++
++ /* Mitigate syscalls when the mitigation is forced =on */
++ setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP);
++ pr_info("Spectre BHI mitigation: SW BHB clearing on syscall\n");
++}
++
+ static void __init spectre_v2_select_mitigation(void)
+ {
+ enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
+@@ -1718,6 +1790,9 @@ static void __init spectre_v2_select_mitigation(void)
+ mode == SPECTRE_V2_RETPOLINE)
+ spec_ctrl_disable_kernel_rrsba();
+
++ if (boot_cpu_has(X86_BUG_BHI))
++ bhi_select_mitigation();
++
+ spectre_v2_enabled = mode;
+ pr_info("%s\n", spectre_v2_strings[mode]);
+
+@@ -2695,15 +2770,15 @@ static char *stibp_state(void)
+
+ switch (spectre_v2_user_stibp) {
+ case SPECTRE_V2_USER_NONE:
+- return ", STIBP: disabled";
++ return "; STIBP: disabled";
+ case SPECTRE_V2_USER_STRICT:
+- return ", STIBP: forced";
++ return "; STIBP: forced";
+ case SPECTRE_V2_USER_STRICT_PREFERRED:
+- return ", STIBP: always-on";
++ return "; STIBP: always-on";
+ case SPECTRE_V2_USER_PRCTL:
+ case SPECTRE_V2_USER_SECCOMP:
+ if (static_key_enabled(&switch_to_cond_stibp))
+- return ", STIBP: conditional";
++ return "; STIBP: conditional";
+ }
+ return "";
+ }
+@@ -2712,10 +2787,10 @@ static char *ibpb_state(void)
+ {
+ if (boot_cpu_has(X86_FEATURE_IBPB)) {
+ if (static_key_enabled(&switch_mm_always_ibpb))
+- return ", IBPB: always-on";
++ return "; IBPB: always-on";
+ if (static_key_enabled(&switch_mm_cond_ibpb))
+- return ", IBPB: conditional";
+- return ", IBPB: disabled";
++ return "; IBPB: conditional";
++ return "; IBPB: disabled";
+ }
+ return "";
+ }
+@@ -2725,14 +2800,31 @@ static char *pbrsb_eibrs_state(void)
+ if (boot_cpu_has_bug(X86_BUG_EIBRS_PBRSB)) {
+ if (boot_cpu_has(X86_FEATURE_RSB_VMEXIT_LITE) ||
+ boot_cpu_has(X86_FEATURE_RSB_VMEXIT))
+- return ", PBRSB-eIBRS: SW sequence";
++ return "; PBRSB-eIBRS: SW sequence";
+ else
+- return ", PBRSB-eIBRS: Vulnerable";
++ return "; PBRSB-eIBRS: Vulnerable";
+ } else {
+- return ", PBRSB-eIBRS: Not affected";
++ return "; PBRSB-eIBRS: Not affected";
+ }
+ }
+
++static const char * const spectre_bhi_state(void)
++{
++ if (!boot_cpu_has_bug(X86_BUG_BHI))
++ return "; BHI: Not affected";
++ else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_HW))
++ return "; BHI: BHI_DIS_S";
++ else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP))
++ return "; BHI: SW loop, KVM: SW loop";
++ else if (boot_cpu_has(X86_FEATURE_RETPOLINE) &&
++ !(x86_read_arch_cap_msr() & ARCH_CAP_RRSBA))
++ return "; BHI: Retpoline";
++ else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT))
++ return "; BHI: Syscall hardening, KVM: SW loop";
++
++ return "; BHI: Vulnerable (Syscall hardening enabled)";
++}
++
+ static ssize_t spectre_v2_show_state(char *buf)
+ {
+ if (spectre_v2_enabled == SPECTRE_V2_LFENCE)
+@@ -2745,13 +2837,15 @@ static ssize_t spectre_v2_show_state(char *buf)
+ spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE)
+ return sysfs_emit(buf, "Vulnerable: eIBRS+LFENCE with unprivileged eBPF and SMT\n");
+
+- return sysfs_emit(buf, "%s%s%s%s%s%s%s\n",
++ return sysfs_emit(buf, "%s%s%s%s%s%s%s%s\n",
+ spectre_v2_strings[spectre_v2_enabled],
+ ibpb_state(),
+- boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
++ boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? "; IBRS_FW" : "",
+ stibp_state(),
+- boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "",
++ boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? "; RSB filling" : "",
+ pbrsb_eibrs_state(),
++ spectre_bhi_state(),
++ /* this should always be at the end */
+ spectre_v2_module_string());
+ }
+
+diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
+index 73cfac3fc9c4c..fc4c9a7fb1e3d 100644
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -1165,6 +1165,7 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
+ #define NO_SPECTRE_V2 BIT(8)
+ #define NO_MMIO BIT(9)
+ #define NO_EIBRS_PBRSB BIT(10)
++#define NO_BHI BIT(11)
+
+ #define VULNWL(vendor, family, model, whitelist) \
+ X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, whitelist)
+@@ -1227,18 +1228,18 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
+ VULNWL_INTEL(ATOM_TREMONT_D, NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB),
+
+ /* AMD Family 0xf - 0x12 */
+- VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
+- VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
+- VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
+- VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
++ VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI),
++ VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI),
++ VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI),
++ VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI),
+
+ /* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */
+- VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB),
+- VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB),
++ VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB | NO_BHI),
++ VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB | NO_BHI),
+
+ /* Zhaoxin Family 7 */
+- VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO),
+- VULNWL(ZHAOXIN, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO),
++ VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO | NO_BHI),
++ VULNWL(ZHAOXIN, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO | NO_BHI),
+ {}
+ };
+
+@@ -1475,6 +1476,13 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
+ if (vulnerable_to_rfds(ia32_cap))
+ setup_force_cpu_bug(X86_BUG_RFDS);
+
++ /* When virtualized, eIBRS could be hidden, assume vulnerable */
++ if (!(ia32_cap & ARCH_CAP_BHI_NO) &&
++ !cpu_matches(cpu_vuln_whitelist, NO_BHI) &&
++ (boot_cpu_has(X86_FEATURE_IBRS_ENHANCED) ||
++ boot_cpu_has(X86_FEATURE_HYPERVISOR)))
++ setup_force_cpu_bug(X86_BUG_BHI);
++
+ if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN))
+ return;
+
+diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
+index 20ab11aec60b8..e103c227acd3a 100644
+--- a/arch/x86/kernel/cpu/mce/core.c
++++ b/arch/x86/kernel/cpu/mce/core.c
+@@ -2468,12 +2468,14 @@ static ssize_t set_bank(struct device *s, struct device_attribute *attr,
+ return -EINVAL;
+
+ b = &per_cpu(mce_banks_array, s->id)[bank];
+-
+ if (!b->init)
+ return -ENODEV;
+
+ b->ctl = new;
++
++ mutex_lock(&mce_sysfs_mutex);
+ mce_restart();
++ mutex_unlock(&mce_sysfs_mutex);
+
+ return size;
+ }
+diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
+index 0dad49a09b7a9..af5aa2c754c22 100644
+--- a/arch/x86/kernel/cpu/scattered.c
++++ b/arch/x86/kernel/cpu/scattered.c
+@@ -28,6 +28,7 @@ static const struct cpuid_bit cpuid_bits[] = {
+ { X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 },
+ { X86_FEATURE_INTEL_PPIN, CPUID_EBX, 0, 0x00000007, 1 },
+ { X86_FEATURE_RRSBA_CTRL, CPUID_EDX, 2, 0x00000007, 2 },
++ { X86_FEATURE_BHI_CTRL, CPUID_EDX, 4, 0x00000007, 2 },
+ { X86_FEATURE_CQM_LLC, CPUID_EDX, 1, 0x0000000f, 0 },
+ { X86_FEATURE_CQM_OCCUP_LLC, CPUID_EDX, 0, 0x0000000f, 1 },
+ { X86_FEATURE_CQM_MBM_TOTAL, CPUID_EDX, 1, 0x0000000f, 1 },
+@@ -49,6 +50,7 @@ static const struct cpuid_bit cpuid_bits[] = {
+ { X86_FEATURE_BMEC, CPUID_EBX, 3, 0x80000020, 0 },
+ { X86_FEATURE_PERFMON_V2, CPUID_EAX, 0, 0x80000022, 0 },
+ { X86_FEATURE_AMD_LBR_V2, CPUID_EAX, 1, 0x80000022, 0 },
++ { X86_FEATURE_AMD_LBR_PMC_FREEZE, CPUID_EAX, 2, 0x80000022, 0 },
+ { 0, 0, 0, 0, 0 }
+ };
+
+diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
+index bbc21798df10e..c58213bce294e 100644
+--- a/arch/x86/kernel/head64.c
++++ b/arch/x86/kernel/head64.c
+@@ -41,6 +41,7 @@
+ #include <asm/trapnr.h>
+ #include <asm/sev.h>
+ #include <asm/tdx.h>
++#include <asm/init.h>
+
+ /*
+ * Manage page tables very early on.
+@@ -84,8 +85,6 @@ static struct desc_ptr startup_gdt_descr = {
+ .address = 0,
+ };
+
+-#define __head __section(".head.text")
+-
+ static void __head *fixup_pointer(void *ptr, unsigned long physaddr)
+ {
+ return ptr - (void *)_text + (void *)physaddr;
+diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
+index 15c700d358700..b223922248e9f 100644
+--- a/arch/x86/kernel/mpparse.c
++++ b/arch/x86/kernel/mpparse.c
+@@ -196,12 +196,12 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early)
+ if (!smp_check_mpc(mpc, oem, str))
+ return 0;
+
+- if (early) {
+- /* Initialize the lapic mapping */
+- if (!acpi_lapic)
+- register_lapic_address(mpc->lapic);
++ /* Initialize the lapic mapping */
++ if (!acpi_lapic)
++ register_lapic_address(mpc->lapic);
++
++ if (early)
+ return 1;
+- }
+
+ /* Now process the configuration blocks. */
+ while (count < mpc->length) {
+diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
+index e63a8d05ce298..eb129277dcdd6 100644
+--- a/arch/x86/kernel/setup.c
++++ b/arch/x86/kernel/setup.c
+@@ -35,6 +35,7 @@
+ #include <asm/bios_ebda.h>
+ #include <asm/bugs.h>
+ #include <asm/cacheinfo.h>
++#include <asm/coco.h>
+ #include <asm/cpu.h>
+ #include <asm/efi.h>
+ #include <asm/gart.h>
+@@ -1120,6 +1121,7 @@ void __init setup_arch(char **cmdline_p)
+ * memory size.
+ */
+ sev_setup_arch();
++ cc_random_init();
+
+ efi_fake_memmap();
+ efi_find_mirror();
+diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c
+index 466fe09898ccd..acbec4de3ec31 100644
+--- a/arch/x86/kernel/sev-shared.c
++++ b/arch/x86/kernel/sev-shared.c
+@@ -89,7 +89,8 @@ static bool __init sev_es_check_cpu_features(void)
+ return true;
+ }
+
+-static void __noreturn sev_es_terminate(unsigned int set, unsigned int reason)
++static void __head __noreturn
++sev_es_terminate(unsigned int set, unsigned int reason)
+ {
+ u64 val = GHCB_MSR_TERM_REQ;
+
+@@ -326,13 +327,7 @@ static int sev_cpuid_hv(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid
+ */
+ static const struct snp_cpuid_table *snp_cpuid_get_table(void)
+ {
+- void *ptr;
+-
+- asm ("lea cpuid_table_copy(%%rip), %0"
+- : "=r" (ptr)
+- : "p" (&cpuid_table_copy));
+-
+- return ptr;
++ return &RIP_REL_REF(cpuid_table_copy);
+ }
+
+ /*
+@@ -391,7 +386,7 @@ static u32 snp_cpuid_calc_xsave_size(u64 xfeatures_en, bool compacted)
+ return xsave_size;
+ }
+
+-static bool
++static bool __head
+ snp_cpuid_get_validated_func(struct cpuid_leaf *leaf)
+ {
+ const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
+@@ -528,7 +523,8 @@ static int snp_cpuid_postprocess(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
+ * Returns -EOPNOTSUPP if feature not enabled. Any other non-zero return value
+ * should be treated as fatal by caller.
+ */
+-static int snp_cpuid(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf)
++static int __head
++snp_cpuid(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf)
+ {
+ const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
+
+@@ -570,7 +566,7 @@ static int snp_cpuid(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_le
+ * page yet, so it only supports the MSR based communication with the
+ * hypervisor and only the CPUID exit-code.
+ */
+-void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code)
++void __head do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code)
+ {
+ unsigned int subfn = lower_bits(regs->cx, 32);
+ unsigned int fn = lower_bits(regs->ax, 32);
+@@ -1016,7 +1012,8 @@ struct cc_setup_data {
+ * Search for a Confidential Computing blob passed in as a setup_data entry
+ * via the Linux Boot Protocol.
+ */
+-static struct cc_blob_sev_info *find_cc_blob_setup_data(struct boot_params *bp)
++static __head
++struct cc_blob_sev_info *find_cc_blob_setup_data(struct boot_params *bp)
+ {
+ struct cc_setup_data *sd = NULL;
+ struct setup_data *hdr;
+@@ -1043,7 +1040,7 @@ static struct cc_blob_sev_info *find_cc_blob_setup_data(struct boot_params *bp)
+ * mapping needs to be updated in sync with all the changes to virtual memory
+ * layout and related mapping facilities throughout the boot process.
+ */
+-static void __init setup_cpuid_table(const struct cc_blob_sev_info *cc_info)
++static void __head setup_cpuid_table(const struct cc_blob_sev_info *cc_info)
+ {
+ const struct snp_cpuid_table *cpuid_table_fw, *cpuid_table;
+ int i;
+diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
+index a8db68a063c46..9905dc0e0b096 100644
+--- a/arch/x86/kernel/sev.c
++++ b/arch/x86/kernel/sev.c
+@@ -26,6 +26,7 @@
+ #include <linux/dmi.h>
+ #include <uapi/linux/sev-guest.h>
+
++#include <asm/init.h>
+ #include <asm/cpu_entry_area.h>
+ #include <asm/stacktrace.h>
+ #include <asm/sev.h>
+@@ -683,8 +684,9 @@ static u64 __init get_jump_table_addr(void)
+ return ret;
+ }
+
+-static void early_set_pages_state(unsigned long vaddr, unsigned long paddr,
+- unsigned long npages, enum psc_op op)
++static void __head
++early_set_pages_state(unsigned long vaddr, unsigned long paddr,
++ unsigned long npages, enum psc_op op)
+ {
+ unsigned long paddr_end;
+ u64 val;
+@@ -740,7 +742,7 @@ static void early_set_pages_state(unsigned long vaddr, unsigned long paddr,
+ sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC);
+ }
+
+-void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr,
++void __head early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr,
+ unsigned long npages)
+ {
+ /*
+@@ -2045,7 +2047,7 @@ bool __init handle_vc_boot_ghcb(struct pt_regs *regs)
+ *
+ * Scan for the blob in that order.
+ */
+-static __init struct cc_blob_sev_info *find_cc_blob(struct boot_params *bp)
++static __head struct cc_blob_sev_info *find_cc_blob(struct boot_params *bp)
+ {
+ struct cc_blob_sev_info *cc_info;
+
+@@ -2071,7 +2073,7 @@ static __init struct cc_blob_sev_info *find_cc_blob(struct boot_params *bp)
+ return cc_info;
+ }
+
+-bool __init snp_init(struct boot_params *bp)
++bool __head snp_init(struct boot_params *bp)
+ {
+ struct cc_blob_sev_info *cc_info;
+
+@@ -2093,7 +2095,7 @@ bool __init snp_init(struct boot_params *bp)
+ return true;
+ }
+
+-void __init __noreturn snp_abort(void)
++void __head __noreturn snp_abort(void)
+ {
+ sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);
+ }
+diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
+index f15fb71f280e2..54a5596adaa61 100644
+--- a/arch/x86/kernel/vmlinux.lds.S
++++ b/arch/x86/kernel/vmlinux.lds.S
+@@ -139,10 +139,7 @@ SECTIONS
+ STATIC_CALL_TEXT
+
+ ALIGN_ENTRY_TEXT_BEGIN
+-#ifdef CONFIG_CPU_SRSO
+ *(.text..__x86.rethunk_untrain)
+-#endif
+-
+ ENTRY_TEXT
+
+ #ifdef CONFIG_CPU_SRSO
+@@ -520,12 +517,12 @@ INIT_PER_CPU(irq_stack_backing_store);
+ "fixed_percpu_data is not at start of per-cpu area");
+ #endif
+
+-#ifdef CONFIG_RETHUNK
++#ifdef CONFIG_CPU_UNRET_ENTRY
+ . = ASSERT((retbleed_return_thunk & 0x3f) == 0, "retbleed_return_thunk not cacheline-aligned");
+-. = ASSERT((srso_safe_ret & 0x3f) == 0, "srso_safe_ret not cacheline-aligned");
+ #endif
+
+ #ifdef CONFIG_CPU_SRSO
++. = ASSERT((srso_safe_ret & 0x3f) == 0, "srso_safe_ret not cacheline-aligned");
+ /*
+ * GNU ld cannot do XOR until 2.41.
+ * https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=f6f78318fca803c4907fb8d7f6ded8295f1947b1
+diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
+index f7901cb4d2fa4..11c484d72eab2 100644
+--- a/arch/x86/kvm/mmu/mmu.c
++++ b/arch/x86/kvm/mmu/mmu.c
+@@ -3120,7 +3120,7 @@ static int host_pfn_mapping_level(struct kvm *kvm, gfn_t gfn,
+ if (pud_none(pud) || !pud_present(pud))
+ goto out;
+
+- if (pud_large(pud)) {
++ if (pud_leaf(pud)) {
+ level = PG_LEVEL_1G;
+ goto out;
+ }
+diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h
+index aadefcaa9561d..2f4e155080bad 100644
+--- a/arch/x86/kvm/reverse_cpuid.h
++++ b/arch/x86/kvm/reverse_cpuid.h
+@@ -52,7 +52,7 @@ enum kvm_only_cpuid_leafs {
+ #define X86_FEATURE_IPRED_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 1)
+ #define KVM_X86_FEATURE_RRSBA_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 2)
+ #define X86_FEATURE_DDPD_U KVM_X86_FEATURE(CPUID_7_2_EDX, 3)
+-#define X86_FEATURE_BHI_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 4)
++#define KVM_X86_FEATURE_BHI_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 4)
+ #define X86_FEATURE_MCDT_NO KVM_X86_FEATURE(CPUID_7_2_EDX, 5)
+
+ /* CPUID level 0x80000007 (EDX). */
+@@ -102,10 +102,12 @@ static const struct cpuid_reg reverse_cpuid[] = {
+ */
+ static __always_inline void reverse_cpuid_check(unsigned int x86_leaf)
+ {
++ BUILD_BUG_ON(NR_CPUID_WORDS != NCAPINTS);
+ BUILD_BUG_ON(x86_leaf == CPUID_LNX_1);
+ BUILD_BUG_ON(x86_leaf == CPUID_LNX_2);
+ BUILD_BUG_ON(x86_leaf == CPUID_LNX_3);
+ BUILD_BUG_ON(x86_leaf == CPUID_LNX_4);
++ BUILD_BUG_ON(x86_leaf == CPUID_LNX_5);
+ BUILD_BUG_ON(x86_leaf >= ARRAY_SIZE(reverse_cpuid));
+ BUILD_BUG_ON(reverse_cpuid[x86_leaf].function == 0);
+ }
+@@ -126,6 +128,7 @@ static __always_inline u32 __feature_translate(int x86_feature)
+ KVM_X86_TRANSLATE_FEATURE(CONSTANT_TSC);
+ KVM_X86_TRANSLATE_FEATURE(PERFMON_V2);
+ KVM_X86_TRANSLATE_FEATURE(RRSBA_CTRL);
++ KVM_X86_TRANSLATE_FEATURE(BHI_CTRL);
+ default:
+ return x86_feature;
+ }
+diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
+index e86231c3b8a54..c5845f31c34dc 100644
+--- a/arch/x86/kvm/svm/sev.c
++++ b/arch/x86/kvm/svm/sev.c
+@@ -84,9 +84,10 @@ struct enc_region {
+ };
+
+ /* Called with the sev_bitmap_lock held, or on shutdown */
+-static int sev_flush_asids(int min_asid, int max_asid)
++static int sev_flush_asids(unsigned int min_asid, unsigned int max_asid)
+ {
+- int ret, asid, error = 0;
++ int ret, error = 0;
++ unsigned int asid;
+
+ /* Check if there are any ASIDs to reclaim before performing a flush */
+ asid = find_next_bit(sev_reclaim_asid_bitmap, nr_asids, min_asid);
+@@ -116,7 +117,7 @@ static inline bool is_mirroring_enc_context(struct kvm *kvm)
+ }
+
+ /* Must be called with the sev_bitmap_lock held */
+-static bool __sev_recycle_asids(int min_asid, int max_asid)
++static bool __sev_recycle_asids(unsigned int min_asid, unsigned int max_asid)
+ {
+ if (sev_flush_asids(min_asid, max_asid))
+ return false;
+@@ -143,8 +144,20 @@ static void sev_misc_cg_uncharge(struct kvm_sev_info *sev)
+
+ static int sev_asid_new(struct kvm_sev_info *sev)
+ {
+- int asid, min_asid, max_asid, ret;
++ /*
++ * SEV-enabled guests must use asid from min_sev_asid to max_sev_asid.
++ * SEV-ES-enabled guest can use from 1 to min_sev_asid - 1.
++ * Note: min ASID can end up larger than the max if basic SEV support is
++ * effectively disabled by disallowing use of ASIDs for SEV guests.
++ */
++ unsigned int min_asid = sev->es_active ? 1 : min_sev_asid;
++ unsigned int max_asid = sev->es_active ? min_sev_asid - 1 : max_sev_asid;
++ unsigned int asid;
+ bool retry = true;
++ int ret;
++
++ if (min_asid > max_asid)
++ return -ENOTTY;
+
+ WARN_ON(sev->misc_cg);
+ sev->misc_cg = get_current_misc_cg();
+@@ -157,12 +170,6 @@ static int sev_asid_new(struct kvm_sev_info *sev)
+
+ mutex_lock(&sev_bitmap_lock);
+
+- /*
+- * SEV-enabled guests must use asid from min_sev_asid to max_sev_asid.
+- * SEV-ES-enabled guest can use from 1 to min_sev_asid - 1.
+- */
+- min_asid = sev->es_active ? 1 : min_sev_asid;
+- max_asid = sev->es_active ? min_sev_asid - 1 : max_sev_asid;
+ again:
+ asid = find_next_zero_bit(sev_asid_bitmap, max_asid + 1, min_asid);
+ if (asid > max_asid) {
+@@ -187,7 +194,7 @@ static int sev_asid_new(struct kvm_sev_info *sev)
+ return ret;
+ }
+
+-static int sev_get_asid(struct kvm *kvm)
++static unsigned int sev_get_asid(struct kvm *kvm)
+ {
+ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+
+@@ -284,8 +291,8 @@ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
+
+ static int sev_bind_asid(struct kvm *kvm, unsigned int handle, int *error)
+ {
++ unsigned int asid = sev_get_asid(kvm);
+ struct sev_data_activate activate;
+- int asid = sev_get_asid(kvm);
+ int ret;
+
+ /* activate ASID on the given handle */
+@@ -2234,8 +2241,10 @@ void __init sev_hardware_setup(void)
+ goto out;
+ }
+
+- sev_asid_count = max_sev_asid - min_sev_asid + 1;
+- WARN_ON_ONCE(misc_cg_set_capacity(MISC_CG_RES_SEV, sev_asid_count));
++ if (min_sev_asid <= max_sev_asid) {
++ sev_asid_count = max_sev_asid - min_sev_asid + 1;
++ WARN_ON_ONCE(misc_cg_set_capacity(MISC_CG_RES_SEV, sev_asid_count));
++ }
+ sev_supported = true;
+
+ /* SEV-ES support requested? */
+@@ -2266,7 +2275,9 @@ void __init sev_hardware_setup(void)
+ out:
+ if (boot_cpu_has(X86_FEATURE_SEV))
+ pr_info("SEV %s (ASIDs %u - %u)\n",
+- sev_supported ? "enabled" : "disabled",
++ sev_supported ? min_sev_asid <= max_sev_asid ? "enabled" :
++ "unusable" :
++ "disabled",
+ min_sev_asid, max_sev_asid);
+ if (boot_cpu_has(X86_FEATURE_SEV_ES))
+ pr_info("SEV-ES %s (ASIDs %u - %u)\n",
+@@ -2314,7 +2325,7 @@ int sev_cpu_init(struct svm_cpu_data *sd)
+ */
+ static void sev_flush_encrypted_page(struct kvm_vcpu *vcpu, void *va)
+ {
+- int asid = to_kvm_svm(vcpu->kvm)->sev_info.asid;
++ unsigned int asid = sev_get_asid(vcpu->kvm);
+
+ /*
+ * Note! The address must be a kernel address, as regular page walk
+@@ -2632,7 +2643,7 @@ void sev_es_unmap_ghcb(struct vcpu_svm *svm)
+ void pre_sev_run(struct vcpu_svm *svm, int cpu)
+ {
+ struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, cpu);
+- int asid = sev_get_asid(svm->vcpu.kvm);
++ unsigned int asid = sev_get_asid(svm->vcpu.kvm);
+
+ /* Assign the asid allocated with this SEV guest */
+ svm->asid = asid;
+diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
+index 83843379813ee..b82e6ed4f0241 100644
+--- a/arch/x86/kvm/trace.h
++++ b/arch/x86/kvm/trace.h
+@@ -732,13 +732,13 @@ TRACE_EVENT(kvm_nested_intr_vmexit,
+ * Tracepoint for nested #vmexit because of interrupt pending
+ */
+ TRACE_EVENT(kvm_invlpga,
+- TP_PROTO(__u64 rip, int asid, u64 address),
++ TP_PROTO(__u64 rip, unsigned int asid, u64 address),
+ TP_ARGS(rip, asid, address),
+
+ TP_STRUCT__entry(
+- __field( __u64, rip )
+- __field( int, asid )
+- __field( __u64, address )
++ __field( __u64, rip )
++ __field( unsigned int, asid )
++ __field( __u64, address )
+ ),
+
+ TP_fast_assign(
+@@ -747,7 +747,7 @@ TRACE_EVENT(kvm_invlpga,
+ __entry->address = address;
+ ),
+
+- TP_printk("rip: 0x%016llx asid: %d address: 0x%016llx",
++ TP_printk("rip: 0x%016llx asid: %u address: 0x%016llx",
+ __entry->rip, __entry->asid, __entry->address)
+ );
+
+diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
+index 139960deb7362..9522d46567f81 100644
+--- a/arch/x86/kvm/vmx/vmenter.S
++++ b/arch/x86/kvm/vmx/vmenter.S
+@@ -275,6 +275,8 @@ SYM_INNER_LABEL_ALIGN(vmx_vmexit, SYM_L_GLOBAL)
+
+ call vmx_spec_ctrl_restore_host
+
++ CLEAR_BRANCH_HISTORY_VMEXIT
++
+ /* Put return value in AX */
+ mov %_ASM_BX, %_ASM_AX
+
+diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
+index 4aafd007964fe..4ed8a7dc05369 100644
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -1621,7 +1621,7 @@ static bool kvm_is_immutable_feature_msr(u32 msr)
+ ARCH_CAP_PSCHANGE_MC_NO | ARCH_CAP_TSX_CTRL_MSR | ARCH_CAP_TAA_NO | \
+ ARCH_CAP_SBDR_SSDP_NO | ARCH_CAP_FBSDP_NO | ARCH_CAP_PSDP_NO | \
+ ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO | ARCH_CAP_GDS_NO | \
+- ARCH_CAP_RFDS_NO | ARCH_CAP_RFDS_CLEAR)
++ ARCH_CAP_RFDS_NO | ARCH_CAP_RFDS_CLEAR | ARCH_CAP_BHI_NO)
+
+ static u64 kvm_get_arch_capabilities(void)
+ {
+diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
+index ea3a28e7b613c..f0dae4fb6d071 100644
+--- a/arch/x86/lib/Makefile
++++ b/arch/x86/lib/Makefile
+@@ -14,19 +14,6 @@ ifdef CONFIG_KCSAN
+ CFLAGS_REMOVE_delay.o = $(CC_FLAGS_FTRACE)
+ endif
+
+-# Early boot use of cmdline; don't instrument it
+-ifdef CONFIG_AMD_MEM_ENCRYPT
+-KCOV_INSTRUMENT_cmdline.o := n
+-KASAN_SANITIZE_cmdline.o := n
+-KCSAN_SANITIZE_cmdline.o := n
+-
+-ifdef CONFIG_FUNCTION_TRACER
+-CFLAGS_REMOVE_cmdline.o = -pg
+-endif
+-
+-CFLAGS_cmdline.o := -fno-stack-protector -fno-jump-tables
+-endif
+-
+ inat_tables_script = $(srctree)/arch/x86/tools/gen-insn-attr-x86.awk
+ inat_tables_maps = $(srctree)/arch/x86/lib/x86-opcode-map.txt
+ quiet_cmd_inat_tables = GEN $@
+diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
+index cd86aeb5fdd3e..ffa51f392e17a 100644
+--- a/arch/x86/lib/retpoline.S
++++ b/arch/x86/lib/retpoline.S
+@@ -126,12 +126,13 @@ SYM_CODE_END(__x86_indirect_jump_thunk_array)
+ #include <asm/GEN-for-each-reg.h>
+ #undef GEN
+ #endif
+-/*
+- * This function name is magical and is used by -mfunction-return=thunk-extern
+- * for the compiler to generate JMPs to it.
+- */
++
+ #ifdef CONFIG_RETHUNK
+
++ .section .text..__x86.return_thunk
++
++#ifdef CONFIG_CPU_SRSO
++
+ /*
+ * srso_alias_untrain_ret() and srso_alias_safe_ret() are placed at
+ * special addresses:
+@@ -147,9 +148,7 @@ SYM_CODE_END(__x86_indirect_jump_thunk_array)
+ *
+ * As a result, srso_alias_safe_ret() becomes a safe return.
+ */
+-#ifdef CONFIG_CPU_SRSO
+- .section .text..__x86.rethunk_untrain
+-
++ .pushsection .text..__x86.rethunk_untrain
+ SYM_START(srso_alias_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
+ UNWIND_HINT_FUNC
+ ANNOTATE_NOENDBR
+@@ -158,17 +157,9 @@ SYM_START(srso_alias_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
+ jmp srso_alias_return_thunk
+ SYM_FUNC_END(srso_alias_untrain_ret)
+ __EXPORT_THUNK(srso_alias_untrain_ret)
++ .popsection
+
+- .section .text..__x86.rethunk_safe
+-#else
+-/* dummy definition for alternatives */
+-SYM_START(srso_alias_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
+- ANNOTATE_UNRET_SAFE
+- ret
+- int3
+-SYM_FUNC_END(srso_alias_untrain_ret)
+-#endif
+-
++ .pushsection .text..__x86.rethunk_safe
+ SYM_START(srso_alias_safe_ret, SYM_L_GLOBAL, SYM_A_NONE)
+ lea 8(%_ASM_SP), %_ASM_SP
+ UNWIND_HINT_FUNC
+@@ -177,14 +168,69 @@ SYM_START(srso_alias_safe_ret, SYM_L_GLOBAL, SYM_A_NONE)
+ int3
+ SYM_FUNC_END(srso_alias_safe_ret)
+
+- .section .text..__x86.return_thunk
+-
+-SYM_CODE_START(srso_alias_return_thunk)
++SYM_CODE_START_NOALIGN(srso_alias_return_thunk)
+ UNWIND_HINT_FUNC
+ ANNOTATE_NOENDBR
+ call srso_alias_safe_ret
+ ud2
+ SYM_CODE_END(srso_alias_return_thunk)
++ .popsection
++
++/*
++ * SRSO untraining sequence for Zen1/2, similar to retbleed_untrain_ret()
++ * above. On kernel entry, srso_untrain_ret() is executed which is a
++ *
++ * movabs $0xccccc30824648d48,%rax
++ *
++ * and when the return thunk executes the inner label srso_safe_ret()
++ * later, it is a stack manipulation and a RET which is mispredicted and
++ * thus a "safe" one to use.
++ */
++ .align 64
++ .skip 64 - (srso_safe_ret - srso_untrain_ret), 0xcc
++SYM_START(srso_untrain_ret, SYM_L_LOCAL, SYM_A_NONE)
++ ANNOTATE_NOENDBR
++ .byte 0x48, 0xb8
++
++/*
++ * This forces the function return instruction to speculate into a trap
++ * (UD2 in srso_return_thunk() below). This RET will then mispredict
++ * and execution will continue at the return site read from the top of
++ * the stack.
++ */
++SYM_INNER_LABEL(srso_safe_ret, SYM_L_GLOBAL)
++ lea 8(%_ASM_SP), %_ASM_SP
++ ret
++ int3
++ int3
++ /* end of movabs */
++ lfence
++ call srso_safe_ret
++ ud2
++SYM_CODE_END(srso_safe_ret)
++SYM_FUNC_END(srso_untrain_ret)
++
++SYM_CODE_START(srso_return_thunk)
++ UNWIND_HINT_FUNC
++ ANNOTATE_NOENDBR
++ call srso_safe_ret
++ ud2
++SYM_CODE_END(srso_return_thunk)
++
++#define JMP_SRSO_UNTRAIN_RET "jmp srso_untrain_ret"
++#else /* !CONFIG_CPU_SRSO */
++#define JMP_SRSO_UNTRAIN_RET "ud2"
++/* Dummy for the alternative in CALL_UNTRAIN_RET. */
++SYM_CODE_START(srso_alias_untrain_ret)
++ ANNOTATE_UNRET_SAFE
++ ANNOTATE_NOENDBR
++ ret
++ int3
++SYM_FUNC_END(srso_alias_untrain_ret)
++__EXPORT_THUNK(srso_alias_untrain_ret)
++#endif /* CONFIG_CPU_SRSO */
++
++#ifdef CONFIG_CPU_UNRET_ENTRY
+
+ /*
+ * Some generic notes on the untraining sequences:
+@@ -266,65 +312,19 @@ SYM_CODE_END(retbleed_return_thunk)
+ SYM_FUNC_END(retbleed_untrain_ret)
+ __EXPORT_THUNK(retbleed_untrain_ret)
+
+-/*
+- * SRSO untraining sequence for Zen1/2, similar to retbleed_untrain_ret()
+- * above. On kernel entry, srso_untrain_ret() is executed which is a
+- *
+- * movabs $0xccccc30824648d48,%rax
+- *
+- * and when the return thunk executes the inner label srso_safe_ret()
+- * later, it is a stack manipulation and a RET which is mispredicted and
+- * thus a "safe" one to use.
+- */
+- .align 64
+- .skip 64 - (srso_safe_ret - srso_untrain_ret), 0xcc
+-SYM_START(srso_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
+- ANNOTATE_NOENDBR
+- .byte 0x48, 0xb8
+-
+-/*
+- * This forces the function return instruction to speculate into a trap
+- * (UD2 in srso_return_thunk() below). This RET will then mispredict
+- * and execution will continue at the return site read from the top of
+- * the stack.
+- */
+-SYM_INNER_LABEL(srso_safe_ret, SYM_L_GLOBAL)
+- lea 8(%_ASM_SP), %_ASM_SP
+- ret
+- int3
+- int3
+- /* end of movabs */
+- lfence
+- call srso_safe_ret
+- ud2
+-SYM_CODE_END(srso_safe_ret)
+-SYM_FUNC_END(srso_untrain_ret)
+-__EXPORT_THUNK(srso_untrain_ret)
++#define JMP_RETBLEED_UNTRAIN_RET "jmp retbleed_untrain_ret"
++#else /* !CONFIG_CPU_UNRET_ENTRY */
++#define JMP_RETBLEED_UNTRAIN_RET "ud2"
++#endif /* CONFIG_CPU_UNRET_ENTRY */
+
+-SYM_CODE_START(srso_return_thunk)
+- UNWIND_HINT_FUNC
+- ANNOTATE_NOENDBR
+- call srso_safe_ret
+- ud2
+-SYM_CODE_END(srso_return_thunk)
++#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_SRSO)
+
+ SYM_FUNC_START(entry_untrain_ret)
+- ALTERNATIVE_2 "jmp retbleed_untrain_ret", \
+- "jmp srso_untrain_ret", X86_FEATURE_SRSO, \
+- "jmp srso_alias_untrain_ret", X86_FEATURE_SRSO_ALIAS
++ ALTERNATIVE JMP_RETBLEED_UNTRAIN_RET, JMP_SRSO_UNTRAIN_RET, X86_FEATURE_SRSO
+ SYM_FUNC_END(entry_untrain_ret)
+ __EXPORT_THUNK(entry_untrain_ret)
+
+-SYM_CODE_START(__x86_return_thunk)
+- UNWIND_HINT_FUNC
+- ANNOTATE_NOENDBR
+- ANNOTATE_UNRET_SAFE
+- ret
+- int3
+-SYM_CODE_END(__x86_return_thunk)
+-EXPORT_SYMBOL(__x86_return_thunk)
+-
+-#endif /* CONFIG_RETHUNK */
++#endif /* CONFIG_CPU_UNRET_ENTRY || CONFIG_CPU_SRSO */
+
+ #ifdef CONFIG_CALL_DEPTH_TRACKING
+
+@@ -359,3 +359,22 @@ SYM_FUNC_START(__x86_return_skl)
+ SYM_FUNC_END(__x86_return_skl)
+
+ #endif /* CONFIG_CALL_DEPTH_TRACKING */
++
++/*
++ * This function name is magical and is used by -mfunction-return=thunk-extern
++ * for the compiler to generate JMPs to it.
++ *
++ * This code is only used during kernel boot or module init. All
++ * 'JMP __x86_return_thunk' sites are changed to something else by
++ * apply_returns().
++ */
++SYM_CODE_START(__x86_return_thunk)
++ UNWIND_HINT_FUNC
++ ANNOTATE_NOENDBR
++ ANNOTATE_UNRET_SAFE
++ ret
++ int3
++SYM_CODE_END(__x86_return_thunk)
++EXPORT_SYMBOL(__x86_return_thunk)
++
++#endif /* CONFIG_RETHUNK */
+diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
+index a9d69ec994b75..e238517968836 100644
+--- a/arch/x86/mm/fault.c
++++ b/arch/x86/mm/fault.c
+@@ -376,7 +376,7 @@ static void dump_pagetable(unsigned long address)
+ goto bad;
+
+ pr_cont("PUD %lx ", pud_val(*pud));
+- if (!pud_present(*pud) || pud_large(*pud))
++ if (!pud_present(*pud) || pud_leaf(*pud))
+ goto out;
+
+ pmd = pmd_offset(pud, address);
+@@ -1037,7 +1037,7 @@ spurious_kernel_fault(unsigned long error_code, unsigned long address)
+ if (!pud_present(*pud))
+ return 0;
+
+- if (pud_large(*pud))
++ if (pud_leaf(*pud))
+ return spurious_kernel_fault_check(error_code, (pte_t *) pud);
+
+ pmd = pmd_offset(pud, address);
+diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c
+index f50cc210a9818..968d7005f4a72 100644
+--- a/arch/x86/mm/ident_map.c
++++ b/arch/x86/mm/ident_map.c
+@@ -26,31 +26,18 @@ static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page,
+ for (; addr < end; addr = next) {
+ pud_t *pud = pud_page + pud_index(addr);
+ pmd_t *pmd;
+- bool use_gbpage;
+
+ next = (addr & PUD_MASK) + PUD_SIZE;
+ if (next > end)
+ next = end;
+
+- /* if this is already a gbpage, this portion is already mapped */
+- if (pud_large(*pud))
+- continue;
+-
+- /* Is using a gbpage allowed? */
+- use_gbpage = info->direct_gbpages;
+-
+- /* Don't use gbpage if it maps more than the requested region. */
+- /* at the begining: */
+- use_gbpage &= ((addr & ~PUD_MASK) == 0);
+- /* ... or at the end: */
+- use_gbpage &= ((next & ~PUD_MASK) == 0);
+-
+- /* Never overwrite existing mappings */
+- use_gbpage &= !pud_present(*pud);
+-
+- if (use_gbpage) {
++ if (info->direct_gbpages) {
+ pud_t pudval;
+
++ if (pud_present(*pud))
++ continue;
++
++ addr &= PUD_MASK;
+ pudval = __pud((addr - info->offset) | info->page_flag);
+ set_pud(pud, pudval);
+ continue;
+diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
+index a190aae8ceaf7..19d209b412d7a 100644
+--- a/arch/x86/mm/init_64.c
++++ b/arch/x86/mm/init_64.c
+@@ -617,7 +617,7 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
+ }
+
+ if (!pud_none(*pud)) {
+- if (!pud_large(*pud)) {
++ if (!pud_leaf(*pud)) {
+ pmd = pmd_offset(pud, 0);
+ paddr_last = phys_pmd_init(pmd, paddr,
+ paddr_end,
+@@ -1163,7 +1163,7 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end,
+ if (!pud_present(*pud))
+ continue;
+
+- if (pud_large(*pud) &&
++ if (pud_leaf(*pud) &&
+ IS_ALIGNED(addr, PUD_SIZE) &&
+ IS_ALIGNED(next, PUD_SIZE)) {
+ spin_lock(&init_mm.page_table_lock);
+diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
+index 0302491d799d1..fcf508c52bdc5 100644
+--- a/arch/x86/mm/kasan_init_64.c
++++ b/arch/x86/mm/kasan_init_64.c
+@@ -115,7 +115,7 @@ static void __init kasan_populate_p4d(p4d_t *p4d, unsigned long addr,
+ pud = pud_offset(p4d, addr);
+ do {
+ next = pud_addr_end(addr, end);
+- if (!pud_large(*pud))
++ if (!pud_leaf(*pud))
+ kasan_populate_pud(pud, addr, next, nid);
+ } while (pud++, addr = next, addr != end);
+ }
+diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c
+index 0166ab1780ccb..cc47a818a640a 100644
+--- a/arch/x86/mm/mem_encrypt_identity.c
++++ b/arch/x86/mm/mem_encrypt_identity.c
+@@ -41,9 +41,9 @@
+ #include <linux/mem_encrypt.h>
+ #include <linux/cc_platform.h>
+
++#include <asm/init.h>
+ #include <asm/setup.h>
+ #include <asm/sections.h>
+-#include <asm/cmdline.h>
+ #include <asm/coco.h>
+ #include <asm/sev.h>
+
+@@ -95,10 +95,7 @@ struct sme_populate_pgd_data {
+ */
+ static char sme_workarea[2 * PMD_SIZE] __section(".init.scratch");
+
+-static char sme_cmdline_arg[] __initdata = "mem_encrypt";
+-static char sme_cmdline_on[] __initdata = "on";
+-
+-static void __init sme_clear_pgd(struct sme_populate_pgd_data *ppd)
++static void __head sme_clear_pgd(struct sme_populate_pgd_data *ppd)
+ {
+ unsigned long pgd_start, pgd_end, pgd_size;
+ pgd_t *pgd_p;
+@@ -113,7 +110,7 @@ static void __init sme_clear_pgd(struct sme_populate_pgd_data *ppd)
+ memset(pgd_p, 0, pgd_size);
+ }
+
+-static pud_t __init *sme_prepare_pgd(struct sme_populate_pgd_data *ppd)
++static pud_t __head *sme_prepare_pgd(struct sme_populate_pgd_data *ppd)
+ {
+ pgd_t *pgd;
+ p4d_t *p4d;
+@@ -144,13 +141,13 @@ static pud_t __init *sme_prepare_pgd(struct sme_populate_pgd_data *ppd)
+ set_pud(pud, __pud(PUD_FLAGS | __pa(pmd)));
+ }
+
+- if (pud_large(*pud))
++ if (pud_leaf(*pud))
+ return NULL;
+
+ return pud;
+ }
+
+-static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd)
++static void __head sme_populate_pgd_large(struct sme_populate_pgd_data *ppd)
+ {
+ pud_t *pud;
+ pmd_t *pmd;
+@@ -166,7 +163,7 @@ static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd)
+ set_pmd(pmd, __pmd(ppd->paddr | ppd->pmd_flags));
+ }
+
+-static void __init sme_populate_pgd(struct sme_populate_pgd_data *ppd)
++static void __head sme_populate_pgd(struct sme_populate_pgd_data *ppd)
+ {
+ pud_t *pud;
+ pmd_t *pmd;
+@@ -192,7 +189,7 @@ static void __init sme_populate_pgd(struct sme_populate_pgd_data *ppd)
+ set_pte(pte, __pte(ppd->paddr | ppd->pte_flags));
+ }
+
+-static void __init __sme_map_range_pmd(struct sme_populate_pgd_data *ppd)
++static void __head __sme_map_range_pmd(struct sme_populate_pgd_data *ppd)
+ {
+ while (ppd->vaddr < ppd->vaddr_end) {
+ sme_populate_pgd_large(ppd);
+@@ -202,7 +199,7 @@ static void __init __sme_map_range_pmd(struct sme_populate_pgd_data *ppd)
+ }
+ }
+
+-static void __init __sme_map_range_pte(struct sme_populate_pgd_data *ppd)
++static void __head __sme_map_range_pte(struct sme_populate_pgd_data *ppd)
+ {
+ while (ppd->vaddr < ppd->vaddr_end) {
+ sme_populate_pgd(ppd);
+@@ -212,7 +209,7 @@ static void __init __sme_map_range_pte(struct sme_populate_pgd_data *ppd)
+ }
+ }
+
+-static void __init __sme_map_range(struct sme_populate_pgd_data *ppd,
++static void __head __sme_map_range(struct sme_populate_pgd_data *ppd,
+ pmdval_t pmd_flags, pteval_t pte_flags)
+ {
+ unsigned long vaddr_end;
+@@ -236,22 +233,22 @@ static void __init __sme_map_range(struct sme_populate_pgd_data *ppd,
+ __sme_map_range_pte(ppd);
+ }
+
+-static void __init sme_map_range_encrypted(struct sme_populate_pgd_data *ppd)
++static void __head sme_map_range_encrypted(struct sme_populate_pgd_data *ppd)
+ {
+ __sme_map_range(ppd, PMD_FLAGS_ENC, PTE_FLAGS_ENC);
+ }
+
+-static void __init sme_map_range_decrypted(struct sme_populate_pgd_data *ppd)
++static void __head sme_map_range_decrypted(struct sme_populate_pgd_data *ppd)
+ {
+ __sme_map_range(ppd, PMD_FLAGS_DEC, PTE_FLAGS_DEC);
+ }
+
+-static void __init sme_map_range_decrypted_wp(struct sme_populate_pgd_data *ppd)
++static void __head sme_map_range_decrypted_wp(struct sme_populate_pgd_data *ppd)
+ {
+ __sme_map_range(ppd, PMD_FLAGS_DEC_WP, PTE_FLAGS_DEC_WP);
+ }
+
+-static unsigned long __init sme_pgtable_calc(unsigned long len)
++static unsigned long __head sme_pgtable_calc(unsigned long len)
+ {
+ unsigned long entries = 0, tables = 0;
+
+@@ -288,7 +285,7 @@ static unsigned long __init sme_pgtable_calc(unsigned long len)
+ return entries + tables;
+ }
+
+-void __init sme_encrypt_kernel(struct boot_params *bp)
++void __head sme_encrypt_kernel(struct boot_params *bp)
+ {
+ unsigned long workarea_start, workarea_end, workarea_len;
+ unsigned long execute_start, execute_end, execute_len;
+@@ -323,9 +320,8 @@ void __init sme_encrypt_kernel(struct boot_params *bp)
+ * memory from being cached.
+ */
+
+- /* Physical addresses gives us the identity mapped virtual addresses */
+- kernel_start = __pa_symbol(_text);
+- kernel_end = ALIGN(__pa_symbol(_end), PMD_SIZE);
++ kernel_start = (unsigned long)RIP_REL_REF(_text);
++ kernel_end = ALIGN((unsigned long)RIP_REL_REF(_end), PMD_SIZE);
+ kernel_len = kernel_end - kernel_start;
+
+ initrd_start = 0;
+@@ -342,14 +338,6 @@ void __init sme_encrypt_kernel(struct boot_params *bp)
+ }
+ #endif
+
+- /*
+- * We're running identity mapped, so we must obtain the address to the
+- * SME encryption workarea using rip-relative addressing.
+- */
+- asm ("lea sme_workarea(%%rip), %0"
+- : "=r" (workarea_start)
+- : "p" (sme_workarea));
+-
+ /*
+ * Calculate required number of workarea bytes needed:
+ * executable encryption area size:
+@@ -359,7 +347,7 @@ void __init sme_encrypt_kernel(struct boot_params *bp)
+ * pagetable structures for the encryption of the kernel
+ * pagetable structures for workarea (in case not currently mapped)
+ */
+- execute_start = workarea_start;
++ execute_start = workarea_start = (unsigned long)RIP_REL_REF(sme_workarea);
+ execute_end = execute_start + (PAGE_SIZE * 2) + PMD_SIZE;
+ execute_len = execute_end - execute_start;
+
+@@ -502,13 +490,11 @@ void __init sme_encrypt_kernel(struct boot_params *bp)
+ native_write_cr3(__native_read_cr3());
+ }
+
+-void __init sme_enable(struct boot_params *bp)
++void __head sme_enable(struct boot_params *bp)
+ {
+- const char *cmdline_ptr, *cmdline_arg, *cmdline_on;
+ unsigned int eax, ebx, ecx, edx;
+ unsigned long feature_mask;
+ unsigned long me_mask;
+- char buffer[16];
+ bool snp;
+ u64 msr;
+
+@@ -551,6 +537,9 @@ void __init sme_enable(struct boot_params *bp)
+
+ /* Check if memory encryption is enabled */
+ if (feature_mask == AMD_SME_BIT) {
++ if (!(bp->hdr.xloadflags & XLF_MEM_ENCRYPTION))
++ return;
++
+ /*
+ * No SME if Hypervisor bit is set. This check is here to
+ * prevent a guest from trying to enable SME. For running as a
+@@ -570,31 +559,8 @@ void __init sme_enable(struct boot_params *bp)
+ msr = __rdmsr(MSR_AMD64_SYSCFG);
+ if (!(msr & MSR_AMD64_SYSCFG_MEM_ENCRYPT))
+ return;
+- } else {
+- /* SEV state cannot be controlled by a command line option */
+- goto out;
+ }
+
+- /*
+- * Fixups have not been applied to phys_base yet and we're running
+- * identity mapped, so we must obtain the address to the SME command
+- * line argument data using rip-relative addressing.
+- */
+- asm ("lea sme_cmdline_arg(%%rip), %0"
+- : "=r" (cmdline_arg)
+- : "p" (sme_cmdline_arg));
+- asm ("lea sme_cmdline_on(%%rip), %0"
+- : "=r" (cmdline_on)
+- : "p" (sme_cmdline_on));
+-
+- cmdline_ptr = (const char *)((u64)bp->hdr.cmd_line_ptr |
+- ((u64)bp->ext_cmd_line_ptr << 32));
+-
+- if (cmdline_find_option(cmdline_ptr, cmdline_arg, buffer, sizeof(buffer)) < 0 ||
+- strncmp(buffer, cmdline_on, sizeof(buffer)))
+- return;
+-
+-out:
+ RIP_REL_REF(sme_me_mask) = me_mask;
+ physical_mask &= ~me_mask;
+ cc_vendor = CC_VENDOR_AMD;
+diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c
+index de10800cd4dd4..e7b9ac63bb02a 100644
+--- a/arch/x86/mm/pat/memtype.c
++++ b/arch/x86/mm/pat/memtype.c
+@@ -950,6 +950,38 @@ static void free_pfn_range(u64 paddr, unsigned long size)
+ memtype_free(paddr, paddr + size);
+ }
+
++static int get_pat_info(struct vm_area_struct *vma, resource_size_t *paddr,
++ pgprot_t *pgprot)
++{
++ unsigned long prot;
++
++ VM_WARN_ON_ONCE(!(vma->vm_flags & VM_PAT));
++
++ /*
++ * We need the starting PFN and cachemode used for track_pfn_remap()
++ * that covered the whole VMA. For most mappings, we can obtain that
++ * information from the page tables. For COW mappings, we might now
++ * suddenly have anon folios mapped and follow_phys() will fail.
++ *
++ * Fallback to using vma->vm_pgoff, see remap_pfn_range_notrack(), to
++ * detect the PFN. If we need the cachemode as well, we're out of luck
++ * for now and have to fail fork().
++ */
++ if (!follow_phys(vma, vma->vm_start, 0, &prot, paddr)) {
++ if (pgprot)
++ *pgprot = __pgprot(prot);
++ return 0;
++ }
++ if (is_cow_mapping(vma->vm_flags)) {
++ if (pgprot)
++ return -EINVAL;
++ *paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT;
++ return 0;
++ }
++ WARN_ON_ONCE(1);
++ return -EINVAL;
++}
++
+ /*
+ * track_pfn_copy is called when vma that is covering the pfnmap gets
+ * copied through copy_page_range().
+@@ -960,20 +992,13 @@ static void free_pfn_range(u64 paddr, unsigned long size)
+ int track_pfn_copy(struct vm_area_struct *vma)
+ {
+ resource_size_t paddr;
+- unsigned long prot;
+ unsigned long vma_size = vma->vm_end - vma->vm_start;
+ pgprot_t pgprot;
+
+ if (vma->vm_flags & VM_PAT) {
+- /*
+- * reserve the whole chunk covered by vma. We need the
+- * starting address and protection from pte.
+- */
+- if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) {
+- WARN_ON_ONCE(1);
++ if (get_pat_info(vma, &paddr, &pgprot))
+ return -EINVAL;
+- }
+- pgprot = __pgprot(prot);
++ /* reserve the whole chunk covered by vma. */
+ return reserve_pfn_range(paddr, vma_size, &pgprot, 1);
+ }
+
+@@ -1048,7 +1073,6 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
+ unsigned long size, bool mm_wr_locked)
+ {
+ resource_size_t paddr;
+- unsigned long prot;
+
+ if (vma && !(vma->vm_flags & VM_PAT))
+ return;
+@@ -1056,11 +1080,8 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
+ /* free the chunk starting from pfn or the whole chunk */
+ paddr = (resource_size_t)pfn << PAGE_SHIFT;
+ if (!paddr && !size) {
+- if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) {
+- WARN_ON_ONCE(1);
++ if (get_pat_info(vma, &paddr, NULL))
+ return;
+- }
+-
+ size = vma->vm_end - vma->vm_start;
+ }
+ free_pfn_range(paddr, size);
+diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
+index bda9f129835e9..f3c4c756fe1ee 100644
+--- a/arch/x86/mm/pat/set_memory.c
++++ b/arch/x86/mm/pat/set_memory.c
+@@ -684,7 +684,7 @@ pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
+ return NULL;
+
+ *level = PG_LEVEL_1G;
+- if (pud_large(*pud) || !pud_present(*pud))
++ if (pud_leaf(*pud) || !pud_present(*pud))
+ return (pte_t *)pud;
+
+ pmd = pmd_offset(pud, address);
+@@ -743,7 +743,7 @@ pmd_t *lookup_pmd_address(unsigned long address)
+ return NULL;
+
+ pud = pud_offset(p4d, address);
+- if (pud_none(*pud) || pud_large(*pud) || !pud_present(*pud))
++ if (pud_none(*pud) || pud_leaf(*pud) || !pud_present(*pud))
+ return NULL;
+
+ return pmd_offset(pud, address);
+@@ -1274,7 +1274,7 @@ static void unmap_pud_range(p4d_t *p4d, unsigned long start, unsigned long end)
+ */
+ while (end - start >= PUD_SIZE) {
+
+- if (pud_large(*pud))
++ if (pud_leaf(*pud))
+ pud_clear(pud);
+ else
+ unmap_pmd_range(pud, start, start + PUD_SIZE);
+diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
+index 9deadf517f14a..8e1ef5345b7a8 100644
+--- a/arch/x86/mm/pgtable.c
++++ b/arch/x86/mm/pgtable.c
+@@ -774,7 +774,7 @@ int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
+ */
+ int pud_clear_huge(pud_t *pud)
+ {
+- if (pud_large(*pud)) {
++ if (pud_leaf(*pud)) {
+ pud_clear(pud);
+ return 1;
+ }
+diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
+index 78414c6d1b5ed..51b6b78e6b175 100644
+--- a/arch/x86/mm/pti.c
++++ b/arch/x86/mm/pti.c
+@@ -217,7 +217,7 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
+
+ pud = pud_offset(p4d, address);
+ /* The user page tables do not use large mappings: */
+- if (pud_large(*pud)) {
++ if (pud_leaf(*pud)) {
+ WARN_ON(1);
+ return NULL;
+ }
+diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
+index 955133077c105..a6a4d3ca8ddc6 100644
+--- a/arch/x86/net/bpf_jit_comp.c
++++ b/arch/x86/net/bpf_jit_comp.c
+@@ -344,7 +344,7 @@ static int emit_call(u8 **pprog, void *func, void *ip)
+ static int emit_rsb_call(u8 **pprog, void *func, void *ip)
+ {
+ OPTIMIZER_HIDE_VAR(func);
+- x86_call_depth_emit_accounting(pprog, func);
++ ip += x86_call_depth_emit_accounting(pprog, func);
+ return emit_patch(pprog, func, ip, 0xE8);
+ }
+
+diff --git a/arch/x86/power/hibernate.c b/arch/x86/power/hibernate.c
+index 6f955eb1e1631..d8af46e677503 100644
+--- a/arch/x86/power/hibernate.c
++++ b/arch/x86/power/hibernate.c
+@@ -170,7 +170,7 @@ int relocate_restore_code(void)
+ goto out;
+ }
+ pud = pud_offset(p4d, relocated_restore_code);
+- if (pud_large(*pud)) {
++ if (pud_leaf(*pud)) {
+ set_pud(pud, __pud(pud_val(*pud) & ~_PAGE_NX));
+ goto out;
+ }
+diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
+index b6830554ff690..9d4a9311e819b 100644
+--- a/arch/x86/xen/mmu_pv.c
++++ b/arch/x86/xen/mmu_pv.c
+@@ -1082,7 +1082,7 @@ static void __init xen_cleanmfnmap_pud(pud_t *pud, bool unpin)
+ pmd_t *pmd_tbl;
+ int i;
+
+- if (pud_large(*pud)) {
++ if (pud_leaf(*pud)) {
+ pa = pud_val(*pud) & PHYSICAL_PAGE_MASK;
+ xen_free_ro_pages(pa, PUD_SIZE);
+ return;
+@@ -1863,7 +1863,7 @@ static phys_addr_t __init xen_early_virt_to_phys(unsigned long vaddr)
+ if (!pud_present(pud))
+ return 0;
+ pa = pud_val(pud) & PTE_PFN_MASK;
+- if (pud_large(pud))
++ if (pud_leaf(pud))
+ return pa + (vaddr & ~PUD_MASK);
+
+ pmd = native_make_pmd(xen_read_phys_ulong(pa + pmd_index(vaddr) *
+diff --git a/drivers/acpi/acpica/dbnames.c b/drivers/acpi/acpica/dbnames.c
+index b91155ea9c343..c9131259f717b 100644
+--- a/drivers/acpi/acpica/dbnames.c
++++ b/drivers/acpi/acpica/dbnames.c
+@@ -550,8 +550,12 @@ acpi_db_walk_for_fields(acpi_handle obj_handle,
+ ACPI_FREE(buffer.pointer);
+
+ buffer.length = ACPI_ALLOCATE_LOCAL_BUFFER;
+- acpi_evaluate_object(obj_handle, NULL, NULL, &buffer);
+-
++ status = acpi_evaluate_object(obj_handle, NULL, NULL, &buffer);
++ if (ACPI_FAILURE(status)) {
++ acpi_os_printf("Could Not evaluate object %p\n",
++ obj_handle);
++ return (AE_OK);
++ }
+ /*
+ * Since this is a field unit, surround the output in braces
+ */
+diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c
+index 45e48d653c60b..80a45e11fb5b6 100644
+--- a/drivers/ata/sata_mv.c
++++ b/drivers/ata/sata_mv.c
+@@ -787,37 +787,6 @@ static const struct ata_port_info mv_port_info[] = {
+ },
+ };
+
+-static const struct pci_device_id mv_pci_tbl[] = {
+- { PCI_VDEVICE(MARVELL, 0x5040), chip_504x },
+- { PCI_VDEVICE(MARVELL, 0x5041), chip_504x },
+- { PCI_VDEVICE(MARVELL, 0x5080), chip_5080 },
+- { PCI_VDEVICE(MARVELL, 0x5081), chip_508x },
+- /* RocketRAID 1720/174x have different identifiers */
+- { PCI_VDEVICE(TTI, 0x1720), chip_6042 },
+- { PCI_VDEVICE(TTI, 0x1740), chip_6042 },
+- { PCI_VDEVICE(TTI, 0x1742), chip_6042 },
+-
+- { PCI_VDEVICE(MARVELL, 0x6040), chip_604x },
+- { PCI_VDEVICE(MARVELL, 0x6041), chip_604x },
+- { PCI_VDEVICE(MARVELL, 0x6042), chip_6042 },
+- { PCI_VDEVICE(MARVELL, 0x6080), chip_608x },
+- { PCI_VDEVICE(MARVELL, 0x6081), chip_608x },
+-
+- { PCI_VDEVICE(ADAPTEC2, 0x0241), chip_604x },
+-
+- /* Adaptec 1430SA */
+- { PCI_VDEVICE(ADAPTEC2, 0x0243), chip_7042 },
+-
+- /* Marvell 7042 support */
+- { PCI_VDEVICE(MARVELL, 0x7042), chip_7042 },
+-
+- /* Highpoint RocketRAID PCIe series */
+- { PCI_VDEVICE(TTI, 0x2300), chip_7042 },
+- { PCI_VDEVICE(TTI, 0x2310), chip_7042 },
+-
+- { } /* terminate list */
+-};
+-
+ static const struct mv_hw_ops mv5xxx_ops = {
+ .phy_errata = mv5_phy_errata,
+ .enable_leds = mv5_enable_leds,
+@@ -4300,6 +4269,36 @@ static int mv_pci_init_one(struct pci_dev *pdev,
+ static int mv_pci_device_resume(struct pci_dev *pdev);
+ #endif
+
++static const struct pci_device_id mv_pci_tbl[] = {
++ { PCI_VDEVICE(MARVELL, 0x5040), chip_504x },
++ { PCI_VDEVICE(MARVELL, 0x5041), chip_504x },
++ { PCI_VDEVICE(MARVELL, 0x5080), chip_5080 },
++ { PCI_VDEVICE(MARVELL, 0x5081), chip_508x },
++ /* RocketRAID 1720/174x have different identifiers */
++ { PCI_VDEVICE(TTI, 0x1720), chip_6042 },
++ { PCI_VDEVICE(TTI, 0x1740), chip_6042 },
++ { PCI_VDEVICE(TTI, 0x1742), chip_6042 },
++
++ { PCI_VDEVICE(MARVELL, 0x6040), chip_604x },
++ { PCI_VDEVICE(MARVELL, 0x6041), chip_604x },
++ { PCI_VDEVICE(MARVELL, 0x6042), chip_6042 },
++ { PCI_VDEVICE(MARVELL, 0x6080), chip_608x },
++ { PCI_VDEVICE(MARVELL, 0x6081), chip_608x },
++
++ { PCI_VDEVICE(ADAPTEC2, 0x0241), chip_604x },
++
++ /* Adaptec 1430SA */
++ { PCI_VDEVICE(ADAPTEC2, 0x0243), chip_7042 },
++
++ /* Marvell 7042 support */
++ { PCI_VDEVICE(MARVELL, 0x7042), chip_7042 },
++
++ /* Highpoint RocketRAID PCIe series */
++ { PCI_VDEVICE(TTI, 0x2300), chip_7042 },
++ { PCI_VDEVICE(TTI, 0x2310), chip_7042 },
++
++ { } /* terminate list */
++};
+
+ static struct pci_driver mv_pci_driver = {
+ .name = DRV_NAME,
+@@ -4312,6 +4311,7 @@ static struct pci_driver mv_pci_driver = {
+ #endif
+
+ };
++MODULE_DEVICE_TABLE(pci, mv_pci_tbl);
+
+ /**
+ * mv_print_info - Dump key info to kernel log for perusal.
+@@ -4484,7 +4484,6 @@ static void __exit mv_exit(void)
+ MODULE_AUTHOR("Brett Russ");
+ MODULE_DESCRIPTION("SCSI low-level driver for Marvell SATA controllers");
+ MODULE_LICENSE("GPL v2");
+-MODULE_DEVICE_TABLE(pci, mv_pci_tbl);
+ MODULE_VERSION(DRV_VERSION);
+ MODULE_ALIAS("platform:" DRV_NAME);
+
+diff --git a/drivers/ata/sata_sx4.c b/drivers/ata/sata_sx4.c
+index b51d7a9d0d90c..a482741eb181f 100644
+--- a/drivers/ata/sata_sx4.c
++++ b/drivers/ata/sata_sx4.c
+@@ -957,8 +957,7 @@ static void pdc20621_get_from_dimm(struct ata_host *host, void *psource,
+
+ offset -= (idx * window_size);
+ idx++;
+- dist = ((long) (window_size - (offset + size))) >= 0 ? size :
+- (long) (window_size - offset);
++ dist = min(size, window_size - offset);
+ memcpy_fromio(psource, dimm_mmio + offset / 4, dist);
+
+ psource += dist;
+@@ -1005,8 +1004,7 @@ static void pdc20621_put_to_dimm(struct ata_host *host, void *psource,
+ readl(mmio + PDC_DIMM_WINDOW_CTLR);
+ offset -= (idx * window_size);
+ idx++;
+- dist = ((long)(s32)(window_size - (offset + size))) >= 0 ? size :
+- (long) (window_size - offset);
++ dist = min(size, window_size - offset);
+ memcpy_toio(dimm_mmio + offset / 4, psource, dist);
+ writel(0x01, mmio + PDC_GENERAL_CTLR);
+ readl(mmio + PDC_GENERAL_CTLR);
+diff --git a/drivers/base/core.c b/drivers/base/core.c
+index 2cc0ab8541680..0214288765c8c 100644
+--- a/drivers/base/core.c
++++ b/drivers/base/core.c
+@@ -44,6 +44,7 @@ static bool fw_devlink_is_permissive(void);
+ static void __fw_devlink_link_to_consumers(struct device *dev);
+ static bool fw_devlink_drv_reg_done;
+ static bool fw_devlink_best_effort;
++static struct workqueue_struct *device_link_wq;
+
+ /**
+ * __fwnode_link_add - Create a link between two fwnode_handles.
+@@ -531,12 +532,26 @@ static void devlink_dev_release(struct device *dev)
+ /*
+ * It may take a while to complete this work because of the SRCU
+ * synchronization in device_link_release_fn() and if the consumer or
+- * supplier devices get deleted when it runs, so put it into the "long"
+- * workqueue.
++ * supplier devices get deleted when it runs, so put it into the
++ * dedicated workqueue.
+ */
+- queue_work(system_long_wq, &link->rm_work);
++ queue_work(device_link_wq, &link->rm_work);
+ }
+
++/**
++ * device_link_wait_removal - Wait for ongoing devlink removal jobs to terminate
++ */
++void device_link_wait_removal(void)
++{
++ /*
++ * devlink removal jobs are queued in the dedicated work queue.
++ * To be sure that all removal jobs are terminated, ensure that any
++ * scheduled work has run to completion.
++ */
++ flush_workqueue(device_link_wq);
++}
++EXPORT_SYMBOL_GPL(device_link_wait_removal);
++
+ static struct class devlink_class = {
+ .name = "devlink",
+ .dev_groups = devlink_groups,
+@@ -4090,9 +4105,14 @@ int __init devices_init(void)
+ sysfs_dev_char_kobj = kobject_create_and_add("char", dev_kobj);
+ if (!sysfs_dev_char_kobj)
+ goto char_kobj_err;
++ device_link_wq = alloc_workqueue("device_link_wq", 0, 0);
++ if (!device_link_wq)
++ goto wq_err;
+
+ return 0;
+
++ wq_err:
++ kobject_put(sysfs_dev_char_kobj);
+ char_kobj_err:
+ kobject_put(sysfs_dev_block_kobj);
+ block_kobj_err:
+diff --git a/drivers/base/regmap/regcache-maple.c b/drivers/base/regmap/regcache-maple.c
+index 41edd6a430eb4..55999a50ccc0b 100644
+--- a/drivers/base/regmap/regcache-maple.c
++++ b/drivers/base/regmap/regcache-maple.c
+@@ -112,7 +112,7 @@ static int regcache_maple_drop(struct regmap *map, unsigned int min,
+ unsigned long *entry, *lower, *upper;
+ unsigned long lower_index, lower_last;
+ unsigned long upper_index, upper_last;
+- int ret;
++ int ret = 0;
+
+ lower = NULL;
+ upper = NULL;
+@@ -145,7 +145,7 @@ static int regcache_maple_drop(struct regmap *map, unsigned int min,
+ upper_index = max + 1;
+ upper_last = mas.last;
+
+- upper = kmemdup(&entry[max + 1],
++ upper = kmemdup(&entry[max - mas.index + 1],
+ ((mas.last - max) *
+ sizeof(unsigned long)),
+ map->alloc_flags);
+@@ -244,7 +244,7 @@ static int regcache_maple_sync(struct regmap *map, unsigned int min,
+ unsigned long lmin = min;
+ unsigned long lmax = max;
+ unsigned int r, v, sync_start;
+- int ret;
++ int ret = 0;
+ bool sync_needed = false;
+
+ map->cache_bypass = true;
+diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c
+index 0211f704a358b..5277090c6d6d7 100644
+--- a/drivers/bluetooth/btqca.c
++++ b/drivers/bluetooth/btqca.c
+@@ -758,11 +758,15 @@ EXPORT_SYMBOL_GPL(qca_uart_setup);
+
+ int qca_set_bdaddr(struct hci_dev *hdev, const bdaddr_t *bdaddr)
+ {
++ bdaddr_t bdaddr_swapped;
+ struct sk_buff *skb;
+ int err;
+
+- skb = __hci_cmd_sync_ev(hdev, EDL_WRITE_BD_ADDR_OPCODE, 6, bdaddr,
+- HCI_EV_VENDOR, HCI_INIT_TIMEOUT);
++ baswap(&bdaddr_swapped, bdaddr);
++
++ skb = __hci_cmd_sync_ev(hdev, EDL_WRITE_BD_ADDR_OPCODE, 6,
++ &bdaddr_swapped, HCI_EV_VENDOR,
++ HCI_INIT_TIMEOUT);
+ if (IS_ERR(skb)) {
+ err = PTR_ERR(skb);
+ bt_dev_err(hdev, "QCA Change address cmd failed (%d)", err);
+diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c
+index f2d4985e036e4..8861b8017fbdf 100644
+--- a/drivers/bluetooth/hci_qca.c
++++ b/drivers/bluetooth/hci_qca.c
+@@ -7,7 +7,6 @@
+ *
+ * Copyright (C) 2007 Texas Instruments, Inc.
+ * Copyright (c) 2010, 2012, 2018 The Linux Foundation. All rights reserved.
+- * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ *
+ * Acknowledgements:
+ * This file is based on hci_ll.c, which was...
+@@ -226,6 +225,7 @@ struct qca_serdev {
+ struct qca_power *bt_power;
+ u32 init_speed;
+ u32 oper_speed;
++ bool bdaddr_property_broken;
+ const char *firmware_name;
+ };
+
+@@ -1825,6 +1825,7 @@ static int qca_setup(struct hci_uart *hu)
+ const char *firmware_name = qca_get_firmware_name(hu);
+ int ret;
+ struct qca_btsoc_version ver;
++ struct qca_serdev *qcadev;
+ const char *soc_name;
+
+ ret = qca_check_speeds(hu);
+@@ -1882,16 +1883,11 @@ static int qca_setup(struct hci_uart *hu)
+ case QCA_WCN6750:
+ case QCA_WCN6855:
+ case QCA_WCN7850:
++ set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks);
+
+- /* Set BDA quirk bit for reading BDA value from fwnode property
+- * only if that property exist in DT.
+- */
+- if (fwnode_property_present(dev_fwnode(hdev->dev.parent), "local-bd-address")) {
+- set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks);
+- bt_dev_info(hdev, "setting quirk bit to read BDA from fwnode later");
+- } else {
+- bt_dev_dbg(hdev, "local-bd-address` is not present in the devicetree so not setting quirk bit for BDA");
+- }
++ qcadev = serdev_device_get_drvdata(hu->serdev);
++ if (qcadev->bdaddr_property_broken)
++ set_bit(HCI_QUIRK_BDADDR_PROPERTY_BROKEN, &hdev->quirks);
+
+ hci_set_aosp_capable(hdev);
+
+@@ -2264,6 +2260,9 @@ static int qca_serdev_probe(struct serdev_device *serdev)
+ if (!qcadev->oper_speed)
+ BT_DBG("UART will pick default operating speed");
+
++ qcadev->bdaddr_property_broken = device_property_read_bool(&serdev->dev,
++ "qcom,local-bd-address-broken");
++
+ if (data)
+ qcadev->btsoc_type = data->soc_type;
+ else
+diff --git a/drivers/dma-buf/st-dma-fence-chain.c b/drivers/dma-buf/st-dma-fence-chain.c
+index c0979c8049b5a..661de4add4c72 100644
+--- a/drivers/dma-buf/st-dma-fence-chain.c
++++ b/drivers/dma-buf/st-dma-fence-chain.c
+@@ -84,11 +84,11 @@ static int sanitycheck(void *arg)
+ return -ENOMEM;
+
+ chain = mock_chain(NULL, f, 1);
+- if (!chain)
++ if (chain)
++ dma_fence_enable_sw_signaling(chain);
++ else
+ err = -ENOMEM;
+
+- dma_fence_enable_sw_signaling(chain);
+-
+ dma_fence_signal(f);
+ dma_fence_put(f);
+
+diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c
+index bfa30625f5d03..3dc2f9aaf08db 100644
+--- a/drivers/firmware/efi/libstub/efi-stub-helper.c
++++ b/drivers/firmware/efi/libstub/efi-stub-helper.c
+@@ -24,6 +24,8 @@ static bool efi_noinitrd;
+ static bool efi_nosoftreserve;
+ static bool efi_disable_pci_dma = IS_ENABLED(CONFIG_EFI_DISABLE_PCI_DMA);
+
++int efi_mem_encrypt;
++
+ bool __pure __efi_soft_reserve_enabled(void)
+ {
+ return !efi_nosoftreserve;
+@@ -75,6 +77,12 @@ efi_status_t efi_parse_options(char const *cmdline)
+ efi_noinitrd = true;
+ } else if (IS_ENABLED(CONFIG_X86_64) && !strcmp(param, "no5lvl")) {
+ efi_no5lvl = true;
++ } else if (IS_ENABLED(CONFIG_ARCH_HAS_MEM_ENCRYPT) &&
++ !strcmp(param, "mem_encrypt") && val) {
++ if (parse_option_str(val, "on"))
++ efi_mem_encrypt = 1;
++ else if (parse_option_str(val, "off"))
++ efi_mem_encrypt = -1;
+ } else if (!strcmp(param, "efi") && val) {
+ efi_nochunk = parse_option_str(val, "nochunk");
+ efi_novamap |= parse_option_str(val, "novamap");
+diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h
+index c04b82ea40f21..fc18fd649ed77 100644
+--- a/drivers/firmware/efi/libstub/efistub.h
++++ b/drivers/firmware/efi/libstub/efistub.h
+@@ -37,8 +37,8 @@ extern bool efi_no5lvl;
+ extern bool efi_nochunk;
+ extern bool efi_nokaslr;
+ extern int efi_loglevel;
++extern int efi_mem_encrypt;
+ extern bool efi_novamap;
+-
+ extern const efi_system_table_t *efi_system_table;
+
+ typedef union efi_dxe_services_table efi_dxe_services_table_t;
+diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c
+index 8307950fe3ced..e4ae3db727efa 100644
+--- a/drivers/firmware/efi/libstub/x86-stub.c
++++ b/drivers/firmware/efi/libstub/x86-stub.c
+@@ -238,6 +238,15 @@ efi_status_t efi_adjust_memory_range_protection(unsigned long start,
+ rounded_end = roundup(start + size, EFI_PAGE_SIZE);
+
+ if (memattr != NULL) {
++ status = efi_call_proto(memattr, set_memory_attributes,
++ rounded_start,
++ rounded_end - rounded_start,
++ EFI_MEMORY_RO);
++ if (status != EFI_SUCCESS) {
++ efi_warn("Failed to set EFI_MEMORY_RO attribute\n");
++ return status;
++ }
++
+ status = efi_call_proto(memattr, clear_memory_attributes,
+ rounded_start,
+ rounded_end - rounded_start,
+@@ -816,7 +825,7 @@ static efi_status_t efi_decompress_kernel(unsigned long *kernel_entry)
+
+ *kernel_entry = addr + entry;
+
+- return efi_adjust_memory_range_protection(addr, kernel_total_size);
++ return efi_adjust_memory_range_protection(addr, kernel_text_size);
+ }
+
+ static void __noreturn enter_kernel(unsigned long kernel_addr,
+@@ -888,6 +897,9 @@ void __noreturn efi_stub_entry(efi_handle_t handle,
+ }
+ }
+
++ if (efi_mem_encrypt > 0)
++ hdr->xloadflags |= XLF_MEM_ENCRYPTION;
++
+ status = efi_decompress_kernel(&kernel_entry);
+ if (status != EFI_SUCCESS) {
+ efi_err("Failed to decompress kernel\n");
+diff --git a/drivers/gpio/gpiolib-cdev.c b/drivers/gpio/gpiolib-cdev.c
+index 4f3e66ece7f78..84125e55de101 100644
+--- a/drivers/gpio/gpiolib-cdev.c
++++ b/drivers/gpio/gpiolib-cdev.c
+@@ -655,6 +655,25 @@ static u32 line_event_id(int level)
+ GPIO_V2_LINE_EVENT_FALLING_EDGE;
+ }
+
++static inline char *make_irq_label(const char *orig)
++{
++ char *new;
++
++ if (!orig)
++ return NULL;
++
++ new = kstrdup_and_replace(orig, '/', ':', GFP_KERNEL);
++ if (!new)
++ return ERR_PTR(-ENOMEM);
++
++ return new;
++}
++
++static inline void free_irq_label(const char *label)
++{
++ kfree(label);
++}
++
+ #ifdef CONFIG_HTE
+
+ static enum hte_return process_hw_ts_thread(void *p)
+@@ -942,6 +961,7 @@ static int debounce_setup(struct line *line, unsigned int debounce_period_us)
+ {
+ unsigned long irqflags;
+ int ret, level, irq;
++ char *label;
+
+ /* try hardware */
+ ret = gpiod_set_debounce(line->desc, debounce_period_us);
+@@ -964,11 +984,17 @@ static int debounce_setup(struct line *line, unsigned int debounce_period_us)
+ if (irq < 0)
+ return -ENXIO;
+
++ label = make_irq_label(line->req->label);
++ if (IS_ERR(label))
++ return -ENOMEM;
++
+ irqflags = IRQF_TRIGGER_FALLING | IRQF_TRIGGER_RISING;
+ ret = request_irq(irq, debounce_irq_handler, irqflags,
+- line->req->label, line);
+- if (ret)
++ label, line);
++ if (ret) {
++ free_irq_label(label);
+ return ret;
++ }
+ line->irq = irq;
+ } else {
+ ret = hte_edge_setup(line, GPIO_V2_LINE_FLAG_EDGE_BOTH);
+@@ -1013,7 +1039,7 @@ static u32 gpio_v2_line_config_debounce_period(struct gpio_v2_line_config *lc,
+ static void edge_detector_stop(struct line *line)
+ {
+ if (line->irq) {
+- free_irq(line->irq, line);
++ free_irq_label(free_irq(line->irq, line));
+ line->irq = 0;
+ }
+
+@@ -1038,6 +1064,7 @@ static int edge_detector_setup(struct line *line,
+ unsigned long irqflags = 0;
+ u64 eflags;
+ int irq, ret;
++ char *label;
+
+ eflags = edflags & GPIO_V2_LINE_EDGE_FLAGS;
+ if (eflags && !kfifo_initialized(&line->req->events)) {
+@@ -1074,11 +1101,17 @@ static int edge_detector_setup(struct line *line,
+ IRQF_TRIGGER_RISING : IRQF_TRIGGER_FALLING;
+ irqflags |= IRQF_ONESHOT;
+
++ label = make_irq_label(line->req->label);
++ if (IS_ERR(label))
++ return PTR_ERR(label);
++
+ /* Request a thread to read the events */
+ ret = request_threaded_irq(irq, edge_irq_handler, edge_irq_thread,
+- irqflags, line->req->label, line);
+- if (ret)
++ irqflags, label, line);
++ if (ret) {
++ free_irq_label(label);
+ return ret;
++ }
+
+ line->irq = irq;
+ return 0;
+@@ -1943,7 +1976,7 @@ static void lineevent_free(struct lineevent_state *le)
+ blocking_notifier_chain_unregister(&le->gdev->device_notifier,
+ &le->device_unregistered_nb);
+ if (le->irq)
+- free_irq(le->irq, le);
++ free_irq_label(free_irq(le->irq, le));
+ if (le->desc)
+ gpiod_free(le->desc);
+ kfree(le->label);
+@@ -2091,6 +2124,7 @@ static int lineevent_create(struct gpio_device *gdev, void __user *ip)
+ int fd;
+ int ret;
+ int irq, irqflags = 0;
++ char *label;
+
+ if (copy_from_user(&eventreq, ip, sizeof(eventreq)))
+ return -EFAULT;
+@@ -2175,15 +2209,23 @@ static int lineevent_create(struct gpio_device *gdev, void __user *ip)
+ if (ret)
+ goto out_free_le;
+
++ label = make_irq_label(le->label);
++ if (IS_ERR(label)) {
++ ret = PTR_ERR(label);
++ goto out_free_le;
++ }
++
+ /* Request a thread to read the events */
+ ret = request_threaded_irq(irq,
+ lineevent_irq_handler,
+ lineevent_irq_thread,
+ irqflags,
+- le->label,
++ label,
+ le);
+- if (ret)
++ if (ret) {
++ free_irq_label(label);
+ goto out_free_le;
++ }
+
+ le->irq = irq;
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+index 85efd686e538d..d59e8536192ca 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+@@ -1369,6 +1369,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
+ void amdgpu_driver_release_kms(struct drm_device *dev);
+
+ int amdgpu_device_ip_suspend(struct amdgpu_device *adev);
++int amdgpu_device_prepare(struct drm_device *dev);
+ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon);
+ int amdgpu_device_resume(struct drm_device *dev, bool fbcon);
+ u32 amdgpu_get_vblank_counter_kms(struct drm_crtc *crtc);
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+index 79261bec26542..062d78818da16 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+@@ -1549,6 +1549,7 @@ static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
+ } else {
+ pr_info("switched off\n");
+ dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
++ amdgpu_device_prepare(dev);
+ amdgpu_device_suspend(dev, true);
+ amdgpu_device_cache_pci_state(pdev);
+ /* Shut down the device */
+@@ -4094,6 +4095,43 @@ static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
+ /*
+ * Suspend & resume.
+ */
++/**
++ * amdgpu_device_prepare - prepare for device suspend
++ *
++ * @dev: drm dev pointer
++ *
++ * Prepare to put the hw in the suspend state (all asics).
++ * Returns 0 for success or an error on failure.
++ * Called at driver suspend.
++ */
++int amdgpu_device_prepare(struct drm_device *dev)
++{
++ struct amdgpu_device *adev = drm_to_adev(dev);
++ int i, r;
++
++ if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
++ return 0;
++
++ /* Evict the majority of BOs before starting suspend sequence */
++ r = amdgpu_device_evict_resources(adev);
++ if (r)
++ return r;
++
++ flush_delayed_work(&adev->gfx.gfx_off_delay_work);
++
++ for (i = 0; i < adev->num_ip_blocks; i++) {
++ if (!adev->ip_blocks[i].status.valid)
++ continue;
++ if (!adev->ip_blocks[i].version->funcs->prepare_suspend)
++ continue;
++ r = adev->ip_blocks[i].version->funcs->prepare_suspend((void *)adev);
++ if (r)
++ return r;
++ }
++
++ return 0;
++}
++
+ /**
+ * amdgpu_device_suspend - initiate device suspend
+ *
+@@ -4114,11 +4152,6 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
+
+ adev->in_suspend = true;
+
+- /* Evict the majority of BOs before grabbing the full access */
+- r = amdgpu_device_evict_resources(adev);
+- if (r)
+- return r;
+-
+ if (amdgpu_sriov_vf(adev)) {
+ amdgpu_virt_fini_data_exchange(adev);
+ r = amdgpu_virt_request_full_gpu(adev, false);
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+index 3204c3a42f2a3..f9bc38d20ce3e 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+@@ -2386,8 +2386,9 @@ static int amdgpu_pmops_prepare(struct device *dev)
+ /* Return a positive number here so
+ * DPM_FLAG_SMART_SUSPEND works properly
+ */
+- if (amdgpu_device_supports_boco(drm_dev))
+- return pm_runtime_suspended(dev);
++ if (amdgpu_device_supports_boco(drm_dev) &&
++ pm_runtime_suspended(dev))
++ return 1;
+
+ /* if we will not support s3 or s2i for the device
+ * then skip suspend
+@@ -2396,7 +2397,7 @@ static int amdgpu_pmops_prepare(struct device *dev)
+ !amdgpu_acpi_is_s3_active(adev))
+ return 1;
+
+- return 0;
++ return amdgpu_device_prepare(drm_dev);
+ }
+
+ static void amdgpu_pmops_complete(struct device *dev)
+@@ -2598,6 +2599,9 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev)
+ if (amdgpu_device_supports_boco(drm_dev))
+ adev->mp1_state = PP_MP1_STATE_UNLOAD;
+
++ ret = amdgpu_device_prepare(drm_dev);
++ if (ret)
++ return ret;
+ ret = amdgpu_device_suspend(drm_dev, false);
+ if (ret) {
+ adev->in_runpm = false;
+diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
+index 251dd800a2a66..7b5c1498941dd 100644
+--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
++++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
+@@ -1179,9 +1179,10 @@ void dce110_disable_stream(struct pipe_ctx *pipe_ctx)
+ dto_params.timing = &pipe_ctx->stream->timing;
+ dp_hpo_inst = pipe_ctx->stream_res.hpo_dp_stream_enc->inst;
+ if (dccg) {
+- dccg->funcs->set_dtbclk_dto(dccg, &dto_params);
+ dccg->funcs->disable_symclk32_se(dccg, dp_hpo_inst);
+ dccg->funcs->set_dpstreamclk(dccg, REFCLK, tg->inst, dp_hpo_inst);
++ if (dccg && dccg->funcs->set_dtbclk_dto)
++ dccg->funcs->set_dtbclk_dto(dccg, &dto_params);
+ }
+ } else if (dccg && dccg->funcs->disable_symclk_se) {
+ dccg->funcs->disable_symclk_se(dccg, stream_enc->stream_enc_inst,
+diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
+index 1e3803739ae61..12af2859002f7 100644
+--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
++++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
+@@ -2728,18 +2728,17 @@ void dcn20_enable_stream(struct pipe_ctx *pipe_ctx)
+ }
+
+ if (dc->link_srv->dp_is_128b_132b_signal(pipe_ctx)) {
+- dp_hpo_inst = pipe_ctx->stream_res.hpo_dp_stream_enc->inst;
+- dccg->funcs->set_dpstreamclk(dccg, DTBCLK0, tg->inst, dp_hpo_inst);
+-
+- phyd32clk = get_phyd32clk_src(link);
+- dccg->funcs->enable_symclk32_se(dccg, dp_hpo_inst, phyd32clk);
+-
+ dto_params.otg_inst = tg->inst;
+ dto_params.pixclk_khz = pipe_ctx->stream->timing.pix_clk_100hz / 10;
+ dto_params.num_odm_segments = get_odm_segment_count(pipe_ctx);
+ dto_params.timing = &pipe_ctx->stream->timing;
+ dto_params.ref_dtbclk_khz = dc->clk_mgr->funcs->get_dtb_ref_clk_frequency(dc->clk_mgr);
+ dccg->funcs->set_dtbclk_dto(dccg, &dto_params);
++ dp_hpo_inst = pipe_ctx->stream_res.hpo_dp_stream_enc->inst;
++ dccg->funcs->set_dpstreamclk(dccg, DTBCLK0, tg->inst, dp_hpo_inst);
++
++ phyd32clk = get_phyd32clk_src(link);
++ dccg->funcs->enable_symclk32_se(dccg, dp_hpo_inst, phyd32clk);
+ } else {
+ }
+ if (hws->funcs.calculate_dccg_k1_k2_values && dc->res_pool->dccg->funcs->set_pixel_rate_div) {
+diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h
+index abe829bbd54af..a9880fc531955 100644
+--- a/drivers/gpu/drm/amd/include/amd_shared.h
++++ b/drivers/gpu/drm/amd/include/amd_shared.h
+@@ -295,6 +295,7 @@ struct amd_ip_funcs {
+ int (*hw_init)(void *handle);
+ int (*hw_fini)(void *handle);
+ void (*late_fini)(void *handle);
++ int (*prepare_suspend)(void *handle);
+ int (*suspend)(void *handle);
+ int (*resume)(void *handle);
+ bool (*is_idle)(void *handle);
+diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c
+index 7352bde299d54..03bd3c7bd0dc2 100644
+--- a/drivers/gpu/drm/drm_prime.c
++++ b/drivers/gpu/drm/drm_prime.c
+@@ -582,7 +582,12 @@ int drm_gem_map_attach(struct dma_buf *dma_buf,
+ {
+ struct drm_gem_object *obj = dma_buf->priv;
+
+- if (!obj->funcs->get_sg_table)
++ /*
++ * drm_gem_map_dma_buf() requires obj->get_sg_table(), but drivers
++ * that implement their own ->map_dma_buf() do not.
++ */
++ if (dma_buf->ops->map_dma_buf == drm_gem_map_dma_buf &&
++ !obj->funcs->get_sg_table)
+ return -ENOSYS;
+
+ return drm_gem_pin(obj);
+diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
+index 79f65eff6bb2a..23400313d8a64 100644
+--- a/drivers/gpu/drm/i915/Makefile
++++ b/drivers/gpu/drm/i915/Makefile
+@@ -104,6 +104,7 @@ gt-y += \
+ gt/intel_ggtt_fencing.o \
+ gt/intel_gt.o \
+ gt/intel_gt_buffer_pool.o \
++ gt/intel_gt_ccs_mode.o \
+ gt/intel_gt_clock_utils.o \
+ gt/intel_gt_debugfs.o \
+ gt/intel_gt_engines_debugfs.o \
+diff --git a/drivers/gpu/drm/i915/display/intel_cursor.c b/drivers/gpu/drm/i915/display/intel_cursor.c
+index b342fad180ca5..61df6cd3f3778 100644
+--- a/drivers/gpu/drm/i915/display/intel_cursor.c
++++ b/drivers/gpu/drm/i915/display/intel_cursor.c
+@@ -23,6 +23,8 @@
+ #include "intel_psr.h"
+ #include "skl_watermark.h"
+
++#include "gem/i915_gem_object.h"
++
+ /* Cursor formats */
+ static const u32 intel_cursor_formats[] = {
+ DRM_FORMAT_ARGB8888,
+@@ -32,12 +34,10 @@ static u32 intel_cursor_base(const struct intel_plane_state *plane_state)
+ {
+ struct drm_i915_private *dev_priv =
+ to_i915(plane_state->uapi.plane->dev);
+- const struct drm_framebuffer *fb = plane_state->hw.fb;
+- const struct drm_i915_gem_object *obj = intel_fb_obj(fb);
+ u32 base;
+
+ if (DISPLAY_INFO(dev_priv)->cursor_needs_physical)
+- base = sg_dma_address(obj->mm.pages->sgl);
++ base = plane_state->phys_dma_addr;
+ else
+ base = intel_plane_ggtt_offset(plane_state);
+
+diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h
+index 7fc92b1474cc4..8b0dc2b75da4a 100644
+--- a/drivers/gpu/drm/i915/display/intel_display_types.h
++++ b/drivers/gpu/drm/i915/display/intel_display_types.h
+@@ -701,6 +701,7 @@ struct intel_plane_state {
+ #define PLANE_HAS_FENCE BIT(0)
+
+ struct intel_fb_view view;
++ u32 phys_dma_addr; /* for cursor_needs_physical */
+
+ /* Plane pxp decryption state */
+ bool decrypt;
+diff --git a/drivers/gpu/drm/i915/display/intel_fb_pin.c b/drivers/gpu/drm/i915/display/intel_fb_pin.c
+index fffd568070d41..a131656757f2b 100644
+--- a/drivers/gpu/drm/i915/display/intel_fb_pin.c
++++ b/drivers/gpu/drm/i915/display/intel_fb_pin.c
+@@ -254,6 +254,16 @@ int intel_plane_pin_fb(struct intel_plane_state *plane_state)
+ return PTR_ERR(vma);
+
+ plane_state->ggtt_vma = vma;
++
++ /*
++ * Pre-populate the dma address before we enter the vblank
++ * evade critical section as i915_gem_object_get_dma_address()
++ * will trigger might_sleep() even if it won't actually sleep,
++ * which is the case when the fb has already been pinned.
++ */
++ if (phys_cursor)
++ plane_state->phys_dma_addr =
++ i915_gem_object_get_dma_address(intel_fb_obj(fb), 0);
+ } else {
+ struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb);
+
+diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c
+index ffc15d278a39d..d557ecd4e1ebe 100644
+--- a/drivers/gpu/drm/i915/display/skl_universal_plane.c
++++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c
+@@ -20,6 +20,7 @@
+ #include "skl_scaler.h"
+ #include "skl_universal_plane.h"
+ #include "skl_watermark.h"
++#include "gt/intel_gt.h"
+ #include "pxp/intel_pxp.h"
+
+ static const u32 skl_plane_formats[] = {
+@@ -2169,8 +2170,8 @@ static bool skl_plane_has_rc_ccs(struct drm_i915_private *i915,
+ enum pipe pipe, enum plane_id plane_id)
+ {
+ /* Wa_14017240301 */
+- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) ||
+- IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0))
++ if (IS_GFX_GT_IP_STEP(to_gt(i915), IP_VER(12, 70), STEP_A0, STEP_B0) ||
++ IS_GFX_GT_IP_STEP(to_gt(i915), IP_VER(12, 71), STEP_A0, STEP_B0))
+ return false;
+
+ /* Wa_22011186057 */
+diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c b/drivers/gpu/drm/i915/gem/i915_gem_create.c
+index d24c0ce8805c7..19156ba4b9ef4 100644
+--- a/drivers/gpu/drm/i915/gem/i915_gem_create.c
++++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c
+@@ -405,8 +405,8 @@ static int ext_set_pat(struct i915_user_extension __user *base, void *data)
+ BUILD_BUG_ON(sizeof(struct drm_i915_gem_create_ext_set_pat) !=
+ offsetofend(struct drm_i915_gem_create_ext_set_pat, rsvd));
+
+- /* Limiting the extension only to Meteor Lake */
+- if (!IS_METEORLAKE(i915))
++ /* Limiting the extension only to Xe_LPG and beyond */
++ if (GRAPHICS_VER_FULL(i915) < IP_VER(12, 70))
+ return -ENODEV;
+
+ if (copy_from_user(&ext, base, sizeof(ext)))
+diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
+index 7ad36198aab2a..cddf8c16e9a72 100644
+--- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
++++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
+@@ -4,9 +4,9 @@
+ */
+
+ #include "gen8_engine_cs.h"
+-#include "i915_drv.h"
+ #include "intel_engine_regs.h"
+ #include "intel_gpu_commands.h"
++#include "intel_gt.h"
+ #include "intel_lrc.h"
+ #include "intel_ring.h"
+
+@@ -226,8 +226,8 @@ u32 *gen12_emit_aux_table_inv(struct intel_engine_cs *engine, u32 *cs)
+ static int mtl_dummy_pipe_control(struct i915_request *rq)
+ {
+ /* Wa_14016712196 */
+- if (IS_MTL_GRAPHICS_STEP(rq->i915, M, STEP_A0, STEP_B0) ||
+- IS_MTL_GRAPHICS_STEP(rq->i915, P, STEP_A0, STEP_B0)) {
++ if (IS_GFX_GT_IP_RANGE(rq->engine->gt, IP_VER(12, 70), IP_VER(12, 74)) ||
++ IS_DG2(rq->i915)) {
+ u32 *cs;
+
+ /* dummy PIPE_CONTROL + depth flush */
+@@ -808,6 +808,7 @@ u32 *gen12_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs)
+ u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
+ {
+ struct drm_i915_private *i915 = rq->i915;
++ struct intel_gt *gt = rq->engine->gt;
+ u32 flags = (PIPE_CONTROL_CS_STALL |
+ PIPE_CONTROL_TLB_INVALIDATE |
+ PIPE_CONTROL_TILE_CACHE_FLUSH |
+@@ -818,8 +819,7 @@ u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
+ PIPE_CONTROL_FLUSH_ENABLE);
+
+ /* Wa_14016712196 */
+- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) ||
+- IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0))
++ if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74)) || IS_DG2(i915))
+ /* dummy PIPE_CONTROL + depth flush */
+ cs = gen12_emit_pipe_control(cs, 0,
+ PIPE_CONTROL_DEPTH_CACHE_FLUSH, 0);
+diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+index e85d70a62123f..765387639dabb 100644
+--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
++++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+@@ -912,6 +912,23 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt)
+ info->engine_mask &= ~BIT(GSC0);
+ }
+
++ /*
++ * Do not create the command streamer for CCS slices beyond the first.
++ * All the workload submitted to the first engine will be shared among
++ * all the slices.
++ *
++ * Once the user will be allowed to customize the CCS mode, then this
++ * check needs to be removed.
++ */
++ if (IS_DG2(gt->i915)) {
++ u8 first_ccs = __ffs(CCS_MASK(gt));
++
++ /* Mask off all the CCS engine */
++ info->engine_mask &= ~GENMASK(CCS3, CCS0);
++ /* Put back in the first CCS engine */
++ info->engine_mask |= BIT(_CCS(first_ccs));
++ }
++
+ return info->engine_mask;
+ }
+
+@@ -1616,9 +1633,7 @@ static int __intel_engine_stop_cs(struct intel_engine_cs *engine,
+ * Wa_22011802037: Prior to doing a reset, ensure CS is
+ * stopped, set ring stop bit and prefetch disable bit to halt CS
+ */
+- if (IS_MTL_GRAPHICS_STEP(engine->i915, M, STEP_A0, STEP_B0) ||
+- (GRAPHICS_VER(engine->i915) >= 11 &&
+- GRAPHICS_VER_FULL(engine->i915) < IP_VER(12, 70)))
++ if (intel_engine_reset_needs_wa_22011802037(engine->gt))
+ intel_uncore_write_fw(uncore, RING_MODE_GEN7(engine->mmio_base),
+ _MASKED_BIT_ENABLE(GEN12_GFX_PREFETCH_DISABLE));
+
+diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+index a95615b345cd7..5a3a5b29d1507 100644
+--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
++++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+@@ -21,7 +21,7 @@ static void intel_gsc_idle_msg_enable(struct intel_engine_cs *engine)
+ {
+ struct drm_i915_private *i915 = engine->i915;
+
+- if (IS_METEORLAKE(i915) && engine->id == GSC0) {
++ if (MEDIA_VER(i915) >= 13 && engine->id == GSC0) {
+ intel_uncore_write(engine->gt->uncore,
+ RC_PSMI_CTRL_GSCCS,
+ _MASKED_BIT_DISABLE(IDLE_MSG_DISABLE));
+diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+index 5a720e2523126..42e09f1589205 100644
+--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
++++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+@@ -3001,9 +3001,7 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine)
+ * Wa_22011802037: In addition to stopping the cs, we need
+ * to wait for any pending mi force wakeups
+ */
+- if (IS_MTL_GRAPHICS_STEP(engine->i915, M, STEP_A0, STEP_B0) ||
+- (GRAPHICS_VER(engine->i915) >= 11 &&
+- GRAPHICS_VER_FULL(engine->i915) < IP_VER(12, 70)))
++ if (intel_engine_reset_needs_wa_22011802037(engine->gt))
+ intel_engine_wait_for_pending_mi_fw(engine);
+
+ engine->execlists.reset_ccid = active_ccid(engine);
+diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h
+index 6c34547b58b59..6e63b46682f76 100644
+--- a/drivers/gpu/drm/i915/gt/intel_gt.h
++++ b/drivers/gpu/drm/i915/gt/intel_gt.h
+@@ -14,6 +14,37 @@
+ struct drm_i915_private;
+ struct drm_printer;
+
++/*
++ * Check that the GT is a graphics GT and has an IP version within the
++ * specified range (inclusive).
++ */
++#define IS_GFX_GT_IP_RANGE(gt, from, until) ( \
++ BUILD_BUG_ON_ZERO((from) < IP_VER(2, 0)) + \
++ BUILD_BUG_ON_ZERO((until) < (from)) + \
++ ((gt)->type != GT_MEDIA && \
++ GRAPHICS_VER_FULL((gt)->i915) >= (from) && \
++ GRAPHICS_VER_FULL((gt)->i915) <= (until)))
++
++/*
++ * Check that the GT is a graphics GT with a specific IP version and has
++ * a stepping in the range [from, until). The lower stepping bound is
++ * inclusive, the upper bound is exclusive. The most common use-case of this
++ * macro is for checking bounds for workarounds, which usually have a stepping
++ * ("from") at which the hardware issue is first present and another stepping
++ * ("until") at which a hardware fix is present and the software workaround is
++ * no longer necessary. E.g.,
++ *
++ * IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0)
++ * IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_B1, STEP_FOREVER)
++ *
++ * "STEP_FOREVER" can be passed as "until" for workarounds that have no upper
++ * stepping bound for the specified IP version.
++ */
++#define IS_GFX_GT_IP_STEP(gt, ipver, from, until) ( \
++ BUILD_BUG_ON_ZERO((until) <= (from)) + \
++ (IS_GFX_GT_IP_RANGE((gt), (ipver), (ipver)) && \
++ IS_GRAPHICS_STEP((gt)->i915, (from), (until))))
++
+ #define GT_TRACE(gt, fmt, ...) do { \
+ const struct intel_gt *gt__ __maybe_unused = (gt); \
+ GEM_TRACE("%s " fmt, dev_name(gt__->i915->drm.dev), \
+diff --git a/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c
+new file mode 100644
+index 0000000000000..044219c5960a5
+--- /dev/null
++++ b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c
+@@ -0,0 +1,39 @@
++// SPDX-License-Identifier: MIT
++/*
++ * Copyright © 2024 Intel Corporation
++ */
++
++#include "i915_drv.h"
++#include "intel_gt.h"
++#include "intel_gt_ccs_mode.h"
++#include "intel_gt_regs.h"
++
++void intel_gt_apply_ccs_mode(struct intel_gt *gt)
++{
++ int cslice;
++ u32 mode = 0;
++ int first_ccs = __ffs(CCS_MASK(gt));
++
++ if (!IS_DG2(gt->i915))
++ return;
++
++ /* Build the value for the fixed CCS load balancing */
++ for (cslice = 0; cslice < I915_MAX_CCS; cslice++) {
++ if (CCS_MASK(gt) & BIT(cslice))
++ /*
++ * If available, assign the cslice
++ * to the first available engine...
++ */
++ mode |= XEHP_CCS_MODE_CSLICE(cslice, first_ccs);
++
++ else
++ /*
++ * ... otherwise, mark the cslice as
++ * unavailable if no CCS dispatches here
++ */
++ mode |= XEHP_CCS_MODE_CSLICE(cslice,
++ XEHP_CCS_MODE_CSLICE_MASK);
++ }
++
++ intel_uncore_write(gt->uncore, XEHP_CCS_MODE, mode);
++}
+diff --git a/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h
+new file mode 100644
+index 0000000000000..9e5549caeb269
+--- /dev/null
++++ b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h
+@@ -0,0 +1,13 @@
++/* SPDX-License-Identifier: MIT */
++/*
++ * Copyright © 2024 Intel Corporation
++ */
++
++#ifndef __INTEL_GT_CCS_MODE_H__
++#define __INTEL_GT_CCS_MODE_H__
++
++struct intel_gt;
++
++void intel_gt_apply_ccs_mode(struct intel_gt *gt);
++
++#endif /* __INTEL_GT_CCS_MODE_H__ */
+diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
+index 2c0f1f3e28ff8..c6dec485aefbe 100644
+--- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
++++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
+@@ -3,8 +3,7 @@
+ * Copyright © 2022 Intel Corporation
+ */
+
+-#include "i915_drv.h"
+-
++#include "intel_gt.h"
+ #include "intel_gt_mcr.h"
+ #include "intel_gt_print.h"
+ #include "intel_gt_regs.h"
+@@ -166,8 +165,8 @@ void intel_gt_mcr_init(struct intel_gt *gt)
+ gt->steering_table[OADDRM] = xelpmp_oaddrm_steering_table;
+ } else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) {
+ /* Wa_14016747170 */
+- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) ||
+- IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0))
++ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
++ IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0))
+ fuse = REG_FIELD_GET(MTL_GT_L3_EXC_MASK,
+ intel_uncore_read(gt->uncore,
+ MTL_GT_ACTIVITY_FACTOR));
+diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+index 2cdfb2f713d02..64acab146b52f 100644
+--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
++++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+@@ -1468,8 +1468,14 @@
+ #define ECOBITS_PPGTT_CACHE4B (0 << 8)
+
+ #define GEN12_RCU_MODE _MMIO(0x14800)
++#define XEHP_RCU_MODE_FIXED_SLICE_CCS_MODE REG_BIT(1)
+ #define GEN12_RCU_MODE_CCS_ENABLE REG_BIT(0)
+
++#define XEHP_CCS_MODE _MMIO(0x14804)
++#define XEHP_CCS_MODE_CSLICE_MASK REG_GENMASK(2, 0) /* CCS0-3 + rsvd */
++#define XEHP_CCS_MODE_CSLICE_WIDTH ilog2(XEHP_CCS_MODE_CSLICE_MASK + 1)
++#define XEHP_CCS_MODE_CSLICE(cslice, ccs) (ccs << (cslice * XEHP_CCS_MODE_CSLICE_WIDTH))
++
+ #define CHV_FUSE_GT _MMIO(VLV_GUNIT_BASE + 0x2168)
+ #define CHV_FGT_DISABLE_SS0 (1 << 10)
+ #define CHV_FGT_DISABLE_SS1 (1 << 11)
+diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
+index c378cc7c953c4..b99efa348ad1e 100644
+--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
++++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
+@@ -1316,29 +1316,6 @@ gen12_emit_cmd_buf_wa(const struct intel_context *ce, u32 *cs)
+ return cs;
+ }
+
+-/*
+- * On DG2 during context restore of a preempted context in GPGPU mode,
+- * RCS restore hang is detected. This is extremely timing dependent.
+- * To address this below sw wabb is implemented for DG2 A steppings.
+- */
+-static u32 *
+-dg2_emit_rcs_hang_wabb(const struct intel_context *ce, u32 *cs)
+-{
+- *cs++ = MI_LOAD_REGISTER_IMM(1);
+- *cs++ = i915_mmio_reg_offset(GEN12_STATE_ACK_DEBUG(ce->engine->mmio_base));
+- *cs++ = 0x21;
+-
+- *cs++ = MI_LOAD_REGISTER_REG;
+- *cs++ = i915_mmio_reg_offset(RING_NOPID(ce->engine->mmio_base));
+- *cs++ = i915_mmio_reg_offset(XEHP_CULLBIT1);
+-
+- *cs++ = MI_LOAD_REGISTER_REG;
+- *cs++ = i915_mmio_reg_offset(RING_NOPID(ce->engine->mmio_base));
+- *cs++ = i915_mmio_reg_offset(XEHP_CULLBIT2);
+-
+- return cs;
+-}
+-
+ /*
+ * The bspec's tuning guide asks us to program a vertical watermark value of
+ * 0x3FF. However this register is not saved/restored properly by the
+@@ -1363,21 +1340,15 @@ gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs)
+ cs = gen12_emit_cmd_buf_wa(ce, cs);
+ cs = gen12_emit_restore_scratch(ce, cs);
+
+- /* Wa_22011450934:dg2 */
+- if (IS_DG2_GRAPHICS_STEP(ce->engine->i915, G10, STEP_A0, STEP_B0) ||
+- IS_DG2_GRAPHICS_STEP(ce->engine->i915, G11, STEP_A0, STEP_B0))
+- cs = dg2_emit_rcs_hang_wabb(ce, cs);
+-
+ /* Wa_16013000631:dg2 */
+- if (IS_DG2_GRAPHICS_STEP(ce->engine->i915, G10, STEP_B0, STEP_C0) ||
+- IS_DG2_G11(ce->engine->i915))
++ if (IS_DG2_G11(ce->engine->i915))
+ cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE, 0);
+
+ cs = gen12_emit_aux_table_inv(ce->engine, cs);
+
+ /* Wa_16014892111 */
+- if (IS_MTL_GRAPHICS_STEP(ce->engine->i915, M, STEP_A0, STEP_B0) ||
+- IS_MTL_GRAPHICS_STEP(ce->engine->i915, P, STEP_A0, STEP_B0) ||
++ if (IS_GFX_GT_IP_STEP(ce->engine->gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
++ IS_GFX_GT_IP_STEP(ce->engine->gt, IP_VER(12, 71), STEP_A0, STEP_B0) ||
+ IS_DG2(ce->engine->i915))
+ cs = dg2_emit_draw_watermark_setting(cs);
+
+@@ -1391,8 +1362,7 @@ gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs)
+ cs = gen12_emit_restore_scratch(ce, cs);
+
+ /* Wa_16013000631:dg2 */
+- if (IS_DG2_GRAPHICS_STEP(ce->engine->i915, G10, STEP_B0, STEP_C0) ||
+- IS_DG2_G11(ce->engine->i915))
++ if (IS_DG2_G11(ce->engine->i915))
+ if (ce->engine->class == COMPUTE_CLASS)
+ cs = gen8_emit_pipe_control(cs,
+ PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE,
+diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
+index 2c014407225cc..07269ff3be136 100644
+--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
++++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
+@@ -404,18 +404,6 @@ static const struct drm_i915_mocs_entry dg2_mocs_table[] = {
+ MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)),
+ };
+
+-static const struct drm_i915_mocs_entry dg2_mocs_table_g10_ax[] = {
+- /* Wa_14011441408: Set Go to Memory for MOCS#0 */
+- MOCS_ENTRY(0, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)),
+- /* UC - Coherent; GO:Memory */
+- MOCS_ENTRY(1, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)),
+- /* UC - Non-Coherent; GO:Memory */
+- MOCS_ENTRY(2, 0, L3_1_UC | L3_GLBGO(1)),
+-
+- /* WB - LC */
+- MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)),
+-};
+-
+ static const struct drm_i915_mocs_entry pvc_mocs_table[] = {
+ /* Error */
+ MOCS_ENTRY(0, 0, L3_3_WB),
+@@ -507,7 +495,7 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915,
+ memset(table, 0, sizeof(struct drm_i915_mocs_table));
+
+ table->unused_entries_index = I915_MOCS_PTE;
+- if (IS_METEORLAKE(i915)) {
++ if (IS_GFX_GT_IP_RANGE(&i915->gt0, IP_VER(12, 70), IP_VER(12, 71))) {
+ table->size = ARRAY_SIZE(mtl_mocs_table);
+ table->table = mtl_mocs_table;
+ table->n_entries = MTL_NUM_MOCS_ENTRIES;
+@@ -521,13 +509,8 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915,
+ table->wb_index = 2;
+ table->unused_entries_index = 2;
+ } else if (IS_DG2(i915)) {
+- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) {
+- table->size = ARRAY_SIZE(dg2_mocs_table_g10_ax);
+- table->table = dg2_mocs_table_g10_ax;
+- } else {
+- table->size = ARRAY_SIZE(dg2_mocs_table);
+- table->table = dg2_mocs_table;
+- }
++ table->size = ARRAY_SIZE(dg2_mocs_table);
++ table->table = dg2_mocs_table;
+ table->uc_index = 1;
+ table->n_entries = GEN9_NUM_MOCS_ENTRIES;
+ table->unused_entries_index = 3;
+diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c
+index ccdc1afbf11b5..9e113e9473260 100644
+--- a/drivers/gpu/drm/i915/gt/intel_rc6.c
++++ b/drivers/gpu/drm/i915/gt/intel_rc6.c
+@@ -118,14 +118,12 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6)
+ GEN6_RC_CTL_EI_MODE(1);
+
+ /*
+- * Wa_16011777198 and BSpec 52698 - Render powergating must be off.
++ * BSpec 52698 - Render powergating must be off.
+ * FIXME BSpec is outdated, disabling powergating for MTL is just
+ * temporary wa and should be removed after fixing real cause
+ * of forcewake timeouts.
+ */
+- if (IS_METEORLAKE(gt->i915) ||
+- IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_C0) ||
+- IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0))
++ if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71)))
+ pg_enable =
+ GEN9_MEDIA_PG_ENABLE |
+ GEN11_MEDIA_SAMPLER_PG_ENABLE;
+diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
+index 5fa57a34cf4bb..13fb8e5042c58 100644
+--- a/drivers/gpu/drm/i915/gt/intel_reset.c
++++ b/drivers/gpu/drm/i915/gt/intel_reset.c
+@@ -705,7 +705,7 @@ static int __reset_guc(struct intel_gt *gt)
+
+ static bool needs_wa_14015076503(struct intel_gt *gt, intel_engine_mask_t engine_mask)
+ {
+- if (!IS_METEORLAKE(gt->i915) || !HAS_ENGINE(gt, GSC0))
++ if (MEDIA_VER_FULL(gt->i915) != IP_VER(13, 0) || !HAS_ENGINE(gt, GSC0))
+ return false;
+
+ if (!__HAS_ENGINE(engine_mask, GSC0))
+@@ -1632,6 +1632,24 @@ void __intel_fini_wedge(struct intel_wedge_me *w)
+ w->gt = NULL;
+ }
+
++/*
++ * Wa_22011802037 requires that we (or the GuC) ensure that no command
++ * streamers are executing MI_FORCE_WAKE while an engine reset is initiated.
++ */
++bool intel_engine_reset_needs_wa_22011802037(struct intel_gt *gt)
++{
++ if (GRAPHICS_VER(gt->i915) < 11)
++ return false;
++
++ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0))
++ return true;
++
++ if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70))
++ return false;
++
++ return true;
++}
++
+ #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+ #include "selftest_reset.c"
+ #include "selftest_hangcheck.c"
+diff --git a/drivers/gpu/drm/i915/gt/intel_reset.h b/drivers/gpu/drm/i915/gt/intel_reset.h
+index 25c975b6e8fc0..f615b30b81c59 100644
+--- a/drivers/gpu/drm/i915/gt/intel_reset.h
++++ b/drivers/gpu/drm/i915/gt/intel_reset.h
+@@ -78,4 +78,6 @@ void __intel_fini_wedge(struct intel_wedge_me *w);
+ bool intel_has_gpu_reset(const struct intel_gt *gt);
+ bool intel_has_reset_engine(const struct intel_gt *gt);
+
++bool intel_engine_reset_needs_wa_22011802037(struct intel_gt *gt);
++
+ #endif /* I915_RESET_H */
+diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c
+index 092542f53aad9..4feef874e6d69 100644
+--- a/drivers/gpu/drm/i915/gt/intel_rps.c
++++ b/drivers/gpu/drm/i915/gt/intel_rps.c
+@@ -1161,7 +1161,7 @@ void gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *c
+ {
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+
+- if (IS_METEORLAKE(i915))
++ if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
+ return mtl_get_freq_caps(rps, caps);
+ else
+ return __gen6_rps_get_freq_caps(rps, caps);
+diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
+index 3ae0dbd39eaa3..be060b32bd9ce 100644
+--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
++++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
+@@ -10,6 +10,7 @@
+ #include "intel_engine_regs.h"
+ #include "intel_gpu_commands.h"
+ #include "intel_gt.h"
++#include "intel_gt_ccs_mode.h"
+ #include "intel_gt_mcr.h"
+ #include "intel_gt_regs.h"
+ #include "intel_ring.h"
+@@ -50,7 +51,8 @@
+ * registers belonging to BCS, VCS or VECS should be implemented in
+ * xcs_engine_wa_init(). Workarounds for registers not belonging to a specific
+ * engine's MMIO range but that are part of of the common RCS/CCS reset domain
+- * should be implemented in general_render_compute_wa_init().
++ * should be implemented in general_render_compute_wa_init(). The settings
++ * about the CCS load balancing should be added in ccs_engine_wa_mode().
+ *
+ * - GT workarounds: the list of these WAs is applied whenever these registers
+ * revert to their default values: on GPU reset, suspend/resume [1]_, etc.
+@@ -764,39 +766,15 @@ static void dg2_ctx_workarounds_init(struct intel_engine_cs *engine,
+ {
+ dg2_ctx_gt_tuning_init(engine, wal);
+
+- /* Wa_16011186671:dg2_g11 */
+- if (IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) {
+- wa_mcr_masked_dis(wal, VFLSKPD, DIS_MULT_MISS_RD_SQUASH);
+- wa_mcr_masked_en(wal, VFLSKPD, DIS_OVER_FETCH_CACHE);
+- }
+-
+- if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) {
+- /* Wa_14010469329:dg2_g10 */
+- wa_mcr_masked_en(wal, XEHP_COMMON_SLICE_CHICKEN3,
+- XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE);
+-
+- /*
+- * Wa_22010465075:dg2_g10
+- * Wa_22010613112:dg2_g10
+- * Wa_14010698770:dg2_g10
+- */
+- wa_mcr_masked_en(wal, XEHP_COMMON_SLICE_CHICKEN3,
+- GEN12_DISABLE_CPS_AWARE_COLOR_PIPE);
+- }
+-
+ /* Wa_16013271637:dg2 */
+ wa_mcr_masked_en(wal, XEHP_SLICE_COMMON_ECO_CHICKEN1,
+ MSC_MSAA_REODER_BUF_BYPASS_DISABLE);
+
+ /* Wa_14014947963:dg2 */
+- if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_B0, STEP_FOREVER) ||
+- IS_DG2_G11(engine->i915) || IS_DG2_G12(engine->i915))
+- wa_masked_field_set(wal, VF_PREEMPTION, PREEMPTION_VERTEX_COUNT, 0x4000);
++ wa_masked_field_set(wal, VF_PREEMPTION, PREEMPTION_VERTEX_COUNT, 0x4000);
+
+ /* Wa_18018764978:dg2 */
+- if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_C0, STEP_FOREVER) ||
+- IS_DG2_G11(engine->i915) || IS_DG2_G12(engine->i915))
+- wa_mcr_masked_en(wal, XEHP_PSS_MODE2, SCOREBOARD_STALL_FLUSH_CONTROL);
++ wa_mcr_masked_en(wal, XEHP_PSS_MODE2, SCOREBOARD_STALL_FLUSH_CONTROL);
+
+ /* Wa_15010599737:dg2 */
+ wa_mcr_masked_en(wal, CHICKEN_RASTER_1, DIS_SF_ROUND_NEAREST_EVEN);
+@@ -805,27 +783,32 @@ static void dg2_ctx_workarounds_init(struct intel_engine_cs *engine,
+ wa_masked_en(wal, CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE);
+ }
+
+-static void mtl_ctx_gt_tuning_init(struct intel_engine_cs *engine,
+- struct i915_wa_list *wal)
++static void xelpg_ctx_gt_tuning_init(struct intel_engine_cs *engine,
++ struct i915_wa_list *wal)
+ {
+- struct drm_i915_private *i915 = engine->i915;
++ struct intel_gt *gt = engine->gt;
+
+ dg2_ctx_gt_tuning_init(engine, wal);
+
+- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_B0, STEP_FOREVER) ||
+- IS_MTL_GRAPHICS_STEP(i915, P, STEP_B0, STEP_FOREVER))
++ /*
++ * Due to Wa_16014892111, the DRAW_WATERMARK tuning must be done in
++ * gen12_emit_indirect_ctx_rcs() rather than here on some early
++ * steppings.
++ */
++ if (!(IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
++ IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)))
+ wa_add(wal, DRAW_WATERMARK, VERT_WM_VAL, 0x3FF, 0, false);
+ }
+
+-static void mtl_ctx_workarounds_init(struct intel_engine_cs *engine,
+- struct i915_wa_list *wal)
++static void xelpg_ctx_workarounds_init(struct intel_engine_cs *engine,
++ struct i915_wa_list *wal)
+ {
+- struct drm_i915_private *i915 = engine->i915;
++ struct intel_gt *gt = engine->gt;
+
+- mtl_ctx_gt_tuning_init(engine, wal);
++ xelpg_ctx_gt_tuning_init(engine, wal);
+
+- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) ||
+- IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0)) {
++ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
++ IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) {
+ /* Wa_14014947963 */
+ wa_masked_field_set(wal, VF_PREEMPTION,
+ PREEMPTION_VERTEX_COUNT, 0x4000);
+@@ -931,8 +914,8 @@ __intel_engine_init_ctx_wa(struct intel_engine_cs *engine,
+ if (engine->class != RENDER_CLASS)
+ goto done;
+
+- if (IS_METEORLAKE(i915))
+- mtl_ctx_workarounds_init(engine, wal);
++ if (IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 70), IP_VER(12, 74)))
++ xelpg_ctx_workarounds_init(engine, wal);
+ else if (IS_PONTEVECCHIO(i915))
+ ; /* noop; none at this time */
+ else if (IS_DG2(i915))
+@@ -1606,31 +1589,11 @@ xehpsdv_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
+ static void
+ dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
+ {
+- struct intel_engine_cs *engine;
+- int id;
+-
+ xehp_init_mcr(gt, wal);
+
+ /* Wa_14011060649:dg2 */
+ wa_14011060649(gt, wal);
+
+- /*
+- * Although there are per-engine instances of these registers,
+- * they technically exist outside the engine itself and are not
+- * impacted by engine resets. Furthermore, they're part of the
+- * GuC blacklist so trying to treat them as engine workarounds
+- * will result in GuC initialization failure and a wedged GPU.
+- */
+- for_each_engine(engine, gt, id) {
+- if (engine->class != VIDEO_DECODE_CLASS)
+- continue;
+-
+- /* Wa_16010515920:dg2_g10 */
+- if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0))
+- wa_write_or(wal, VDBOX_CGCTL3F18(engine->mmio_base),
+- ALNUNIT_CLKGATE_DIS);
+- }
+-
+ if (IS_DG2_G10(gt->i915)) {
+ /* Wa_22010523718:dg2 */
+ wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE,
+@@ -1641,65 +1604,6 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
+ DSS_ROUTER_CLKGATE_DIS);
+ }
+
+- if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0) ||
+- IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0)) {
+- /* Wa_14012362059:dg2 */
+- wa_mcr_write_or(wal, XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB);
+- }
+-
+- if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0)) {
+- /* Wa_14010948348:dg2_g10 */
+- wa_write_or(wal, UNSLCGCTL9430, MSQDUNIT_CLKGATE_DIS);
+-
+- /* Wa_14011037102:dg2_g10 */
+- wa_write_or(wal, UNSLCGCTL9444, LTCDD_CLKGATE_DIS);
+-
+- /* Wa_14011371254:dg2_g10 */
+- wa_mcr_write_or(wal, XEHP_SLICE_UNIT_LEVEL_CLKGATE, NODEDSS_CLKGATE_DIS);
+-
+- /* Wa_14011431319:dg2_g10 */
+- wa_write_or(wal, UNSLCGCTL9440, GAMTLBOACS_CLKGATE_DIS |
+- GAMTLBVDBOX7_CLKGATE_DIS |
+- GAMTLBVDBOX6_CLKGATE_DIS |
+- GAMTLBVDBOX5_CLKGATE_DIS |
+- GAMTLBVDBOX4_CLKGATE_DIS |
+- GAMTLBVDBOX3_CLKGATE_DIS |
+- GAMTLBVDBOX2_CLKGATE_DIS |
+- GAMTLBVDBOX1_CLKGATE_DIS |
+- GAMTLBVDBOX0_CLKGATE_DIS |
+- GAMTLBKCR_CLKGATE_DIS |
+- GAMTLBGUC_CLKGATE_DIS |
+- GAMTLBBLT_CLKGATE_DIS);
+- wa_write_or(wal, UNSLCGCTL9444, GAMTLBGFXA0_CLKGATE_DIS |
+- GAMTLBGFXA1_CLKGATE_DIS |
+- GAMTLBCOMPA0_CLKGATE_DIS |
+- GAMTLBCOMPA1_CLKGATE_DIS |
+- GAMTLBCOMPB0_CLKGATE_DIS |
+- GAMTLBCOMPB1_CLKGATE_DIS |
+- GAMTLBCOMPC0_CLKGATE_DIS |
+- GAMTLBCOMPC1_CLKGATE_DIS |
+- GAMTLBCOMPD0_CLKGATE_DIS |
+- GAMTLBCOMPD1_CLKGATE_DIS |
+- GAMTLBMERT_CLKGATE_DIS |
+- GAMTLBVEBOX3_CLKGATE_DIS |
+- GAMTLBVEBOX2_CLKGATE_DIS |
+- GAMTLBVEBOX1_CLKGATE_DIS |
+- GAMTLBVEBOX0_CLKGATE_DIS);
+-
+- /* Wa_14010569222:dg2_g10 */
+- wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE,
+- GAMEDIA_CLKGATE_DIS);
+-
+- /* Wa_14011028019:dg2_g10 */
+- wa_mcr_write_or(wal, SSMCGCTL9530, RTFUNIT_CLKGATE_DIS);
+-
+- /* Wa_14010680813:dg2_g10 */
+- wa_mcr_write_or(wal, XEHP_GAMSTLB_CTRL,
+- CONTROL_BLOCK_CLKGATE_DIS |
+- EGRESS_BLOCK_CLKGATE_DIS |
+- TAG_BLOCK_CLKGATE_DIS);
+- }
+-
+ /* Wa_14014830051:dg2 */
+ wa_mcr_write_clr(wal, SARB_CHICKEN1, COMP_CKN_IN);
+
+@@ -1741,14 +1645,15 @@ pvc_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
+ static void
+ xelpg_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
+ {
+- /* Wa_14018778641 / Wa_18018781329 */
++ /* Wa_14018575942 / Wa_18018781329 */
++ wa_mcr_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB);
+ wa_mcr_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB);
+
+ /* Wa_22016670082 */
+ wa_write_or(wal, GEN12_SQCNT1, GEN12_STRICT_RAR_ENABLE);
+
+- if (IS_MTL_GRAPHICS_STEP(gt->i915, M, STEP_A0, STEP_B0) ||
+- IS_MTL_GRAPHICS_STEP(gt->i915, P, STEP_A0, STEP_B0)) {
++ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
++ IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) {
+ /* Wa_14014830051 */
+ wa_mcr_write_clr(wal, SARB_CHICKEN1, COMP_CKN_IN);
+
+@@ -1791,10 +1696,8 @@ xelpmp_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
+ */
+ static void gt_tuning_settings(struct intel_gt *gt, struct i915_wa_list *wal)
+ {
+- if (IS_METEORLAKE(gt->i915)) {
+- if (gt->type != GT_MEDIA)
+- wa_mcr_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS);
+-
++ if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74))) {
++ wa_mcr_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS);
+ wa_mcr_write_or(wal, XEHP_SQCM, EN_32B_ACCESS);
+ }
+
+@@ -1826,7 +1729,7 @@ gt_init_workarounds(struct intel_gt *gt, struct i915_wa_list *wal)
+ return;
+ }
+
+- if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
++ if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74)))
+ xelpg_gt_workarounds_init(gt, wal);
+ else if (IS_PONTEVECCHIO(i915))
+ pvc_gt_workarounds_init(gt, wal);
+@@ -2242,29 +2145,10 @@ static void dg2_whitelist_build(struct intel_engine_cs *engine)
+
+ switch (engine->class) {
+ case RENDER_CLASS:
+- /*
+- * Wa_1507100340:dg2_g10
+- *
+- * This covers 4 registers which are next to one another :
+- * - PS_INVOCATION_COUNT
+- * - PS_INVOCATION_COUNT_UDW
+- * - PS_DEPTH_COUNT
+- * - PS_DEPTH_COUNT_UDW
+- */
+- if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0))
+- whitelist_reg_ext(w, PS_INVOCATION_COUNT,
+- RING_FORCE_TO_NONPRIV_ACCESS_RD |
+- RING_FORCE_TO_NONPRIV_RANGE_4);
+-
+ /* Required by recommended tuning setting (not a workaround) */
+ whitelist_mcr_reg(w, XEHP_COMMON_SLICE_CHICKEN3);
+
+ break;
+- case COMPUTE_CLASS:
+- /* Wa_16011157294:dg2_g10 */
+- if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0))
+- whitelist_reg(w, GEN9_CTX_PREEMPT_REG);
+- break;
+ default:
+ break;
+ }
+@@ -2294,7 +2178,7 @@ static void pvc_whitelist_build(struct intel_engine_cs *engine)
+ blacklist_trtt(engine);
+ }
+
+-static void mtl_whitelist_build(struct intel_engine_cs *engine)
++static void xelpg_whitelist_build(struct intel_engine_cs *engine)
+ {
+ struct i915_wa_list *w = &engine->whitelist;
+
+@@ -2316,8 +2200,10 @@ void intel_engine_init_whitelist(struct intel_engine_cs *engine)
+
+ wa_init_start(w, engine->gt, "whitelist", engine->name);
+
+- if (IS_METEORLAKE(i915))
+- mtl_whitelist_build(engine);
++ if (engine->gt->type == GT_MEDIA)
++ ; /* none yet */
++ else if (IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 70), IP_VER(12, 74)))
++ xelpg_whitelist_build(engine);
+ else if (IS_PONTEVECCHIO(i915))
+ pvc_whitelist_build(engine);
+ else if (IS_DG2(i915))
+@@ -2415,62 +2301,35 @@ engine_fake_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
+ }
+ }
+
+-static bool needs_wa_1308578152(struct intel_engine_cs *engine)
+-{
+- return intel_sseu_find_first_xehp_dss(&engine->gt->info.sseu, 0, 0) >=
+- GEN_DSS_PER_GSLICE;
+-}
+-
+ static void
+ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
+ {
+ struct drm_i915_private *i915 = engine->i915;
++ struct intel_gt *gt = engine->gt;
+
+- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) ||
+- IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0)) {
++ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
++ IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) {
+ /* Wa_22014600077 */
+ wa_mcr_masked_en(wal, GEN10_CACHE_MODE_SS,
+ ENABLE_EU_COUNT_FOR_TDL_FLUSH);
+ }
+
+- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) ||
+- IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0) ||
+- IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) ||
+- IS_DG2_G11(i915) || IS_DG2_G12(i915)) {
++ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
++ IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0) ||
++ IS_DG2(i915)) {
+ /* Wa_1509727124 */
+ wa_mcr_masked_en(wal, GEN10_SAMPLER_MODE,
+ SC_DISABLE_POWER_OPTIMIZATION_EBB);
+ }
+
+- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) ||
+- IS_DG2_G11(i915) || IS_DG2_G12(i915) ||
+- IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0)) {
++ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
++ IS_DG2(i915)) {
+ /* Wa_22012856258 */
+ wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2,
+ GEN12_DISABLE_READ_SUPPRESSION);
+ }
+
+- if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) {
+- /* Wa_14013392000:dg2_g11 */
+- wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, GEN12_ENABLE_LARGE_GRF_MODE);
+- }
+-
+- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0) ||
+- IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) {
+- /* Wa_14012419201:dg2 */
+- wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4,
+- GEN12_DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX);
+- }
+-
+- /* Wa_1308578152:dg2_g10 when first gslice is fused off */
+- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) &&
+- needs_wa_1308578152(engine)) {
+- wa_masked_dis(wal, GEN12_CS_DEBUG_MODE1_CCCSUNIT_BE_COMMON,
+- GEN12_REPLAY_MODE_GRANULARITY);
+- }
+-
+- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) ||
+- IS_DG2_G11(i915) || IS_DG2_G12(i915)) {
++ if (IS_DG2(i915)) {
+ /*
+ * Wa_22010960976:dg2
+ * Wa_14013347512:dg2
+@@ -2479,34 +2338,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
+ LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK);
+ }
+
+- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) {
+- /*
+- * Wa_1608949956:dg2_g10
+- * Wa_14010198302:dg2_g10
+- */
+- wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN,
+- MDQ_ARBITRATION_MODE | UGM_BACKUP_MODE);
+- }
+-
+- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0))
+- /* Wa_22010430635:dg2 */
+- wa_mcr_masked_en(wal,
+- GEN9_ROW_CHICKEN4,
+- GEN12_DISABLE_GRF_CLEAR);
+-
+- /* Wa_14013202645:dg2 */
+- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) ||
+- IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0))
+- wa_mcr_write_or(wal, RT_CTRL, DIS_NULL_QUERY);
+-
+- /* Wa_22012532006:dg2 */
+- if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_C0) ||
+- IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0))
+- wa_mcr_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
+- DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA);
+-
+- if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_B0, STEP_FOREVER) ||
+- IS_DG2_G10(i915)) {
++ if (IS_DG2_G11(i915) || IS_DG2_G10(i915)) {
+ /* Wa_22014600077:dg2 */
+ wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0,
+ _MASKED_BIT_ENABLE(ENABLE_EU_COUNT_FOR_TDL_FLUSH),
+@@ -2514,6 +2346,19 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
+ true);
+ }
+
++ if (IS_DG2(i915) || IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) ||
++ IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
++ /*
++ * Wa_1606700617:tgl,dg1,adl-p
++ * Wa_22010271021:tgl,rkl,dg1,adl-s,adl-p
++ * Wa_14010826681:tgl,dg1,rkl,adl-p
++ * Wa_18019627453:dg2
++ */
++ wa_masked_en(wal,
++ GEN9_CS_DEBUG_MODE1,
++ FF_DOP_CLOCK_GATE_DISABLE);
++ }
++
+ if (IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) || IS_DG1(i915) ||
+ IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
+ /* Wa_1606931601:tgl,rkl,dg1,adl-s,adl-p */
+@@ -2527,19 +2372,11 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
+ */
+ wa_write_or(wal, GEN7_FF_THREAD_MODE,
+ GEN12_FF_TESSELATION_DOP_GATE_DISABLE);
+- }
+
+- if (IS_ALDERLAKE_P(i915) || IS_DG2(i915) || IS_ALDERLAKE_S(i915) ||
+- IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
+- /*
+- * Wa_1606700617:tgl,dg1,adl-p
+- * Wa_22010271021:tgl,rkl,dg1,adl-s,adl-p
+- * Wa_14010826681:tgl,dg1,rkl,adl-p
+- * Wa_18019627453:dg2
+- */
+- wa_masked_en(wal,
+- GEN9_CS_DEBUG_MODE1,
+- FF_DOP_CLOCK_GATE_DISABLE);
++ /* Wa_1406941453:tgl,rkl,dg1,adl-s,adl-p */
++ wa_mcr_masked_en(wal,
++ GEN10_SAMPLER_MODE,
++ ENABLE_SMALLPL);
+ }
+
+ if (IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) ||
+@@ -2566,14 +2403,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
+ GEN8_RC_SEMA_IDLE_MSG_DISABLE);
+ }
+
+- if (IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915) ||
+- IS_ALDERLAKE_S(i915) || IS_ALDERLAKE_P(i915)) {
+- /* Wa_1406941453:tgl,rkl,dg1,adl-s,adl-p */
+- wa_mcr_masked_en(wal,
+- GEN10_SAMPLER_MODE,
+- ENABLE_SMALLPL);
+- }
+-
+ if (GRAPHICS_VER(i915) == 11) {
+ /* This is not an Wa. Enable for better image quality */
+ wa_masked_en(wal,
+@@ -2975,10 +2804,12 @@ ccs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
+ * function invoked by __intel_engine_init_ctx_wa().
+ */
+ static void
+-add_render_compute_tuning_settings(struct drm_i915_private *i915,
++add_render_compute_tuning_settings(struct intel_gt *gt,
+ struct i915_wa_list *wal)
+ {
+- if (IS_METEORLAKE(i915) || IS_DG2(i915))
++ struct drm_i915_private *i915 = gt->i915;
++
++ if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74)) || IS_DG2(i915))
+ wa_mcr_write_clr_set(wal, RT_CTRL, STACKID_CTRL, STACKID_CTRL_512);
+
+ /*
+@@ -2994,6 +2825,28 @@ add_render_compute_tuning_settings(struct drm_i915_private *i915,
+ wa_write_clr(wal, GEN8_GARBCNTL, GEN12_BUS_HASH_CTL_BIT_EXC);
+ }
+
++static void ccs_engine_wa_mode(struct intel_engine_cs *engine, struct i915_wa_list *wal)
++{
++ struct intel_gt *gt = engine->gt;
++
++ if (!IS_DG2(gt->i915))
++ return;
++
++ /*
++ * Wa_14019159160: This workaround, along with others, leads to
++ * significant challenges in utilizing load balancing among the
++ * CCS slices. Consequently, an architectural decision has been
++ * made to completely disable automatic CCS load balancing.
++ */
++ wa_masked_en(wal, GEN12_RCU_MODE, XEHP_RCU_MODE_FIXED_SLICE_CCS_MODE);
++
++ /*
++ * After having disabled automatic load balancing we need to
++ * assign all slices to a single CCS. We will call it CCS mode 1
++ */
++ intel_gt_apply_ccs_mode(gt);
++}
++
+ /*
+ * The workarounds in this function apply to shared registers in
+ * the general render reset domain that aren't tied to a
+@@ -3007,8 +2860,9 @@ static void
+ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
+ {
+ struct drm_i915_private *i915 = engine->i915;
++ struct intel_gt *gt = engine->gt;
+
+- add_render_compute_tuning_settings(i915, wal);
++ add_render_compute_tuning_settings(gt, wal);
+
+ if (GRAPHICS_VER(i915) >= 11) {
+ /* This is not a Wa (although referred to as
+@@ -3029,13 +2883,14 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li
+ GEN11_INDIRECT_STATE_BASE_ADDR_OVERRIDE);
+ }
+
+- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_B0, STEP_FOREVER) ||
+- IS_MTL_GRAPHICS_STEP(i915, P, STEP_B0, STEP_FOREVER))
++ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_B0, STEP_FOREVER) ||
++ IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_B0, STEP_FOREVER) ||
++ IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 74), IP_VER(12, 74)))
+ /* Wa_14017856879 */
+ wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN3, MTL_DISABLE_FIX_FOR_EOT_FLUSH);
+
+- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) ||
+- IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0))
++ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
++ IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0))
+ /*
+ * Wa_14017066071
+ * Wa_14017654203
+@@ -3043,37 +2898,47 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li
+ wa_mcr_masked_en(wal, GEN10_SAMPLER_MODE,
+ MTL_DISABLE_SAMPLER_SC_OOO);
+
+- if (IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0))
++ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0))
+ /* Wa_22015279794 */
+ wa_mcr_masked_en(wal, GEN10_CACHE_MODE_SS,
+ DISABLE_PREFETCH_INTO_IC);
+
+- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) ||
+- IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0) ||
+- IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) ||
+- IS_DG2_G11(i915) || IS_DG2_G12(i915)) {
++ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
++ IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0) ||
++ IS_DG2(i915)) {
+ /* Wa_22013037850 */
+ wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW,
+ DISABLE_128B_EVICTION_COMMAND_UDW);
++
++ /* Wa_18017747507 */
++ wa_masked_en(wal, VFG_PREEMPTION_CHICKEN, POLYGON_TRIFAN_LINELOOP_DISABLE);
+ }
+
+- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) ||
+- IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0) ||
++ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
++ IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0) ||
+ IS_PONTEVECCHIO(i915) ||
+ IS_DG2(i915)) {
+ /* Wa_22014226127 */
+ wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0, DISABLE_D8_D16_COASLESCE);
+ }
+
+- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) ||
+- IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0) ||
+- IS_DG2(i915)) {
+- /* Wa_18017747507 */
+- wa_masked_en(wal, VFG_PREEMPTION_CHICKEN, POLYGON_TRIFAN_LINELOOP_DISABLE);
++ if (IS_PONTEVECCHIO(i915) || IS_DG2(i915)) {
++ /* Wa_14015227452:dg2,pvc */
++ wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE);
++
++ /* Wa_16015675438:dg2,pvc */
++ wa_masked_en(wal, FF_SLICE_CS_CHICKEN2, GEN12_PERF_FIX_BALANCING_CFE_DISABLE);
++ }
++
++ if (IS_DG2(i915)) {
++ /*
++ * Wa_16011620976:dg2_g11
++ * Wa_22015475538:dg2
++ */
++ wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8);
+ }
+
+- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) ||
+- IS_DG2_G11(i915)) {
++ if (IS_DG2_G11(i915)) {
+ /*
+ * Wa_22012826095:dg2
+ * Wa_22013059131:dg2
+@@ -3085,18 +2950,18 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li
+ /* Wa_22013059131:dg2 */
+ wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0,
+ FORCE_1_SUB_MESSAGE_PER_FRAGMENT);
+- }
+
+- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) {
+ /*
+- * Wa_14010918519:dg2_g10
++ * Wa_22012654132
+ *
+- * LSC_CHICKEN_BIT_0 always reads back as 0 is this stepping,
+- * so ignoring verification.
++ * Note that register 0xE420 is write-only and cannot be read
++ * back for verification on DG2 (due to Wa_14012342262), so
++ * we need to explicitly skip the readback.
+ */
+- wa_mcr_add(wal, LSC_CHICKEN_BIT_0_UDW, 0,
+- FORCE_SLM_FENCE_SCOPE_TO_TILE | FORCE_UGM_FENCE_SCOPE_TO_TILE,
+- 0, false);
++ wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0,
++ _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC),
++ 0 /* write-only, so skip validation */,
++ true);
+ }
+
+ if (IS_XEHPSDV(i915)) {
+@@ -3114,35 +2979,6 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li
+ wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1,
+ GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);
+ }
+-
+- if (IS_DG2(i915) || IS_PONTEVECCHIO(i915)) {
+- /* Wa_14015227452:dg2,pvc */
+- wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE);
+-
+- /* Wa_16015675438:dg2,pvc */
+- wa_masked_en(wal, FF_SLICE_CS_CHICKEN2, GEN12_PERF_FIX_BALANCING_CFE_DISABLE);
+- }
+-
+- if (IS_DG2(i915)) {
+- /*
+- * Wa_16011620976:dg2_g11
+- * Wa_22015475538:dg2
+- */
+- wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8);
+- }
+-
+- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_C0) || IS_DG2_G11(i915))
+- /*
+- * Wa_22012654132
+- *
+- * Note that register 0xE420 is write-only and cannot be read
+- * back for verification on DG2 (due to Wa_14012342262), so
+- * we need to explicitly skip the readback.
+- */
+- wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0,
+- _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC),
+- 0 /* write-only, so skip validation */,
+- true);
+ }
+
+ static void
+@@ -3158,8 +2994,10 @@ engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal
+ * to a single RCS/CCS engine's workaround list since
+ * they're reset as part of the general render domain reset.
+ */
+- if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE)
++ if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE) {
+ general_render_compute_wa_init(engine, wal);
++ ccs_engine_wa_mode(engine, wal);
++ }
+
+ if (engine->class == COMPUTE_CLASS)
+ ccs_engine_wa_init(engine, wal);
+diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
+index 569b5fe94c416..861d0c58388cf 100644
+--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
++++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
+@@ -272,18 +272,14 @@ static u32 guc_ctl_wa_flags(struct intel_guc *guc)
+ GRAPHICS_VER_FULL(gt->i915) < IP_VER(12, 50))
+ flags |= GUC_WA_POLLCS;
+
+- /* Wa_16011759253:dg2_g10:a0 */
+- if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0))
+- flags |= GUC_WA_GAM_CREDITS;
+-
+ /* Wa_14014475959 */
+- if (IS_MTL_GRAPHICS_STEP(gt->i915, M, STEP_A0, STEP_B0) ||
++ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
+ IS_DG2(gt->i915))
+ flags |= GUC_WA_HOLD_CCS_SWITCHOUT;
+
+ /*
+- * Wa_14012197797:dg2_g10:a0,dg2_g11:a0
+- * Wa_22011391025:dg2_g10,dg2_g11,dg2_g12
++ * Wa_14012197797
++ * Wa_22011391025
+ *
+ * The same WA bit is used for both and 22011391025 is applicable to
+ * all DG2.
+@@ -292,22 +288,14 @@ static u32 guc_ctl_wa_flags(struct intel_guc *guc)
+ flags |= GUC_WA_DUAL_QUEUE;
+
+ /* Wa_22011802037: graphics version 11/12 */
+- if (IS_MTL_GRAPHICS_STEP(gt->i915, M, STEP_A0, STEP_B0) ||
+- (GRAPHICS_VER(gt->i915) >= 11 &&
+- GRAPHICS_VER_FULL(gt->i915) < IP_VER(12, 70)))
++ if (intel_engine_reset_needs_wa_22011802037(gt))
+ flags |= GUC_WA_PRE_PARSER;
+
+- /* Wa_16011777198:dg2 */
+- if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_C0) ||
+- IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0))
+- flags |= GUC_WA_RCS_RESET_BEFORE_RC6;
+-
+ /*
+- * Wa_22012727170:dg2_g10[a0-c0), dg2_g11[a0..)
+- * Wa_22012727685:dg2_g11[a0..)
++ * Wa_22012727170
++ * Wa_22012727685
+ */
+- if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_C0) ||
+- IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_FOREVER))
++ if (IS_DG2_G11(gt->i915))
+ flags |= GUC_WA_CONTEXT_ISOLATION;
+
+ /* Wa_16015675438 */
+diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+index 836e4d9d65ef6..b5de5a9f59671 100644
+--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
++++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+@@ -1690,9 +1690,7 @@ static void guc_engine_reset_prepare(struct intel_engine_cs *engine)
+ * Wa_22011802037: In addition to stopping the cs, we need
+ * to wait for any pending mi force wakeups
+ */
+- if (IS_MTL_GRAPHICS_STEP(engine->i915, M, STEP_A0, STEP_B0) ||
+- (GRAPHICS_VER(engine->i915) >= 11 &&
+- GRAPHICS_VER_FULL(engine->i915) < IP_VER(12, 70))) {
++ if (intel_engine_reset_needs_wa_22011802037(engine->gt)) {
+ intel_engine_stop_cs(engine);
+ intel_engine_wait_for_pending_mi_fw(engine);
+ }
+@@ -4299,7 +4297,7 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine)
+
+ /* Wa_14014475959:dg2 */
+ if (engine->class == COMPUTE_CLASS)
+- if (IS_MTL_GRAPHICS_STEP(engine->i915, M, STEP_A0, STEP_B0) ||
++ if (IS_GFX_GT_IP_STEP(engine->gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
+ IS_DG2(engine->i915))
+ engine->flags |= I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT;
+
+diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
+index 4de44cf1026dc..7a90a2e32c9f1 100644
+--- a/drivers/gpu/drm/i915/i915_debugfs.c
++++ b/drivers/gpu/drm/i915/i915_debugfs.c
+@@ -144,7 +144,7 @@ static const char *i915_cache_level_str(struct drm_i915_gem_object *obj)
+ {
+ struct drm_i915_private *i915 = obj_to_i915(obj);
+
+- if (IS_METEORLAKE(i915)) {
++ if (IS_GFX_GT_IP_RANGE(to_gt(i915), IP_VER(12, 70), IP_VER(12, 71))) {
+ switch (obj->pat_index) {
+ case 0: return " WB";
+ case 1: return " WT";
+diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
+index 7a8ce7239bc9e..e0e0493d6c1f0 100644
+--- a/drivers/gpu/drm/i915/i915_drv.h
++++ b/drivers/gpu/drm/i915/i915_drv.h
+@@ -658,10 +658,6 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
+ #define IS_XEHPSDV_GRAPHICS_STEP(__i915, since, until) \
+ (IS_XEHPSDV(__i915) && IS_GRAPHICS_STEP(__i915, since, until))
+
+-#define IS_MTL_GRAPHICS_STEP(__i915, variant, since, until) \
+- (IS_SUBPLATFORM(__i915, INTEL_METEORLAKE, INTEL_SUBPLATFORM_##variant) && \
+- IS_GRAPHICS_STEP(__i915, since, until))
+-
+ #define IS_MTL_DISPLAY_STEP(__i915, since, until) \
+ (IS_METEORLAKE(__i915) && \
+ IS_DISPLAY_STEP(__i915, since, until))
+diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
+index 8f4a25d2cfc24..3f90403d86cb4 100644
+--- a/drivers/gpu/drm/i915/i915_perf.c
++++ b/drivers/gpu/drm/i915/i915_perf.c
+@@ -3255,11 +3255,10 @@ get_sseu_config(struct intel_sseu *out_sseu,
+ */
+ u32 i915_perf_oa_timestamp_frequency(struct drm_i915_private *i915)
+ {
+- /*
+- * Wa_18013179988:dg2
+- * Wa_14015846243:mtl
+- */
+- if (IS_DG2(i915) || IS_METEORLAKE(i915)) {
++ struct intel_gt *gt = to_gt(i915);
++
++ /* Wa_18013179988 */
++ if (IS_DG2(i915) || IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74))) {
+ intel_wakeref_t wakeref;
+ u32 reg, shift;
+
+@@ -4564,7 +4563,7 @@ static bool xehp_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)
+
+ static bool gen12_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
+ {
+- if (IS_METEORLAKE(perf->i915))
++ if (GRAPHICS_VER_FULL(perf->i915) >= IP_VER(12, 70))
+ return reg_in_range_table(addr, mtl_oa_mux_regs);
+ else
+ return reg_in_range_table(addr, gen12_oa_mux_regs);
+diff --git a/drivers/gpu/drm/i915/intel_clock_gating.c b/drivers/gpu/drm/i915/intel_clock_gating.c
+index 81a4d32734e94..c66eb6abd4a2e 100644
+--- a/drivers/gpu/drm/i915/intel_clock_gating.c
++++ b/drivers/gpu/drm/i915/intel_clock_gating.c
+@@ -396,14 +396,6 @@ static void dg2_init_clock_gating(struct drm_i915_private *i915)
+ /* Wa_22010954014:dg2 */
+ intel_uncore_rmw(&i915->uncore, XEHP_CLOCK_GATE_DIS, 0,
+ SGSI_SIDECLK_DIS);
+-
+- /*
+- * Wa_14010733611:dg2_g10
+- * Wa_22010146351:dg2_g10
+- */
+- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0))
+- intel_uncore_rmw(&i915->uncore, XEHP_CLOCK_GATE_DIS, 0,
+- SGR_DIS | SGGI_DIS);
+ }
+
+ static void pvc_init_clock_gating(struct drm_i915_private *i915)
+diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.c b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
+index aae780e4a4aa3..2bbcdc649e862 100644
+--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c
++++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
+@@ -804,15 +804,15 @@ op_remap(struct drm_gpuva_op_remap *r,
+ struct drm_gpuva_op_unmap *u = r->unmap;
+ struct nouveau_uvma *uvma = uvma_from_va(u->va);
+ u64 addr = uvma->va.va.addr;
+- u64 range = uvma->va.va.range;
++ u64 end = uvma->va.va.addr + uvma->va.va.range;
+
+ if (r->prev)
+ addr = r->prev->va.addr + r->prev->va.range;
+
+ if (r->next)
+- range = r->next->va.addr - addr;
++ end = r->next->va.addr;
+
+- op_unmap_range(u, addr, range);
++ op_unmap_range(u, addr, end - addr);
+ }
+
+ static int
+diff --git a/drivers/gpu/drm/panfrost/panfrost_gpu.c b/drivers/gpu/drm/panfrost/panfrost_gpu.c
+index eca45b83e4e67..c067ff550692a 100644
+--- a/drivers/gpu/drm/panfrost/panfrost_gpu.c
++++ b/drivers/gpu/drm/panfrost/panfrost_gpu.c
+@@ -387,19 +387,19 @@ void panfrost_gpu_power_off(struct panfrost_device *pfdev)
+
+ gpu_write(pfdev, SHADER_PWROFF_LO, pfdev->features.shader_present);
+ ret = readl_relaxed_poll_timeout(pfdev->iomem + SHADER_PWRTRANS_LO,
+- val, !val, 1, 1000);
++ val, !val, 1, 2000);
+ if (ret)
+ dev_err(pfdev->dev, "shader power transition timeout");
+
+ gpu_write(pfdev, TILER_PWROFF_LO, pfdev->features.tiler_present);
+ ret = readl_relaxed_poll_timeout(pfdev->iomem + TILER_PWRTRANS_LO,
+- val, !val, 1, 1000);
++ val, !val, 1, 2000);
+ if (ret)
+ dev_err(pfdev->dev, "tiler power transition timeout");
+
+ gpu_write(pfdev, L2_PWROFF_LO, pfdev->features.l2_present);
+ ret = readl_poll_timeout(pfdev->iomem + L2_PWRTRANS_LO,
+- val, !val, 0, 1000);
++ val, !val, 0, 2000);
+ if (ret)
+ dev_err(pfdev->dev, "l2 power transition timeout");
+ }
+diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
+index e7cd27e387df1..470add73f7bda 100644
+--- a/drivers/md/dm-integrity.c
++++ b/drivers/md/dm-integrity.c
+@@ -4231,7 +4231,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned int argc, char **argv
+ } else if (sscanf(opt_string, "sectors_per_bit:%llu%c", &llval, &dummy) == 1) {
+ log2_sectors_per_bitmap_bit = !llval ? 0 : __ilog2_u64(llval);
+ } else if (sscanf(opt_string, "bitmap_flush_interval:%u%c", &val, &dummy) == 1) {
+- if (val >= (uint64_t)UINT_MAX * 1000 / HZ) {
++ if ((uint64_t)val >= (uint64_t)UINT_MAX * 1000 / HZ) {
+ r = -EINVAL;
+ ti->error = "Invalid bitmap_flush_interval argument";
+ goto bad;
+diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
+index 5ad51271a5349..b8fde22aebf93 100644
+--- a/drivers/net/dsa/mv88e6xxx/chip.c
++++ b/drivers/net/dsa/mv88e6xxx/chip.c
+@@ -5386,8 +5386,12 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
+ .family = MV88E6XXX_FAMILY_6250,
+ .name = "Marvell 88E6020",
+ .num_databases = 64,
+- .num_ports = 4,
++ /* Ports 2-4 are not routed to pins
++ * => usable ports 0, 1, 5, 6
++ */
++ .num_ports = 7,
+ .num_internal_phys = 2,
++ .invalid_port_mask = BIT(2) | BIT(3) | BIT(4),
+ .max_vid = 4095,
+ .port_base_addr = 0x8,
+ .phy_base_addr = 0x0,
+diff --git a/drivers/net/dsa/sja1105/sja1105_mdio.c b/drivers/net/dsa/sja1105/sja1105_mdio.c
+index 833e55e4b9612..52ddb4ef259e9 100644
+--- a/drivers/net/dsa/sja1105/sja1105_mdio.c
++++ b/drivers/net/dsa/sja1105/sja1105_mdio.c
+@@ -94,7 +94,7 @@ int sja1110_pcs_mdio_read_c45(struct mii_bus *bus, int phy, int mmd, int reg)
+ return tmp & 0xffff;
+ }
+
+-int sja1110_pcs_mdio_write_c45(struct mii_bus *bus, int phy, int reg, int mmd,
++int sja1110_pcs_mdio_write_c45(struct mii_bus *bus, int phy, int mmd, int reg,
+ u16 val)
+ {
+ struct sja1105_mdio_private *mdio_priv = bus->priv;
+diff --git a/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c b/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c
+index 9cae5a3090000..b3d04f49f77e9 100644
+--- a/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c
++++ b/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c
+@@ -391,7 +391,9 @@ static void umac_reset(struct bcmasp_intf *intf)
+ umac_wl(intf, 0x0, UMC_CMD);
+ umac_wl(intf, UMC_CMD_SW_RESET, UMC_CMD);
+ usleep_range(10, 100);
+- umac_wl(intf, 0x0, UMC_CMD);
++ /* We hold the umac in reset and bring it out of
++ * reset when phy link is up.
++ */
+ }
+
+ static void umac_set_hw_addr(struct bcmasp_intf *intf,
+@@ -411,6 +413,8 @@ static void umac_enable_set(struct bcmasp_intf *intf, u32 mask,
+ u32 reg;
+
+ reg = umac_rl(intf, UMC_CMD);
++ if (reg & UMC_CMD_SW_RESET)
++ return;
+ if (enable)
+ reg |= mask;
+ else
+@@ -429,7 +433,6 @@ static void umac_init(struct bcmasp_intf *intf)
+ umac_wl(intf, 0x800, UMC_FRM_LEN);
+ umac_wl(intf, 0xffff, UMC_PAUSE_CNTRL);
+ umac_wl(intf, 0x800, UMC_RX_MAX_PKT_SZ);
+- umac_enable_set(intf, UMC_CMD_PROMISC, 1);
+ }
+
+ static int bcmasp_tx_poll(struct napi_struct *napi, int budget)
+@@ -656,6 +659,12 @@ static void bcmasp_adj_link(struct net_device *dev)
+ UMC_CMD_HD_EN | UMC_CMD_RX_PAUSE_IGNORE |
+ UMC_CMD_TX_PAUSE_IGNORE);
+ reg |= cmd_bits;
++ if (reg & UMC_CMD_SW_RESET) {
++ reg &= ~UMC_CMD_SW_RESET;
++ umac_wl(intf, reg, UMC_CMD);
++ udelay(2);
++ reg |= UMC_CMD_TX_EN | UMC_CMD_RX_EN | UMC_CMD_PROMISC;
++ }
+ umac_wl(intf, reg, UMC_CMD);
+
+ intf->eee.eee_active = phy_init_eee(phydev, 0) >= 0;
+@@ -1061,9 +1070,6 @@ static int bcmasp_netif_init(struct net_device *dev, bool phy_connect)
+
+ umac_init(intf);
+
+- /* Disable the UniMAC RX/TX */
+- umac_enable_set(intf, (UMC_CMD_RX_EN | UMC_CMD_TX_EN), 0);
+-
+ umac_set_hw_addr(intf, dev->dev_addr);
+
+ intf->old_duplex = -1;
+@@ -1083,9 +1089,6 @@ static int bcmasp_netif_init(struct net_device *dev, bool phy_connect)
+
+ bcmasp_enable_rx(intf, 1);
+
+- /* Turn on UniMAC TX/RX */
+- umac_enable_set(intf, (UMC_CMD_RX_EN | UMC_CMD_TX_EN), 1);
+-
+ intf->crc_fwd = !!(umac_rl(intf, UMC_CMD) & UMC_CMD_CRC_FWD);
+
+ bcmasp_netif_start(dev);
+@@ -1321,7 +1324,14 @@ static void bcmasp_suspend_to_wol(struct bcmasp_intf *intf)
+ if (intf->wolopts & WAKE_FILTER)
+ bcmasp_netfilt_suspend(intf);
+
+- /* UniMAC receive needs to be turned on */
++ /* Bring UniMAC out of reset if needed and enable RX */
++ reg = umac_rl(intf, UMC_CMD);
++ if (reg & UMC_CMD_SW_RESET)
++ reg &= ~UMC_CMD_SW_RESET;
++
++ reg |= UMC_CMD_RX_EN | UMC_CMD_PROMISC;
++ umac_wl(intf, reg, UMC_CMD);
++
+ umac_enable_set(intf, UMC_CMD_RX_EN, 1);
+
+ if (intf->parent->wol_irq > 0) {
+diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
+index 54da59286df4e..7ca8cd78d5574 100644
+--- a/drivers/net/ethernet/freescale/fec_main.c
++++ b/drivers/net/ethernet/freescale/fec_main.c
+@@ -2381,8 +2381,6 @@ static int fec_enet_mii_probe(struct net_device *ndev)
+ fep->link = 0;
+ fep->full_duplex = 0;
+
+- phy_dev->mac_managed_pm = true;
+-
+ phy_attached_info(phy_dev);
+
+ return 0;
+@@ -2394,10 +2392,12 @@ static int fec_enet_mii_init(struct platform_device *pdev)
+ struct net_device *ndev = platform_get_drvdata(pdev);
+ struct fec_enet_private *fep = netdev_priv(ndev);
+ bool suppress_preamble = false;
++ struct phy_device *phydev;
+ struct device_node *node;
+ int err = -ENXIO;
+ u32 mii_speed, holdtime;
+ u32 bus_freq;
++ int addr;
+
+ /*
+ * The i.MX28 dual fec interfaces are not equal.
+@@ -2511,6 +2511,13 @@ static int fec_enet_mii_init(struct platform_device *pdev)
+ goto err_out_free_mdiobus;
+ of_node_put(node);
+
++ /* find all the PHY devices on the bus and set mac_managed_pm to true */
++ for (addr = 0; addr < PHY_MAX_ADDR; addr++) {
++ phydev = mdiobus_get_phy(fep->mii_bus, addr);
++ if (phydev)
++ phydev->mac_managed_pm = true;
++ }
++
+ mii_cnt++;
+
+ /* save fec0 mii_bus */
+diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_tqp_stats.c b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_tqp_stats.c
+index f3c9395d8351c..618f66d9586b3 100644
+--- a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_tqp_stats.c
++++ b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_tqp_stats.c
+@@ -85,7 +85,7 @@ int hclge_comm_tqps_update_stats(struct hnae3_handle *handle,
+ hclge_comm_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_TX_STATS,
+ true);
+
+- desc.data[0] = cpu_to_le32(tqp->index & 0x1ff);
++ desc.data[0] = cpu_to_le32(tqp->index);
+ ret = hclge_comm_cmd_send(hw, &desc, 1);
+ if (ret) {
+ dev_err(&hw->cmq.csq.pdev->dev,
+diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
+index 682239f33082b..78181eea93c1c 100644
+--- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
++++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
+@@ -78,6 +78,9 @@ static const struct hns3_stats hns3_rxq_stats[] = {
+ #define HNS3_NIC_LB_TEST_NO_MEM_ERR 1
+ #define HNS3_NIC_LB_TEST_TX_CNT_ERR 2
+ #define HNS3_NIC_LB_TEST_RX_CNT_ERR 3
++#define HNS3_NIC_LB_TEST_UNEXECUTED 4
++
++static int hns3_get_sset_count(struct net_device *netdev, int stringset);
+
+ static int hns3_lp_setup(struct net_device *ndev, enum hnae3_loop loop, bool en)
+ {
+@@ -418,18 +421,26 @@ static void hns3_do_external_lb(struct net_device *ndev,
+ static void hns3_self_test(struct net_device *ndev,
+ struct ethtool_test *eth_test, u64 *data)
+ {
++ int cnt = hns3_get_sset_count(ndev, ETH_SS_TEST);
+ struct hns3_nic_priv *priv = netdev_priv(ndev);
+ struct hnae3_handle *h = priv->ae_handle;
+ int st_param[HNAE3_LOOP_NONE][2];
+ bool if_running = netif_running(ndev);
++ int i;
++
++ /* initialize the loopback test result, avoid marking an unexcuted
++ * loopback test as PASS.
++ */
++ for (i = 0; i < cnt; i++)
++ data[i] = HNS3_NIC_LB_TEST_UNEXECUTED;
+
+ if (hns3_nic_resetting(ndev)) {
+ netdev_err(ndev, "dev resetting!");
+- return;
++ goto failure;
+ }
+
+ if (!(eth_test->flags & ETH_TEST_FL_OFFLINE))
+- return;
++ goto failure;
+
+ if (netif_msg_ifdown(h))
+ netdev_info(ndev, "self test start\n");
+@@ -451,6 +462,10 @@ static void hns3_self_test(struct net_device *ndev,
+
+ if (netif_msg_ifdown(h))
+ netdev_info(ndev, "self test end\n");
++ return;
++
++failure:
++ eth_test->flags |= ETH_TEST_FL_FAILED;
+ }
+
+ static void hns3_update_limit_promisc_mode(struct net_device *netdev,
+diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+index f1ca2cda2961e..dfd0c5f4cb9f5 100644
+--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
++++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+@@ -11614,6 +11614,8 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
+ if (ret)
+ goto err_pci_uninit;
+
++ devl_lock(hdev->devlink);
++
+ /* Firmware command queue initialize */
+ ret = hclge_comm_cmd_queue_init(hdev->pdev, &hdev->hw.hw);
+ if (ret)
+@@ -11793,6 +11795,7 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
+
+ hclge_task_schedule(hdev, round_jiffies_relative(HZ));
+
++ devl_unlock(hdev->devlink);
+ return 0;
+
+ err_mdiobus_unreg:
+@@ -11805,6 +11808,7 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
+ err_cmd_uninit:
+ hclge_comm_cmd_uninit(hdev->ae_dev, &hdev->hw.hw);
+ err_devlink_uninit:
++ devl_unlock(hdev->devlink);
+ hclge_devlink_uninit(hdev);
+ err_pci_uninit:
+ pcim_iounmap(pdev, hdev->hw.hw.io_base);
+diff --git a/drivers/net/ethernet/intel/e1000/e1000_hw.c b/drivers/net/ethernet/intel/e1000/e1000_hw.c
+index 4542e2bc28e8d..f9328f2e669f8 100644
+--- a/drivers/net/ethernet/intel/e1000/e1000_hw.c
++++ b/drivers/net/ethernet/intel/e1000/e1000_hw.c
+@@ -5,6 +5,7 @@
+ * Shared functions for accessing and configuring the MAC
+ */
+
++#include <linux/bitfield.h>
+ #include "e1000.h"
+
+ static s32 e1000_check_downshift(struct e1000_hw *hw);
+@@ -3260,8 +3261,7 @@ static s32 e1000_phy_igp_get_info(struct e1000_hw *hw,
+ return ret_val;
+
+ phy_info->mdix_mode =
+- (e1000_auto_x_mode) ((phy_data & IGP01E1000_PSSR_MDIX) >>
+- IGP01E1000_PSSR_MDIX_SHIFT);
++ (e1000_auto_x_mode)FIELD_GET(IGP01E1000_PSSR_MDIX, phy_data);
+
+ if ((phy_data & IGP01E1000_PSSR_SPEED_MASK) ==
+ IGP01E1000_PSSR_SPEED_1000MBPS) {
+@@ -3272,11 +3272,11 @@ static s32 e1000_phy_igp_get_info(struct e1000_hw *hw,
+ if (ret_val)
+ return ret_val;
+
+- phy_info->local_rx = ((phy_data & SR_1000T_LOCAL_RX_STATUS) >>
+- SR_1000T_LOCAL_RX_STATUS_SHIFT) ?
++ phy_info->local_rx = FIELD_GET(SR_1000T_LOCAL_RX_STATUS,
++ phy_data) ?
+ e1000_1000t_rx_status_ok : e1000_1000t_rx_status_not_ok;
+- phy_info->remote_rx = ((phy_data & SR_1000T_REMOTE_RX_STATUS) >>
+- SR_1000T_REMOTE_RX_STATUS_SHIFT) ?
++ phy_info->remote_rx = FIELD_GET(SR_1000T_REMOTE_RX_STATUS,
++ phy_data) ?
+ e1000_1000t_rx_status_ok : e1000_1000t_rx_status_not_ok;
+
+ /* Get cable length */
+@@ -3326,14 +3326,12 @@ static s32 e1000_phy_m88_get_info(struct e1000_hw *hw,
+ return ret_val;
+
+ phy_info->extended_10bt_distance =
+- ((phy_data & M88E1000_PSCR_10BT_EXT_DIST_ENABLE) >>
+- M88E1000_PSCR_10BT_EXT_DIST_ENABLE_SHIFT) ?
++ FIELD_GET(M88E1000_PSCR_10BT_EXT_DIST_ENABLE, phy_data) ?
+ e1000_10bt_ext_dist_enable_lower :
+ e1000_10bt_ext_dist_enable_normal;
+
+ phy_info->polarity_correction =
+- ((phy_data & M88E1000_PSCR_POLARITY_REVERSAL) >>
+- M88E1000_PSCR_POLARITY_REVERSAL_SHIFT) ?
++ FIELD_GET(M88E1000_PSCR_POLARITY_REVERSAL, phy_data) ?
+ e1000_polarity_reversal_disabled : e1000_polarity_reversal_enabled;
+
+ /* Check polarity status */
+@@ -3347,27 +3345,25 @@ static s32 e1000_phy_m88_get_info(struct e1000_hw *hw,
+ return ret_val;
+
+ phy_info->mdix_mode =
+- (e1000_auto_x_mode) ((phy_data & M88E1000_PSSR_MDIX) >>
+- M88E1000_PSSR_MDIX_SHIFT);
++ (e1000_auto_x_mode)FIELD_GET(M88E1000_PSSR_MDIX, phy_data);
+
+ if ((phy_data & M88E1000_PSSR_SPEED) == M88E1000_PSSR_1000MBS) {
+ /* Cable Length Estimation and Local/Remote Receiver Information
+ * are only valid at 1000 Mbps.
+ */
+ phy_info->cable_length =
+- (e1000_cable_length) ((phy_data &
+- M88E1000_PSSR_CABLE_LENGTH) >>
+- M88E1000_PSSR_CABLE_LENGTH_SHIFT);
++ (e1000_cable_length)FIELD_GET(M88E1000_PSSR_CABLE_LENGTH,
++ phy_data);
+
+ ret_val = e1000_read_phy_reg(hw, PHY_1000T_STATUS, &phy_data);
+ if (ret_val)
+ return ret_val;
+
+- phy_info->local_rx = ((phy_data & SR_1000T_LOCAL_RX_STATUS) >>
+- SR_1000T_LOCAL_RX_STATUS_SHIFT) ?
++ phy_info->local_rx = FIELD_GET(SR_1000T_LOCAL_RX_STATUS,
++ phy_data) ?
+ e1000_1000t_rx_status_ok : e1000_1000t_rx_status_not_ok;
+- phy_info->remote_rx = ((phy_data & SR_1000T_REMOTE_RX_STATUS) >>
+- SR_1000T_REMOTE_RX_STATUS_SHIFT) ?
++ phy_info->remote_rx = FIELD_GET(SR_1000T_REMOTE_RX_STATUS,
++ phy_data) ?
+ e1000_1000t_rx_status_ok : e1000_1000t_rx_status_not_ok;
+ }
+
+@@ -3515,7 +3511,7 @@ s32 e1000_init_eeprom_params(struct e1000_hw *hw)
+ if (ret_val)
+ return ret_val;
+ eeprom_size =
+- (eeprom_size & EEPROM_SIZE_MASK) >> EEPROM_SIZE_SHIFT;
++ FIELD_GET(EEPROM_SIZE_MASK, eeprom_size);
+ /* 256B eeprom size was not supported in earlier hardware, so we
+ * bump eeprom_size up one to ensure that "1" (which maps to
+ * 256B) is never the result used in the shifting logic below.
+@@ -4891,8 +4887,7 @@ static s32 e1000_get_cable_length(struct e1000_hw *hw, u16 *min_length,
+ &phy_data);
+ if (ret_val)
+ return ret_val;
+- cable_length = (phy_data & M88E1000_PSSR_CABLE_LENGTH) >>
+- M88E1000_PSSR_CABLE_LENGTH_SHIFT;
++ cable_length = FIELD_GET(M88E1000_PSSR_CABLE_LENGTH, phy_data);
+
+ /* Convert the enum value to ranged values */
+ switch (cable_length) {
+@@ -5001,8 +4996,7 @@ static s32 e1000_check_polarity(struct e1000_hw *hw,
+ &phy_data);
+ if (ret_val)
+ return ret_val;
+- *polarity = ((phy_data & M88E1000_PSSR_REV_POLARITY) >>
+- M88E1000_PSSR_REV_POLARITY_SHIFT) ?
++ *polarity = FIELD_GET(M88E1000_PSSR_REV_POLARITY, phy_data) ?
+ e1000_rev_polarity_reversed : e1000_rev_polarity_normal;
+
+ } else if (hw->phy_type == e1000_phy_igp) {
+@@ -5072,8 +5066,8 @@ static s32 e1000_check_downshift(struct e1000_hw *hw)
+ if (ret_val)
+ return ret_val;
+
+- hw->speed_downgraded = (phy_data & M88E1000_PSSR_DOWNSHIFT) >>
+- M88E1000_PSSR_DOWNSHIFT_SHIFT;
++ hw->speed_downgraded = FIELD_GET(M88E1000_PSSR_DOWNSHIFT,
++ phy_data);
+ }
+
+ return E1000_SUCCESS;
+diff --git a/drivers/net/ethernet/intel/e1000e/80003es2lan.c b/drivers/net/ethernet/intel/e1000e/80003es2lan.c
+index be9c695dde127..c51fb6bf9c4e0 100644
+--- a/drivers/net/ethernet/intel/e1000e/80003es2lan.c
++++ b/drivers/net/ethernet/intel/e1000e/80003es2lan.c
+@@ -92,8 +92,7 @@ static s32 e1000_init_nvm_params_80003es2lan(struct e1000_hw *hw)
+
+ nvm->type = e1000_nvm_eeprom_spi;
+
+- size = (u16)((eecd & E1000_EECD_SIZE_EX_MASK) >>
+- E1000_EECD_SIZE_EX_SHIFT);
++ size = (u16)FIELD_GET(E1000_EECD_SIZE_EX_MASK, eecd);
+
+ /* Added to a constant, "size" becomes the left-shift value
+ * for setting word_size.
+diff --git a/drivers/net/ethernet/intel/e1000e/82571.c b/drivers/net/ethernet/intel/e1000e/82571.c
+index 0b1e890dd583b..969f855a79ee6 100644
+--- a/drivers/net/ethernet/intel/e1000e/82571.c
++++ b/drivers/net/ethernet/intel/e1000e/82571.c
+@@ -157,8 +157,7 @@ static s32 e1000_init_nvm_params_82571(struct e1000_hw *hw)
+ fallthrough;
+ default:
+ nvm->type = e1000_nvm_eeprom_spi;
+- size = (u16)((eecd & E1000_EECD_SIZE_EX_MASK) >>
+- E1000_EECD_SIZE_EX_SHIFT);
++ size = (u16)FIELD_GET(E1000_EECD_SIZE_EX_MASK, eecd);
+ /* Added to a constant, "size" becomes the left-shift value
+ * for setting word_size.
+ */
+diff --git a/drivers/net/ethernet/intel/e1000e/ethtool.c b/drivers/net/ethernet/intel/e1000e/ethtool.c
+index 9835e6a90d56c..fc0f98ea61332 100644
+--- a/drivers/net/ethernet/intel/e1000e/ethtool.c
++++ b/drivers/net/ethernet/intel/e1000e/ethtool.c
+@@ -654,8 +654,8 @@ static void e1000_get_drvinfo(struct net_device *netdev,
+ */
+ snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
+ "%d.%d-%d",
+- (adapter->eeprom_vers & 0xF000) >> 12,
+- (adapter->eeprom_vers & 0x0FF0) >> 4,
++ FIELD_GET(0xF000, adapter->eeprom_vers),
++ FIELD_GET(0x0FF0, adapter->eeprom_vers),
+ (adapter->eeprom_vers & 0x000F));
+
+ strscpy(drvinfo->bus_info, pci_name(adapter->pdev),
+@@ -925,8 +925,7 @@ static int e1000_reg_test(struct e1000_adapter *adapter, u64 *data)
+ }
+
+ if (mac->type >= e1000_pch_lpt)
+- wlock_mac = (er32(FWSM) & E1000_FWSM_WLOCK_MAC_MASK) >>
+- E1000_FWSM_WLOCK_MAC_SHIFT;
++ wlock_mac = FIELD_GET(E1000_FWSM_WLOCK_MAC_MASK, er32(FWSM));
+
+ for (i = 0; i < mac->rar_entry_count; i++) {
+ if (mac->type >= e1000_pch_lpt) {
+diff --git a/drivers/net/ethernet/intel/e1000e/hw.h b/drivers/net/ethernet/intel/e1000e/hw.h
+index 1fef6bb5a5fbc..4b6e7536170ab 100644
+--- a/drivers/net/ethernet/intel/e1000e/hw.h
++++ b/drivers/net/ethernet/intel/e1000e/hw.h
+@@ -628,6 +628,7 @@ struct e1000_phy_info {
+ u32 id;
+ u32 reset_delay_us; /* in usec */
+ u32 revision;
++ u32 retry_count;
+
+ enum e1000_media_type media_type;
+
+@@ -644,6 +645,7 @@ struct e1000_phy_info {
+ bool polarity_correction;
+ bool speed_downgraded;
+ bool autoneg_wait_to_complete;
++ bool retry_enabled;
+ };
+
+ struct e1000_nvm_info {
+diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c
+index 39e9fc601bf5a..4d83c9a0c023a 100644
+--- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
++++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
+@@ -222,11 +222,18 @@ static bool e1000_phy_is_accessible_pchlan(struct e1000_hw *hw)
+ if (hw->mac.type >= e1000_pch_lpt) {
+ /* Only unforce SMBus if ME is not active */
+ if (!(er32(FWSM) & E1000_ICH_FWSM_FW_VALID)) {
++ /* Switching PHY interface always returns MDI error
++ * so disable retry mechanism to avoid wasting time
++ */
++ e1000e_disable_phy_retry(hw);
++
+ /* Unforce SMBus mode in PHY */
+ e1e_rphy_locked(hw, CV_SMB_CTRL, &phy_reg);
+ phy_reg &= ~CV_SMB_CTRL_FORCE_SMBUS;
+ e1e_wphy_locked(hw, CV_SMB_CTRL, phy_reg);
+
++ e1000e_enable_phy_retry(hw);
++
+ /* Unforce SMBus mode in MAC */
+ mac_reg = er32(CTRL_EXT);
+ mac_reg &= ~E1000_CTRL_EXT_FORCE_SMBUS;
+@@ -310,6 +317,11 @@ static s32 e1000_init_phy_workarounds_pchlan(struct e1000_hw *hw)
+ goto out;
+ }
+
++ /* There is no guarantee that the PHY is accessible at this time
++ * so disable retry mechanism to avoid wasting time
++ */
++ e1000e_disable_phy_retry(hw);
++
+ /* The MAC-PHY interconnect may be in SMBus mode. If the PHY is
+ * inaccessible and resetting the PHY is not blocked, toggle the
+ * LANPHYPC Value bit to force the interconnect to PCIe mode.
+@@ -380,6 +392,8 @@ static s32 e1000_init_phy_workarounds_pchlan(struct e1000_hw *hw)
+ break;
+ }
+
++ e1000e_enable_phy_retry(hw);
++
+ hw->phy.ops.release(hw);
+ if (!ret_val) {
+
+@@ -449,6 +463,11 @@ static s32 e1000_init_phy_params_pchlan(struct e1000_hw *hw)
+
+ phy->id = e1000_phy_unknown;
+
++ if (hw->mac.type == e1000_pch_mtp) {
++ phy->retry_count = 2;
++ e1000e_enable_phy_retry(hw);
++ }
++
+ ret_val = e1000_init_phy_workarounds_pchlan(hw);
+ if (ret_val)
+ return ret_val;
+@@ -1072,13 +1091,11 @@ static s32 e1000_platform_pm_pch_lpt(struct e1000_hw *hw, bool link)
+
+ lat_enc_d = (lat_enc & E1000_LTRV_VALUE_MASK) *
+ (1U << (E1000_LTRV_SCALE_FACTOR *
+- ((lat_enc & E1000_LTRV_SCALE_MASK)
+- >> E1000_LTRV_SCALE_SHIFT)));
++ FIELD_GET(E1000_LTRV_SCALE_MASK, lat_enc)));
+
+ max_ltr_enc_d = (max_ltr_enc & E1000_LTRV_VALUE_MASK) *
+- (1U << (E1000_LTRV_SCALE_FACTOR *
+- ((max_ltr_enc & E1000_LTRV_SCALE_MASK)
+- >> E1000_LTRV_SCALE_SHIFT)));
++ (1U << (E1000_LTRV_SCALE_FACTOR *
++ FIELD_GET(E1000_LTRV_SCALE_MASK, max_ltr_enc)));
+
+ if (lat_enc_d > max_ltr_enc_d)
+ lat_enc = max_ltr_enc;
+@@ -1148,18 +1165,6 @@ s32 e1000_enable_ulp_lpt_lp(struct e1000_hw *hw, bool to_sx)
+ if (ret_val)
+ goto out;
+
+- /* Force SMBus mode in PHY */
+- ret_val = e1000_read_phy_reg_hv_locked(hw, CV_SMB_CTRL, &phy_reg);
+- if (ret_val)
+- goto release;
+- phy_reg |= CV_SMB_CTRL_FORCE_SMBUS;
+- e1000_write_phy_reg_hv_locked(hw, CV_SMB_CTRL, phy_reg);
+-
+- /* Force SMBus mode in MAC */
+- mac_reg = er32(CTRL_EXT);
+- mac_reg |= E1000_CTRL_EXT_FORCE_SMBUS;
+- ew32(CTRL_EXT, mac_reg);
+-
+ /* Si workaround for ULP entry flow on i127/rev6 h/w. Enable
+ * LPLU and disable Gig speed when entering ULP
+ */
+@@ -1315,6 +1320,11 @@ static s32 e1000_disable_ulp_lpt_lp(struct e1000_hw *hw, bool force)
+ /* Toggle LANPHYPC Value bit */
+ e1000_toggle_lanphypc_pch_lpt(hw);
+
++ /* Switching PHY interface always returns MDI error
++ * so disable retry mechanism to avoid wasting time
++ */
++ e1000e_disable_phy_retry(hw);
++
+ /* Unforce SMBus mode in PHY */
+ ret_val = e1000_read_phy_reg_hv_locked(hw, CV_SMB_CTRL, &phy_reg);
+ if (ret_val) {
+@@ -1335,6 +1345,8 @@ static s32 e1000_disable_ulp_lpt_lp(struct e1000_hw *hw, bool force)
+ phy_reg &= ~CV_SMB_CTRL_FORCE_SMBUS;
+ e1000_write_phy_reg_hv_locked(hw, CV_SMB_CTRL, phy_reg);
+
++ e1000e_enable_phy_retry(hw);
++
+ /* Unforce SMBus mode in MAC */
+ mac_reg = er32(CTRL_EXT);
+ mac_reg &= ~E1000_CTRL_EXT_FORCE_SMBUS;
+@@ -2075,8 +2087,7 @@ static s32 e1000_write_smbus_addr(struct e1000_hw *hw)
+ {
+ u16 phy_data;
+ u32 strap = er32(STRAP);
+- u32 freq = (strap & E1000_STRAP_SMT_FREQ_MASK) >>
+- E1000_STRAP_SMT_FREQ_SHIFT;
++ u32 freq = FIELD_GET(E1000_STRAP_SMT_FREQ_MASK, strap);
+ s32 ret_val;
+
+ strap &= E1000_STRAP_SMBUS_ADDRESS_MASK;
+@@ -2562,8 +2573,7 @@ void e1000_copy_rx_addrs_to_phy_ich8lan(struct e1000_hw *hw)
+ hw->phy.ops.write_reg_page(hw, BM_RAR_H(i),
+ (u16)(mac_reg & 0xFFFF));
+ hw->phy.ops.write_reg_page(hw, BM_RAR_CTRL(i),
+- (u16)((mac_reg & E1000_RAH_AV)
+- >> 16));
++ FIELD_GET(E1000_RAH_AV, mac_reg));
+ }
+
+ e1000_disable_phy_wakeup_reg_access_bm(hw, &phy_reg);
+@@ -3205,7 +3215,7 @@ static s32 e1000_valid_nvm_bank_detect_ich8lan(struct e1000_hw *hw, u32 *bank)
+ &nvm_dword);
+ if (ret_val)
+ return ret_val;
+- sig_byte = (u8)((nvm_dword & 0xFF00) >> 8);
++ sig_byte = FIELD_GET(0xFF00, nvm_dword);
+ if ((sig_byte & E1000_ICH_NVM_VALID_SIG_MASK) ==
+ E1000_ICH_NVM_SIG_VALUE) {
+ *bank = 0;
+@@ -3218,7 +3228,7 @@ static s32 e1000_valid_nvm_bank_detect_ich8lan(struct e1000_hw *hw, u32 *bank)
+ &nvm_dword);
+ if (ret_val)
+ return ret_val;
+- sig_byte = (u8)((nvm_dword & 0xFF00) >> 8);
++ sig_byte = FIELD_GET(0xFF00, nvm_dword);
+ if ((sig_byte & E1000_ICH_NVM_VALID_SIG_MASK) ==
+ E1000_ICH_NVM_SIG_VALUE) {
+ *bank = 1;
+diff --git a/drivers/net/ethernet/intel/e1000e/mac.c b/drivers/net/ethernet/intel/e1000e/mac.c
+index 5df7ad93f3d77..30515bfb259ea 100644
+--- a/drivers/net/ethernet/intel/e1000e/mac.c
++++ b/drivers/net/ethernet/intel/e1000e/mac.c
+@@ -52,7 +52,7 @@ void e1000_set_lan_id_multi_port_pcie(struct e1000_hw *hw)
+ * for the device regardless of function swap state.
+ */
+ reg = er32(STATUS);
+- bus->func = (reg & E1000_STATUS_FUNC_MASK) >> E1000_STATUS_FUNC_SHIFT;
++ bus->func = FIELD_GET(E1000_STATUS_FUNC_MASK, reg);
+ }
+
+ /**
+diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
+index f536c856727cb..3692fce201959 100644
+--- a/drivers/net/ethernet/intel/e1000e/netdev.c
++++ b/drivers/net/ethernet/intel/e1000e/netdev.c
+@@ -1788,8 +1788,7 @@ static irqreturn_t e1000_intr_msi(int __always_unused irq, void *data)
+ adapter->corr_errors +=
+ pbeccsts & E1000_PBECCSTS_CORR_ERR_CNT_MASK;
+ adapter->uncorr_errors +=
+- (pbeccsts & E1000_PBECCSTS_UNCORR_ERR_CNT_MASK) >>
+- E1000_PBECCSTS_UNCORR_ERR_CNT_SHIFT;
++ FIELD_GET(E1000_PBECCSTS_UNCORR_ERR_CNT_MASK, pbeccsts);
+
+ /* Do the reset outside of interrupt context */
+ schedule_work(&adapter->reset_task);
+@@ -1868,8 +1867,7 @@ static irqreturn_t e1000_intr(int __always_unused irq, void *data)
+ adapter->corr_errors +=
+ pbeccsts & E1000_PBECCSTS_CORR_ERR_CNT_MASK;
+ adapter->uncorr_errors +=
+- (pbeccsts & E1000_PBECCSTS_UNCORR_ERR_CNT_MASK) >>
+- E1000_PBECCSTS_UNCORR_ERR_CNT_SHIFT;
++ FIELD_GET(E1000_PBECCSTS_UNCORR_ERR_CNT_MASK, pbeccsts);
+
+ /* Do the reset outside of interrupt context */
+ schedule_work(&adapter->reset_task);
+@@ -5031,8 +5029,7 @@ static void e1000e_update_stats(struct e1000_adapter *adapter)
+ adapter->corr_errors +=
+ pbeccsts & E1000_PBECCSTS_CORR_ERR_CNT_MASK;
+ adapter->uncorr_errors +=
+- (pbeccsts & E1000_PBECCSTS_UNCORR_ERR_CNT_MASK) >>
+- E1000_PBECCSTS_UNCORR_ERR_CNT_SHIFT;
++ FIELD_GET(E1000_PBECCSTS_UNCORR_ERR_CNT_MASK, pbeccsts);
+ }
+ }
+
+@@ -6249,7 +6246,7 @@ static int e1000_init_phy_wakeup(struct e1000_adapter *adapter, u32 wufc)
+ phy_reg |= BM_RCTL_MPE;
+ phy_reg &= ~(BM_RCTL_MO_MASK);
+ if (mac_reg & E1000_RCTL_MO_3)
+- phy_reg |= (((mac_reg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
++ phy_reg |= (FIELD_GET(E1000_RCTL_MO_3, mac_reg)
+ << BM_RCTL_MO_SHIFT);
+ if (mac_reg & E1000_RCTL_BAM)
+ phy_reg |= BM_RCTL_BAM;
+@@ -6626,6 +6623,7 @@ static int __e1000_shutdown(struct pci_dev *pdev, bool runtime)
+ struct e1000_hw *hw = &adapter->hw;
+ u32 ctrl, ctrl_ext, rctl, status, wufc;
+ int retval = 0;
++ u16 smb_ctrl;
+
+ /* Runtime suspend should only enable wakeup for link changes */
+ if (runtime)
+@@ -6691,14 +6689,31 @@ static int __e1000_shutdown(struct pci_dev *pdev, bool runtime)
+ if (adapter->hw.phy.type == e1000_phy_igp_3) {
+ e1000e_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
+ } else if (hw->mac.type >= e1000_pch_lpt) {
+- if (wufc && !(wufc & (E1000_WUFC_EX | E1000_WUFC_MC | E1000_WUFC_BC)))
++ if (wufc && !(wufc & (E1000_WUFC_EX | E1000_WUFC_MC | E1000_WUFC_BC))) {
+ /* ULP does not support wake from unicast, multicast
+ * or broadcast.
+ */
+ retval = e1000_enable_ulp_lpt_lp(hw, !runtime);
++ if (retval)
++ return retval;
++ }
++
++ /* Force SMBUS to allow WOL */
++ /* Switching PHY interface always returns MDI error
++ * so disable retry mechanism to avoid wasting time
++ */
++ e1000e_disable_phy_retry(hw);
++
++ e1e_rphy(hw, CV_SMB_CTRL, &smb_ctrl);
++ smb_ctrl |= CV_SMB_CTRL_FORCE_SMBUS;
++ e1e_wphy(hw, CV_SMB_CTRL, smb_ctrl);
+
+- if (retval)
+- return retval;
++ e1000e_enable_phy_retry(hw);
++
++ /* Force SMBus mode in MAC */
++ ctrl_ext = er32(CTRL_EXT);
++ ctrl_ext |= E1000_CTRL_EXT_FORCE_SMBUS;
++ ew32(CTRL_EXT, ctrl_ext);
+ }
+
+ /* Ensure that the appropriate bits are set in LPI_CTRL
+diff --git a/drivers/net/ethernet/intel/e1000e/phy.c b/drivers/net/ethernet/intel/e1000e/phy.c
+index 08c3d477dd6f7..395746bcf8f7c 100644
+--- a/drivers/net/ethernet/intel/e1000e/phy.c
++++ b/drivers/net/ethernet/intel/e1000e/phy.c
+@@ -107,6 +107,16 @@ s32 e1000e_phy_reset_dsp(struct e1000_hw *hw)
+ return e1e_wphy(hw, M88E1000_PHY_GEN_CONTROL, 0);
+ }
+
++void e1000e_disable_phy_retry(struct e1000_hw *hw)
++{
++ hw->phy.retry_enabled = false;
++}
++
++void e1000e_enable_phy_retry(struct e1000_hw *hw)
++{
++ hw->phy.retry_enabled = true;
++}
++
+ /**
+ * e1000e_read_phy_reg_mdic - Read MDI control register
+ * @hw: pointer to the HW structure
+@@ -118,57 +128,73 @@ s32 e1000e_phy_reset_dsp(struct e1000_hw *hw)
+ **/
+ s32 e1000e_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data)
+ {
++ u32 i, mdic = 0, retry_counter, retry_max;
+ struct e1000_phy_info *phy = &hw->phy;
+- u32 i, mdic = 0;
++ bool success;
+
+ if (offset > MAX_PHY_REG_ADDRESS) {
+ e_dbg("PHY Address %d is out of range\n", offset);
+ return -E1000_ERR_PARAM;
+ }
+
++ retry_max = phy->retry_enabled ? phy->retry_count : 0;
++
+ /* Set up Op-code, Phy Address, and register offset in the MDI
+ * Control register. The MAC will take care of interfacing with the
+ * PHY to retrieve the desired data.
+ */
+- mdic = ((offset << E1000_MDIC_REG_SHIFT) |
+- (phy->addr << E1000_MDIC_PHY_SHIFT) |
+- (E1000_MDIC_OP_READ));
++ for (retry_counter = 0; retry_counter <= retry_max; retry_counter++) {
++ success = true;
+
+- ew32(MDIC, mdic);
++ mdic = ((offset << E1000_MDIC_REG_SHIFT) |
++ (phy->addr << E1000_MDIC_PHY_SHIFT) |
++ (E1000_MDIC_OP_READ));
+
+- /* Poll the ready bit to see if the MDI read completed
+- * Increasing the time out as testing showed failures with
+- * the lower time out
+- */
+- for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) {
+- udelay(50);
+- mdic = er32(MDIC);
+- if (mdic & E1000_MDIC_READY)
+- break;
+- }
+- if (!(mdic & E1000_MDIC_READY)) {
+- e_dbg("MDI Read PHY Reg Address %d did not complete\n", offset);
+- return -E1000_ERR_PHY;
+- }
+- if (mdic & E1000_MDIC_ERROR) {
+- e_dbg("MDI Read PHY Reg Address %d Error\n", offset);
+- return -E1000_ERR_PHY;
+- }
+- if (((mdic & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT) != offset) {
+- e_dbg("MDI Read offset error - requested %d, returned %d\n",
+- offset,
+- (mdic & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);
+- return -E1000_ERR_PHY;
+- }
+- *data = (u16)mdic;
++ ew32(MDIC, mdic);
+
+- /* Allow some time after each MDIC transaction to avoid
+- * reading duplicate data in the next MDIC transaction.
+- */
+- if (hw->mac.type == e1000_pch2lan)
+- udelay(100);
++ /* Poll the ready bit to see if the MDI read completed
++ * Increasing the time out as testing showed failures with
++ * the lower time out
++ */
++ for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) {
++ usleep_range(50, 60);
++ mdic = er32(MDIC);
++ if (mdic & E1000_MDIC_READY)
++ break;
++ }
++ if (!(mdic & E1000_MDIC_READY)) {
++ e_dbg("MDI Read PHY Reg Address %d did not complete\n",
++ offset);
++ success = false;
++ }
++ if (mdic & E1000_MDIC_ERROR) {
++ e_dbg("MDI Read PHY Reg Address %d Error\n", offset);
++ success = false;
++ }
++ if (FIELD_GET(E1000_MDIC_REG_MASK, mdic) != offset) {
++ e_dbg("MDI Read offset error - requested %d, returned %d\n",
++ offset, FIELD_GET(E1000_MDIC_REG_MASK, mdic));
++ success = false;
++ }
+
+- return 0;
++ /* Allow some time after each MDIC transaction to avoid
++ * reading duplicate data in the next MDIC transaction.
++ */
++ if (hw->mac.type == e1000_pch2lan)
++ usleep_range(100, 150);
++
++ if (success) {
++ *data = (u16)mdic;
++ return 0;
++ }
++
++ if (retry_counter != retry_max) {
++ e_dbg("Perform retry on PHY transaction...\n");
++ mdelay(10);
++ }
++ }
++
++ return -E1000_ERR_PHY;
+ }
+
+ /**
+@@ -181,57 +207,72 @@ s32 e1000e_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data)
+ **/
+ s32 e1000e_write_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 data)
+ {
++ u32 i, mdic = 0, retry_counter, retry_max;
+ struct e1000_phy_info *phy = &hw->phy;
+- u32 i, mdic = 0;
++ bool success;
+
+ if (offset > MAX_PHY_REG_ADDRESS) {
+ e_dbg("PHY Address %d is out of range\n", offset);
+ return -E1000_ERR_PARAM;
+ }
+
++ retry_max = phy->retry_enabled ? phy->retry_count : 0;
++
+ /* Set up Op-code, Phy Address, and register offset in the MDI
+ * Control register. The MAC will take care of interfacing with the
+ * PHY to retrieve the desired data.
+ */
+- mdic = (((u32)data) |
+- (offset << E1000_MDIC_REG_SHIFT) |
+- (phy->addr << E1000_MDIC_PHY_SHIFT) |
+- (E1000_MDIC_OP_WRITE));
++ for (retry_counter = 0; retry_counter <= retry_max; retry_counter++) {
++ success = true;
+
+- ew32(MDIC, mdic);
++ mdic = (((u32)data) |
++ (offset << E1000_MDIC_REG_SHIFT) |
++ (phy->addr << E1000_MDIC_PHY_SHIFT) |
++ (E1000_MDIC_OP_WRITE));
+
+- /* Poll the ready bit to see if the MDI read completed
+- * Increasing the time out as testing showed failures with
+- * the lower time out
+- */
+- for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) {
+- udelay(50);
+- mdic = er32(MDIC);
+- if (mdic & E1000_MDIC_READY)
+- break;
+- }
+- if (!(mdic & E1000_MDIC_READY)) {
+- e_dbg("MDI Write PHY Reg Address %d did not complete\n", offset);
+- return -E1000_ERR_PHY;
+- }
+- if (mdic & E1000_MDIC_ERROR) {
+- e_dbg("MDI Write PHY Red Address %d Error\n", offset);
+- return -E1000_ERR_PHY;
+- }
+- if (((mdic & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT) != offset) {
+- e_dbg("MDI Write offset error - requested %d, returned %d\n",
+- offset,
+- (mdic & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);
+- return -E1000_ERR_PHY;
+- }
++ ew32(MDIC, mdic);
+
+- /* Allow some time after each MDIC transaction to avoid
+- * reading duplicate data in the next MDIC transaction.
+- */
+- if (hw->mac.type == e1000_pch2lan)
+- udelay(100);
++ /* Poll the ready bit to see if the MDI write completed.
++ * Extended time out: testing showed failures with the lower one.
++ * Busy-wait (udelay): PHY accessors may run in atomic context.
++ */
++ for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) {
++ udelay(50);
++ mdic = er32(MDIC);
++ if (mdic & E1000_MDIC_READY)
++ break;
++ }
++ if (!(mdic & E1000_MDIC_READY)) {
++ e_dbg("MDI Write PHY Reg Address %d did not complete\n",
++ offset);
++ success = false;
++ }
++ if (mdic & E1000_MDIC_ERROR) {
++ e_dbg("MDI Write PHY Reg Address %d Error\n", offset);
++ success = false;
++ }
++ if (FIELD_GET(E1000_MDIC_REG_MASK, mdic) != offset) {
++ e_dbg("MDI Write offset error - requested %d, returned %d\n",
++ offset, FIELD_GET(E1000_MDIC_REG_MASK, mdic));
++ success = false;
++ }
+
+- return 0;
++ /* Allow some time after each MDIC transaction to avoid
++ * reading duplicate data in the next MDIC transaction.
++ */
++ if (hw->mac.type == e1000_pch2lan)
++ udelay(100);
++
++ if (success)
++ return 0;
++
++ if (retry_counter != retry_max) {
++ e_dbg("Perform retry on PHY transaction...\n");
++ mdelay(10);
++ }
++ }
++
++ return -E1000_ERR_PHY;
+ }
+
+ /**
+@@ -1793,8 +1834,7 @@ s32 e1000e_get_cable_length_m88(struct e1000_hw *hw)
+ if (ret_val)
+ return ret_val;
+
+- index = ((phy_data & M88E1000_PSSR_CABLE_LENGTH) >>
+- M88E1000_PSSR_CABLE_LENGTH_SHIFT);
++ index = FIELD_GET(M88E1000_PSSR_CABLE_LENGTH, phy_data);
+
+ if (index >= M88E1000_CABLE_LENGTH_TABLE_SIZE - 1)
+ return -E1000_ERR_PHY;
+@@ -3234,8 +3274,7 @@ s32 e1000_get_cable_length_82577(struct e1000_hw *hw)
+ if (ret_val)
+ return ret_val;
+
+- length = ((phy_data & I82577_DSTATUS_CABLE_LENGTH) >>
+- I82577_DSTATUS_CABLE_LENGTH_SHIFT);
++ length = FIELD_GET(I82577_DSTATUS_CABLE_LENGTH, phy_data);
+
+ if (length == E1000_CABLE_LENGTH_UNDEFINED)
+ return -E1000_ERR_PHY;
+diff --git a/drivers/net/ethernet/intel/e1000e/phy.h b/drivers/net/ethernet/intel/e1000e/phy.h
+index c48777d095235..049bb325b4b14 100644
+--- a/drivers/net/ethernet/intel/e1000e/phy.h
++++ b/drivers/net/ethernet/intel/e1000e/phy.h
+@@ -51,6 +51,8 @@ s32 e1000e_read_phy_reg_bm2(struct e1000_hw *hw, u32 offset, u16 *data);
+ s32 e1000e_write_phy_reg_bm2(struct e1000_hw *hw, u32 offset, u16 data);
+ void e1000_power_up_phy_copper(struct e1000_hw *hw);
+ void e1000_power_down_phy_copper(struct e1000_hw *hw);
++void e1000e_disable_phy_retry(struct e1000_hw *hw);
++void e1000e_enable_phy_retry(struct e1000_hw *hw);
+ s32 e1000e_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data);
+ s32 e1000e_write_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 data);
+ s32 e1000_read_phy_reg_hv(struct e1000_hw *hw, u32 offset, u16 *data);
+diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c
+index af1b0cde36703..aed5e0bf6313e 100644
+--- a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c
++++ b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c
+@@ -1,6 +1,7 @@
+ // SPDX-License-Identifier: GPL-2.0
+ /* Copyright(c) 2013 - 2019 Intel Corporation. */
+
++#include <linux/bitfield.h>
+ #include "fm10k_pf.h"
+ #include "fm10k_vf.h"
+
+@@ -1575,8 +1576,7 @@ static s32 fm10k_get_fault_pf(struct fm10k_hw *hw, int type,
+ if (func & FM10K_FAULT_FUNC_PF)
+ fault->func = 0;
+ else
+- fault->func = 1 + ((func & FM10K_FAULT_FUNC_VF_MASK) >>
+- FM10K_FAULT_FUNC_VF_SHIFT);
++ fault->func = 1 + FIELD_GET(FM10K_FAULT_FUNC_VF_MASK, func);
+
+ /* record fault type */
+ fault->type = func & FM10K_FAULT_FUNC_TYPE_MASK;
+diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_vf.c b/drivers/net/ethernet/intel/fm10k/fm10k_vf.c
+index dc8ccd378ec92..7fb1961f29210 100644
+--- a/drivers/net/ethernet/intel/fm10k/fm10k_vf.c
++++ b/drivers/net/ethernet/intel/fm10k/fm10k_vf.c
+@@ -1,6 +1,7 @@
+ // SPDX-License-Identifier: GPL-2.0
+ /* Copyright(c) 2013 - 2019 Intel Corporation. */
+
++#include <linux/bitfield.h>
+ #include "fm10k_vf.h"
+
+ /**
+@@ -126,15 +127,14 @@ static s32 fm10k_init_hw_vf(struct fm10k_hw *hw)
+ hw->mac.max_queues = i;
+
+ /* fetch default VLAN and ITR scale */
+- hw->mac.default_vid = (fm10k_read_reg(hw, FM10K_TXQCTL(0)) &
+- FM10K_TXQCTL_VID_MASK) >> FM10K_TXQCTL_VID_SHIFT;
++ hw->mac.default_vid = FIELD_GET(FM10K_TXQCTL_VID_MASK,
++ fm10k_read_reg(hw, FM10K_TXQCTL(0)));
+ /* Read the ITR scale from TDLEN. See the definition of
+ * FM10K_TDLEN_ITR_SCALE_SHIFT for more information about how TDLEN is
+ * used here.
+ */
+- hw->mac.itr_scale = (fm10k_read_reg(hw, FM10K_TDLEN(0)) &
+- FM10K_TDLEN_ITR_SCALE_MASK) >>
+- FM10K_TDLEN_ITR_SCALE_SHIFT;
++ hw->mac.itr_scale = FIELD_GET(FM10K_TDLEN_ITR_SCALE_MASK,
++ fm10k_read_reg(hw, FM10K_TDLEN(0)));
+
+ return 0;
+
+diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
+index 55bb0b5310d5b..3e6839ac1f0f1 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e.h
++++ b/drivers/net/ethernet/intel/i40e/i40e.h
+@@ -4,47 +4,20 @@
+ #ifndef _I40E_H_
+ #define _I40E_H_
+
+-#include <net/tcp.h>
+-#include <net/udp.h>
+-#include <linux/types.h>
+-#include <linux/errno.h>
+-#include <linux/module.h>
+-#include <linux/pci.h>
+-#include <linux/netdevice.h>
+-#include <linux/ioport.h>
+-#include <linux/iommu.h>
+-#include <linux/slab.h>
+-#include <linux/list.h>
+-#include <linux/hashtable.h>
+-#include <linux/string.h>
+-#include <linux/in.h>
+-#include <linux/ip.h>
+-#include <linux/sctp.h>
+-#include <linux/pkt_sched.h>
+-#include <linux/ipv6.h>
+-#include <net/checksum.h>
+-#include <net/ip6_checksum.h>
+ #include <linux/ethtool.h>
+-#include <linux/if_vlan.h>
+-#include <linux/if_macvlan.h>
+-#include <linux/if_bridge.h>
+-#include <linux/clocksource.h>
+-#include <linux/net_tstamp.h>
++#include <linux/pci.h>
+ #include <linux/ptp_clock_kernel.h>
++#include <linux/types.h>
++#include <linux/avf/virtchnl.h>
++#include <linux/net/intel/i40e_client.h>
+ #include <net/pkt_cls.h>
+-#include <net/pkt_sched.h>
+-#include <net/tc_act/tc_gact.h>
+-#include <net/tc_act/tc_mirred.h>
+ #include <net/udp_tunnel.h>
+-#include <net/xdp_sock.h>
+-#include <linux/bitfield.h>
+-#include "i40e_type.h"
++#include "i40e_dcb.h"
++#include "i40e_debug.h"
++#include "i40e_io.h"
+ #include "i40e_prototype.h"
+-#include <linux/net/intel/i40e_client.h>
+-#include <linux/avf/virtchnl.h>
+-#include "i40e_virtchnl_pf.h"
++#include "i40e_register.h"
+ #include "i40e_txrx.h"
+-#include "i40e_dcb.h"
+
+ /* Useful i40e defaults */
+ #define I40E_MAX_VEB 16
+@@ -108,7 +81,7 @@
+ #define I40E_MAX_BW_INACTIVE_ACCUM 4 /* accumulate 4 credits max */
+
+ /* driver state flags */
+-enum i40e_state_t {
++enum i40e_state {
+ __I40E_TESTING,
+ __I40E_CONFIG_BUSY,
+ __I40E_CONFIG_DONE,
+@@ -156,7 +129,7 @@ enum i40e_state_t {
+ BIT_ULL(__I40E_PF_RESET_AND_REBUILD_REQUESTED)
+
+ /* VSI state flags */
+-enum i40e_vsi_state_t {
++enum i40e_vsi_state {
+ __I40E_VSI_DOWN,
+ __I40E_VSI_NEEDS_RESTART,
+ __I40E_VSI_SYNCING_FILTERS,
+@@ -992,6 +965,7 @@ struct i40e_q_vector {
+ struct rcu_head rcu; /* to avoid race with update stats on free */
+ char name[I40E_INT_NAME_STR_LEN];
+ bool arm_wb_state;
++ bool in_busy_poll;
+ int irq_num; /* IRQ assigned to this q_vector */
+ } ____cacheline_internodealigned_in_smp;
+
+@@ -1321,4 +1295,15 @@ static inline u32 i40e_is_tc_mqprio_enabled(struct i40e_pf *pf)
+ return pf->flags & I40E_FLAG_TC_MQPRIO;
+ }
+
++/**
++ * i40e_hw_to_pf - get pf pointer from the hardware structure
++ * @hw: pointer to the device HW structure
++ **/
++static inline struct i40e_pf *i40e_hw_to_pf(struct i40e_hw *hw)
++{
++ return container_of(hw, struct i40e_pf, hw);
++}
++
++struct device *i40e_hw_to_dev(struct i40e_hw *hw);
++
+ #endif /* _I40E_H_ */
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.c b/drivers/net/ethernet/intel/i40e/i40e_adminq.c
+index 100eb77b8dfe6..9ce6e633cc2f0 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_adminq.c
++++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.c
+@@ -1,9 +1,9 @@
+ // SPDX-License-Identifier: GPL-2.0
+ /* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+-#include "i40e_type.h"
++#include <linux/delay.h>
++#include "i40e_alloc.h"
+ #include "i40e_register.h"
+-#include "i40e_adminq.h"
+ #include "i40e_prototype.h"
+
+ static void i40e_resume_aq(struct i40e_hw *hw);
+@@ -51,7 +51,6 @@ static int i40e_alloc_adminq_asq_ring(struct i40e_hw *hw)
+ int ret_code;
+
+ ret_code = i40e_allocate_dma_mem(hw, &hw->aq.asq.desc_buf,
+- i40e_mem_atq_ring,
+ (hw->aq.num_asq_entries *
+ sizeof(struct i40e_aq_desc)),
+ I40E_ADMINQ_DESC_ALIGNMENT);
+@@ -78,7 +77,6 @@ static int i40e_alloc_adminq_arq_ring(struct i40e_hw *hw)
+ int ret_code;
+
+ ret_code = i40e_allocate_dma_mem(hw, &hw->aq.arq.desc_buf,
+- i40e_mem_arq_ring,
+ (hw->aq.num_arq_entries *
+ sizeof(struct i40e_aq_desc)),
+ I40E_ADMINQ_DESC_ALIGNMENT);
+@@ -136,7 +134,6 @@ static int i40e_alloc_arq_bufs(struct i40e_hw *hw)
+ for (i = 0; i < hw->aq.num_arq_entries; i++) {
+ bi = &hw->aq.arq.r.arq_bi[i];
+ ret_code = i40e_allocate_dma_mem(hw, bi,
+- i40e_mem_arq_buf,
+ hw->aq.arq_buf_size,
+ I40E_ADMINQ_DESC_ALIGNMENT);
+ if (ret_code)
+@@ -198,7 +195,6 @@ static int i40e_alloc_asq_bufs(struct i40e_hw *hw)
+ for (i = 0; i < hw->aq.num_asq_entries; i++) {
+ bi = &hw->aq.asq.r.asq_bi[i];
+ ret_code = i40e_allocate_dma_mem(hw, bi,
+- i40e_mem_asq_buf,
+ hw->aq.asq_buf_size,
+ I40E_ADMINQ_DESC_ALIGNMENT);
+ if (ret_code)
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.h b/drivers/net/ethernet/intel/i40e/i40e_adminq.h
+index 267f2e0a21ce8..80125bea80a2a 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_adminq.h
++++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.h
+@@ -4,7 +4,8 @@
+ #ifndef _I40E_ADMINQ_H_
+ #define _I40E_ADMINQ_H_
+
+-#include "i40e_osdep.h"
++#include <linux/mutex.h>
++#include "i40e_alloc.h"
+ #include "i40e_adminq_cmd.h"
+
+ #define I40E_ADMINQ_DESC(R, i) \
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
+index 3357d65a906bf..18a1c3b6d72c5 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
++++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
+@@ -4,6 +4,8 @@
+ #ifndef _I40E_ADMINQ_CMD_H_
+ #define _I40E_ADMINQ_CMD_H_
+
++#include <linux/bits.h>
++
+ /* This header file defines the i40e Admin Queue commands and is shared between
+ * i40e Firmware and Software.
+ *
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_alloc.h b/drivers/net/ethernet/intel/i40e/i40e_alloc.h
+index a6c9a9e343d11..e0dde326255d6 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_alloc.h
++++ b/drivers/net/ethernet/intel/i40e/i40e_alloc.h
+@@ -4,25 +4,25 @@
+ #ifndef _I40E_ALLOC_H_
+ #define _I40E_ALLOC_H_
+
++#include <linux/types.h>
++
+ struct i40e_hw;
+
+-/* Memory allocation types */
+-enum i40e_memory_type {
+- i40e_mem_arq_buf = 0, /* ARQ indirect command buffer */
+- i40e_mem_asq_buf = 1,
+- i40e_mem_atq_buf = 2, /* ATQ indirect command buffer */
+- i40e_mem_arq_ring = 3, /* ARQ descriptor ring */
+- i40e_mem_atq_ring = 4, /* ATQ descriptor ring */
+- i40e_mem_pd = 5, /* Page Descriptor */
+- i40e_mem_bp = 6, /* Backing Page - 4KB */
+- i40e_mem_bp_jumbo = 7, /* Backing Page - > 4KB */
+- i40e_mem_reserved
++/* memory allocation tracking */
++struct i40e_dma_mem {
++ void *va;
++ dma_addr_t pa;
++ u32 size;
++};
++
++struct i40e_virt_mem {
++ void *va;
++ u32 size;
+ };
+
+ /* prototype for functions used for dynamic memory allocation */
+ int i40e_allocate_dma_mem(struct i40e_hw *hw,
+ struct i40e_dma_mem *mem,
+- enum i40e_memory_type type,
+ u64 size, u32 alignment);
+ int i40e_free_dma_mem(struct i40e_hw *hw,
+ struct i40e_dma_mem *mem);
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.c b/drivers/net/ethernet/intel/i40e/i40e_client.c
+index 639c5a1ca853b..306758428aefd 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_client.c
++++ b/drivers/net/ethernet/intel/i40e/i40e_client.c
+@@ -6,7 +6,6 @@
+ #include <linux/net/intel/i40e_client.h>
+
+ #include "i40e.h"
+-#include "i40e_prototype.h"
+
+ static LIST_HEAD(i40e_devices);
+ static DEFINE_MUTEX(i40e_device_mutex);
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c
+index 1b493854f5229..4d7caa1199719 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_common.c
++++ b/drivers/net/ethernet/intel/i40e/i40e_common.c
+@@ -1,11 +1,15 @@
+ // SPDX-License-Identifier: GPL-2.0
+ /* Copyright(c) 2013 - 2021 Intel Corporation. */
+
+-#include "i40e.h"
+-#include "i40e_type.h"
+-#include "i40e_adminq.h"
+-#include "i40e_prototype.h"
+ #include <linux/avf/virtchnl.h>
++#include <linux/bitfield.h>
++#include <linux/delay.h>
++#include <linux/etherdevice.h>
++#include <linux/pci.h>
++#include "i40e_adminq_cmd.h"
++#include "i40e_devids.h"
++#include "i40e_prototype.h"
++#include "i40e_register.h"
+
+ /**
+ * i40e_set_mac_type - Sets MAC type
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_dcb.c b/drivers/net/ethernet/intel/i40e/i40e_dcb.c
+index f81e744c0fb36..d57dd30b024fa 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_dcb.c
++++ b/drivers/net/ethernet/intel/i40e/i40e_dcb.c
+@@ -1,9 +1,11 @@
+ // SPDX-License-Identifier: GPL-2.0
+ /* Copyright(c) 2013 - 2021 Intel Corporation. */
+
++#include <linux/bitfield.h>
+ #include "i40e_adminq.h"
+-#include "i40e_prototype.h"
++#include "i40e_alloc.h"
+ #include "i40e_dcb.h"
++#include "i40e_prototype.h"
+
+ /**
+ * i40e_get_dcbx_status
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c b/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c
+index 195421d863ab1..077a95dad32cf 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c
++++ b/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c
+@@ -2,8 +2,8 @@
+ /* Copyright(c) 2013 - 2021 Intel Corporation. */
+
+ #ifdef CONFIG_I40E_DCB
+-#include "i40e.h"
+ #include <net/dcbnl.h>
++#include "i40e.h"
+
+ #define I40E_DCBNL_STATUS_SUCCESS 0
+ #define I40E_DCBNL_STATUS_ERROR 1
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_ddp.c b/drivers/net/ethernet/intel/i40e/i40e_ddp.c
+index 0e72abd178ae3..21b3518c40968 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_ddp.c
++++ b/drivers/net/ethernet/intel/i40e/i40e_ddp.c
+@@ -1,9 +1,9 @@
+ // SPDX-License-Identifier: GPL-2.0
+ /* Copyright(c) 2013 - 2018 Intel Corporation. */
+
++#include <linux/firmware.h>
+ #include "i40e.h"
+
+-#include <linux/firmware.h>
+
+ /**
+ * i40e_ddp_profiles_eq - checks if DDP profiles are the equivalent
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_debug.h b/drivers/net/ethernet/intel/i40e/i40e_debug.h
+new file mode 100644
+index 0000000000000..27ebc72d8bfe5
+--- /dev/null
++++ b/drivers/net/ethernet/intel/i40e/i40e_debug.h
+@@ -0,0 +1,47 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/* Copyright(c) 2023 Intel Corporation. */
++
++#ifndef _I40E_DEBUG_H_
++#define _I40E_DEBUG_H_
++
++#include <linux/dev_printk.h>
++
++/* debug masks - set these bits in hw->debug_mask to control output */
++enum i40e_debug_mask {
++ I40E_DEBUG_INIT = 0x00000001,
++ I40E_DEBUG_RELEASE = 0x00000002,
++
++ I40E_DEBUG_LINK = 0x00000010,
++ I40E_DEBUG_PHY = 0x00000020,
++ I40E_DEBUG_HMC = 0x00000040,
++ I40E_DEBUG_NVM = 0x00000080,
++ I40E_DEBUG_LAN = 0x00000100,
++ I40E_DEBUG_FLOW = 0x00000200,
++ I40E_DEBUG_DCB = 0x00000400,
++ I40E_DEBUG_DIAG = 0x00000800,
++ I40E_DEBUG_FD = 0x00001000,
++ I40E_DEBUG_PACKAGE = 0x00002000,
++ I40E_DEBUG_IWARP = 0x00F00000,
++ I40E_DEBUG_AQ_MESSAGE = 0x01000000,
++ I40E_DEBUG_AQ_DESCRIPTOR = 0x02000000,
++ I40E_DEBUG_AQ_DESC_BUFFER = 0x04000000,
++ I40E_DEBUG_AQ_COMMAND = 0x06000000,
++ I40E_DEBUG_AQ = 0x0F000000,
++
++ I40E_DEBUG_USER = 0xF0000000,
++
++ I40E_DEBUG_ALL = 0xFFFFFFFF
++};
++
++struct i40e_hw;
++struct device *i40e_hw_to_dev(struct i40e_hw *hw);
++
++#define hw_dbg(hw, S, A...) dev_dbg(i40e_hw_to_dev(hw), S, ##A)
++
++#define i40e_debug(h, m, s, ...) \
++do { \
++ if (((m) & (h)->debug_mask)) \
++ dev_info(i40e_hw_to_dev(h), s, ##__VA_ARGS__); \
++} while (0)
++
++#endif /* _I40E_DEBUG_H_ */
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
+index 1a497cb077100..999c9708def53 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
++++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
+@@ -5,8 +5,9 @@
+
+ #include <linux/fs.h>
+ #include <linux/debugfs.h>
+-
++#include <linux/if_bridge.h>
+ #include "i40e.h"
++#include "i40e_virtchnl_pf.h"
+
+ static struct dentry *i40e_dbg_root;
+
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_diag.h b/drivers/net/ethernet/intel/i40e/i40e_diag.h
+index c3ce5f35211f0..ece3a6b9a5c61 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_diag.h
++++ b/drivers/net/ethernet/intel/i40e/i40e_diag.h
+@@ -4,7 +4,10 @@
+ #ifndef _I40E_DIAG_H_
+ #define _I40E_DIAG_H_
+
+-#include "i40e_type.h"
++#include "i40e_adminq_cmd.h"
++
++/* forward-declare the HW struct for the compiler */
++struct i40e_hw;
+
+ enum i40e_lb_mode {
+ I40E_LB_MODE_NONE = 0x0,
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+index bd1321bf7e268..4e90570ba7803 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
++++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+@@ -3,9 +3,10 @@
+
+ /* ethtool support for i40e */
+
+-#include "i40e.h"
++#include "i40e_devids.h"
+ #include "i40e_diag.h"
+ #include "i40e_txrx_common.h"
++#include "i40e_virtchnl_pf.h"
+
+ /* ethtool statistics helpers */
+
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_hmc.c b/drivers/net/ethernet/intel/i40e/i40e_hmc.c
+index 96ee63aca7a10..1742624ca62ed 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_hmc.c
++++ b/drivers/net/ethernet/intel/i40e/i40e_hmc.c
+@@ -1,10 +1,8 @@
+ // SPDX-License-Identifier: GPL-2.0
+ /* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+-#include "i40e.h"
+-#include "i40e_osdep.h"
+-#include "i40e_register.h"
+ #include "i40e_alloc.h"
++#include "i40e_debug.h"
+ #include "i40e_hmc.h"
+ #include "i40e_type.h"
+
+@@ -22,7 +20,6 @@ int i40e_add_sd_table_entry(struct i40e_hw *hw,
+ enum i40e_sd_entry_type type,
+ u64 direct_mode_sz)
+ {
+- enum i40e_memory_type mem_type __attribute__((unused));
+ struct i40e_hmc_sd_entry *sd_entry;
+ bool dma_mem_alloc_done = false;
+ struct i40e_dma_mem mem;
+@@ -43,16 +40,13 @@ int i40e_add_sd_table_entry(struct i40e_hw *hw,
+
+ sd_entry = &hmc_info->sd_table.sd_entry[sd_index];
+ if (!sd_entry->valid) {
+- if (I40E_SD_TYPE_PAGED == type) {
+- mem_type = i40e_mem_pd;
++ if (type == I40E_SD_TYPE_PAGED)
+ alloc_len = I40E_HMC_PAGED_BP_SIZE;
+- } else {
+- mem_type = i40e_mem_bp_jumbo;
++ else
+ alloc_len = direct_mode_sz;
+- }
+
+ /* allocate a 4K pd page or 2M backing page */
+- ret_code = i40e_allocate_dma_mem(hw, &mem, mem_type, alloc_len,
++ ret_code = i40e_allocate_dma_mem(hw, &mem, alloc_len,
+ I40E_HMC_PD_BP_BUF_ALIGNMENT);
+ if (ret_code)
+ goto exit;
+@@ -140,7 +134,7 @@ int i40e_add_pd_table_entry(struct i40e_hw *hw,
+ page = rsrc_pg;
+ } else {
+ /* allocate a 4K backing page */
+- ret_code = i40e_allocate_dma_mem(hw, page, i40e_mem_bp,
++ ret_code = i40e_allocate_dma_mem(hw, page,
+ I40E_HMC_PAGED_BP_SIZE,
+ I40E_HMC_PD_BP_BUF_ALIGNMENT);
+ if (ret_code)
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_hmc.h b/drivers/net/ethernet/intel/i40e/i40e_hmc.h
+index 9960da07a5732..480e3a883cc7a 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_hmc.h
++++ b/drivers/net/ethernet/intel/i40e/i40e_hmc.h
+@@ -4,6 +4,10 @@
+ #ifndef _I40E_HMC_H_
+ #define _I40E_HMC_H_
+
++#include "i40e_alloc.h"
++#include "i40e_io.h"
++#include "i40e_register.h"
++
+ #define I40E_HMC_MAX_BP_COUNT 512
+
+ /* forward-declare the HW struct for the compiler */
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_io.h b/drivers/net/ethernet/intel/i40e/i40e_io.h
+new file mode 100644
+index 0000000000000..2a2ed9a1d476b
+--- /dev/null
++++ b/drivers/net/ethernet/intel/i40e/i40e_io.h
+@@ -0,0 +1,16 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/* Copyright(c) 2023 Intel Corporation. */
++
++#ifndef _I40E_IO_H_
++#define _I40E_IO_H_
++
++/* get readq/writeq support for 32 bit kernels, use the low-first version */
++#include <linux/io-64-nonatomic-lo-hi.h>
++
++#define wr32(a, reg, value) writel((value), ((a)->hw_addr + (reg)))
++#define rd32(a, reg) readl((a)->hw_addr + (reg))
++
++#define rd64(a, reg) readq((a)->hw_addr + (reg))
++#define i40e_flush(a) readl((a)->hw_addr + I40E_GLGEN_STAT)
++
++#endif /* _I40E_IO_H_ */
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c b/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c
+index 474365bf06480..beaaf5c309d51 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c
++++ b/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c
+@@ -1,13 +1,10 @@
+ // SPDX-License-Identifier: GPL-2.0
+ /* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+-#include "i40e.h"
+-#include "i40e_osdep.h"
+-#include "i40e_register.h"
+-#include "i40e_type.h"
+-#include "i40e_hmc.h"
++#include "i40e_alloc.h"
++#include "i40e_debug.h"
+ #include "i40e_lan_hmc.h"
+-#include "i40e_prototype.h"
++#include "i40e_type.h"
+
+ /* lan specific interface functions */
+
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.h b/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.h
+index 9f960404c2b37..305a276953b01 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.h
++++ b/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.h
+@@ -4,6 +4,8 @@
+ #ifndef _I40E_LAN_HMC_H_
+ #define _I40E_LAN_HMC_H_
+
++#include "i40e_hmc.h"
++
+ /* forward-declare the HW struct for the compiler */
+ struct i40e_hw;
+
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
+index ae32e83a69902..a21fc92aa2725 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
++++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
+@@ -1,19 +1,22 @@
+ // SPDX-License-Identifier: GPL-2.0
+ /* Copyright(c) 2013 - 2021 Intel Corporation. */
+
+-#include <linux/etherdevice.h>
+-#include <linux/of_net.h>
+-#include <linux/pci.h>
+-#include <linux/bpf.h>
+ #include <generated/utsrelease.h>
+ #include <linux/crash_dump.h>
++#include <linux/if_bridge.h>
++#include <linux/if_macvlan.h>
++#include <linux/module.h>
++#include <net/pkt_cls.h>
++#include <net/xdp_sock_drv.h>
+
+ /* Local includes */
+ #include "i40e.h"
++#include "i40e_devids.h"
+ #include "i40e_diag.h"
++#include "i40e_lan_hmc.h"
++#include "i40e_virtchnl_pf.h"
+ #include "i40e_xsk.h"
+-#include <net/udp_tunnel.h>
+-#include <net/xdp_sock_drv.h>
++
+ /* All i40e tracepoints are defined by the include below, which
+ * must be included exactly once across the whole kernel with
+ * CREATE_TRACE_POINTS defined
+@@ -126,16 +129,27 @@ static void netdev_hw_addr_refcnt(struct i40e_mac_filter *f,
+ }
+
+ /**
+- * i40e_allocate_dma_mem_d - OS specific memory alloc for shared code
++ * i40e_hw_to_dev - get device pointer from the hardware structure
++ * @hw: pointer to the device HW structure
++ **/
++struct device *i40e_hw_to_dev(struct i40e_hw *hw)
++{
++ struct i40e_pf *pf = i40e_hw_to_pf(hw);
++
++ return &pf->pdev->dev;
++}
++
++/**
++ * i40e_allocate_dma_mem - OS specific memory alloc for shared code
+ * @hw: pointer to the HW structure
+ * @mem: ptr to mem struct to fill out
+ * @size: size of memory requested
+ * @alignment: what to align the allocation to
+ **/
+-int i40e_allocate_dma_mem_d(struct i40e_hw *hw, struct i40e_dma_mem *mem,
+- u64 size, u32 alignment)
++int i40e_allocate_dma_mem(struct i40e_hw *hw, struct i40e_dma_mem *mem,
++ u64 size, u32 alignment)
+ {
+- struct i40e_pf *pf = (struct i40e_pf *)hw->back;
++ struct i40e_pf *pf = i40e_hw_to_pf(hw);
+
+ mem->size = ALIGN(size, alignment);
+ mem->va = dma_alloc_coherent(&pf->pdev->dev, mem->size, &mem->pa,
+@@ -147,13 +161,13 @@ int i40e_allocate_dma_mem_d(struct i40e_hw *hw, struct i40e_dma_mem *mem,
+ }
+
+ /**
+- * i40e_free_dma_mem_d - OS specific memory free for shared code
++ * i40e_free_dma_mem - OS specific memory free for shared code
+ * @hw: pointer to the HW structure
+ * @mem: ptr to mem struct to free
+ **/
+-int i40e_free_dma_mem_d(struct i40e_hw *hw, struct i40e_dma_mem *mem)
++int i40e_free_dma_mem(struct i40e_hw *hw, struct i40e_dma_mem *mem)
+ {
+- struct i40e_pf *pf = (struct i40e_pf *)hw->back;
++ struct i40e_pf *pf = i40e_hw_to_pf(hw);
+
+ dma_free_coherent(&pf->pdev->dev, mem->size, mem->va, mem->pa);
+ mem->va = NULL;
+@@ -164,13 +178,13 @@ int i40e_free_dma_mem_d(struct i40e_hw *hw, struct i40e_dma_mem *mem)
+ }
+
+ /**
+- * i40e_allocate_virt_mem_d - OS specific memory alloc for shared code
++ * i40e_allocate_virt_mem - OS specific memory alloc for shared code
+ * @hw: pointer to the HW structure
+ * @mem: ptr to mem struct to fill out
+ * @size: size of memory requested
+ **/
+-int i40e_allocate_virt_mem_d(struct i40e_hw *hw, struct i40e_virt_mem *mem,
+- u32 size)
++int i40e_allocate_virt_mem(struct i40e_hw *hw, struct i40e_virt_mem *mem,
++ u32 size)
+ {
+ mem->size = size;
+ mem->va = kzalloc(size, GFP_KERNEL);
+@@ -182,11 +196,11 @@ int i40e_allocate_virt_mem_d(struct i40e_hw *hw, struct i40e_virt_mem *mem,
+ }
+
+ /**
+- * i40e_free_virt_mem_d - OS specific memory free for shared code
++ * i40e_free_virt_mem - OS specific memory free for shared code
+ * @hw: pointer to the HW structure
+ * @mem: ptr to mem struct to free
+ **/
+-int i40e_free_virt_mem_d(struct i40e_hw *hw, struct i40e_virt_mem *mem)
++int i40e_free_virt_mem(struct i40e_hw *hw, struct i40e_virt_mem *mem)
+ {
+ /* it's ok to kfree a NULL pointer */
+ kfree(mem->va);
+@@ -1249,8 +1263,11 @@ int i40e_count_filters(struct i40e_vsi *vsi)
+ int bkt;
+ int cnt = 0;
+
+- hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist)
+- ++cnt;
++ hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) {
++ if (f->state == I40E_FILTER_NEW ||
++ f->state == I40E_FILTER_ACTIVE)
++ ++cnt;
++ }
+
+ return cnt;
+ }
+@@ -3905,6 +3922,12 @@ static void i40e_vsi_configure_msix(struct i40e_vsi *vsi)
+ q_vector->tx.target_itr >> 1);
+ q_vector->tx.current_itr = q_vector->tx.target_itr;
+
++ /* Set ITR for software interrupts triggered after exiting
++ * busy-loop polling.
++ */
++ wr32(hw, I40E_PFINT_ITRN(I40E_SW_ITR, vector - 1),
++ I40E_ITR_20K);
++
+ wr32(hw, I40E_PFINT_RATEN(vector - 1),
+ i40e_intrl_usec_to_reg(vsi->int_rate_limit));
+
+@@ -15644,10 +15667,10 @@ static int i40e_init_recovery_mode(struct i40e_pf *pf, struct i40e_hw *hw)
+ **/
+ static inline void i40e_set_subsystem_device_id(struct i40e_hw *hw)
+ {
+- struct pci_dev *pdev = ((struct i40e_pf *)hw->back)->pdev;
++ struct i40e_pf *pf = i40e_hw_to_pf(hw);
+
+- hw->subsystem_device_id = pdev->subsystem_device ?
+- pdev->subsystem_device :
++ hw->subsystem_device_id = pf->pdev->subsystem_device ?
++ pf->pdev->subsystem_device :
+ (ushort)(rd32(hw, I40E_PFPCI_SUBSYSID) & USHRT_MAX);
+ }
+
+@@ -15717,7 +15740,6 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+ set_bit(__I40E_DOWN, pf->state);
+
+ hw = &pf->hw;
+- hw->back = pf;
+
+ pf->ioremap_len = min_t(int, pci_resource_len(pdev, 0),
+ I40E_MAX_CSR_SPACE);
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_nvm.c b/drivers/net/ethernet/intel/i40e/i40e_nvm.c
+index 07a46adeab38e..e5aec09d58e27 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_nvm.c
++++ b/drivers/net/ethernet/intel/i40e/i40e_nvm.c
+@@ -1,6 +1,9 @@
+ // SPDX-License-Identifier: GPL-2.0
+ /* Copyright(c) 2013 - 2018 Intel Corporation. */
+
++#include <linux/bitfield.h>
++#include <linux/delay.h>
++#include "i40e_alloc.h"
+ #include "i40e_prototype.h"
+
+ /**
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_osdep.h b/drivers/net/ethernet/intel/i40e/i40e_osdep.h
+deleted file mode 100644
+index 2bd4de03dafa2..0000000000000
+--- a/drivers/net/ethernet/intel/i40e/i40e_osdep.h
++++ /dev/null
+@@ -1,59 +0,0 @@
+-/* SPDX-License-Identifier: GPL-2.0 */
+-/* Copyright(c) 2013 - 2018 Intel Corporation. */
+-
+-#ifndef _I40E_OSDEP_H_
+-#define _I40E_OSDEP_H_
+-
+-#include <linux/types.h>
+-#include <linux/if_ether.h>
+-#include <linux/if_vlan.h>
+-#include <linux/tcp.h>
+-#include <linux/pci.h>
+-#include <linux/highuid.h>
+-
+-/* get readq/writeq support for 32 bit kernels, use the low-first version */
+-#include <linux/io-64-nonatomic-lo-hi.h>
+-
+-/* File to be the magic between shared code and
+- * actual OS primitives
+- */
+-
+-#define hw_dbg(hw, S, A...) \
+-do { \
+- dev_dbg(&((struct i40e_pf *)hw->back)->pdev->dev, S, ##A); \
+-} while (0)
+-
+-#define wr32(a, reg, value) writel((value), ((a)->hw_addr + (reg)))
+-#define rd32(a, reg) readl((a)->hw_addr + (reg))
+-
+-#define rd64(a, reg) readq((a)->hw_addr + (reg))
+-#define i40e_flush(a) readl((a)->hw_addr + I40E_GLGEN_STAT)
+-
+-/* memory allocation tracking */
+-struct i40e_dma_mem {
+- void *va;
+- dma_addr_t pa;
+- u32 size;
+-};
+-
+-#define i40e_allocate_dma_mem(h, m, unused, s, a) \
+- i40e_allocate_dma_mem_d(h, m, s, a)
+-#define i40e_free_dma_mem(h, m) i40e_free_dma_mem_d(h, m)
+-
+-struct i40e_virt_mem {
+- void *va;
+- u32 size;
+-};
+-
+-#define i40e_allocate_virt_mem(h, m, s) i40e_allocate_virt_mem_d(h, m, s)
+-#define i40e_free_virt_mem(h, m) i40e_free_virt_mem_d(h, m)
+-
+-#define i40e_debug(h, m, s, ...) \
+-do { \
+- if (((m) & (h)->debug_mask)) \
+- pr_info("i40e %02x:%02x.%x " s, \
+- (h)->bus.bus_id, (h)->bus.device, \
+- (h)->bus.func, ##__VA_ARGS__); \
+-} while (0)
+-
+-#endif /* _I40E_OSDEP_H_ */
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h
+index 3eeee224f1fb2..2001fefa0c52d 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h
++++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h
+@@ -4,9 +4,9 @@
+ #ifndef _I40E_PROTOTYPE_H_
+ #define _I40E_PROTOTYPE_H_
+
+-#include "i40e_type.h"
+-#include "i40e_alloc.h"
+ #include <linux/avf/virtchnl.h>
++#include "i40e_debug.h"
++#include "i40e_type.h"
+
+ /* Prototypes for shared code functions that are not in
+ * the standard function pointer structures. These are
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_ptp.c b/drivers/net/ethernet/intel/i40e/i40e_ptp.c
+index 8a26811140b47..65c714d0bfffd 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_ptp.c
++++ b/drivers/net/ethernet/intel/i40e/i40e_ptp.c
+@@ -1,9 +1,10 @@
+ // SPDX-License-Identifier: GPL-2.0
+ /* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+-#include "i40e.h"
+ #include <linux/ptp_classify.h>
+ #include <linux/posix-clock.h>
++#include "i40e.h"
++#include "i40e_devids.h"
+
+ /* The XL710 timesync is very much like Intel's 82599 design when it comes to
+ * the fundamental clock design. However, the clock operations are much simpler
+@@ -34,7 +35,7 @@ enum i40e_ptp_pin {
+ GPIO_4
+ };
+
+-enum i40e_can_set_pins_t {
++enum i40e_can_set_pins {
+ CANT_DO_PINS = -1,
+ CAN_SET_PINS,
+ CAN_DO_PINS
+@@ -192,7 +193,7 @@ static bool i40e_is_ptp_pin_dev(struct i40e_hw *hw)
+ * return CAN_DO_PINS if pins can be manipulated within a NIC or
+ * return CANT_DO_PINS otherwise.
+ **/
+-static enum i40e_can_set_pins_t i40e_can_set_pins(struct i40e_pf *pf)
++static enum i40e_can_set_pins i40e_can_set_pins(struct i40e_pf *pf)
+ {
+ if (!i40e_is_ptp_pin_dev(&pf->hw)) {
+ dev_warn(&pf->pdev->dev,
+@@ -1070,7 +1071,7 @@ static void i40e_ptp_set_pins_hw(struct i40e_pf *pf)
+ static int i40e_ptp_set_pins(struct i40e_pf *pf,
+ struct i40e_ptp_pins_settings *pins)
+ {
+- enum i40e_can_set_pins_t pin_caps = i40e_can_set_pins(pf);
++ enum i40e_can_set_pins pin_caps = i40e_can_set_pins(pf);
+ int i = 0;
+
+ if (pin_caps == CANT_DO_PINS)
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_register.h b/drivers/net/ethernet/intel/i40e/i40e_register.h
+index 7339003aa17cd..989c186824733 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_register.h
++++ b/drivers/net/ethernet/intel/i40e/i40e_register.h
+@@ -202,7 +202,9 @@
+ #define I40E_GLGEN_MSCA_DEVADD_SHIFT 16
+ #define I40E_GLGEN_MSCA_PHYADD_SHIFT 21
+ #define I40E_GLGEN_MSCA_OPCODE_SHIFT 26
++#define I40E_GLGEN_MSCA_OPCODE_MASK(_i) I40E_MASK(_i, I40E_GLGEN_MSCA_OPCODE_SHIFT)
+ #define I40E_GLGEN_MSCA_STCODE_SHIFT 28
++#define I40E_GLGEN_MSCA_STCODE_MASK I40E_MASK(0x1, I40E_GLGEN_MSCA_STCODE_SHIFT)
+ #define I40E_GLGEN_MSCA_MDICMD_SHIFT 30
+ #define I40E_GLGEN_MSCA_MDICMD_MASK I40E_MASK(0x1, I40E_GLGEN_MSCA_MDICMD_SHIFT)
+ #define I40E_GLGEN_MSCA_MDIINPROGEN_SHIFT 31
+@@ -328,8 +330,11 @@
+ #define I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT 3
+ #define I40E_PFINT_DYN_CTLN_ITR_INDX_MASK I40E_MASK(0x3, I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT)
+ #define I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT 5
++#define I40E_PFINT_DYN_CTLN_INTERVAL_MASK I40E_MASK(0xFFF, I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT)
+ #define I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_SHIFT 24
+ #define I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_MASK I40E_MASK(0x1, I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_SHIFT)
++#define I40E_PFINT_DYN_CTLN_SW_ITR_INDX_SHIFT 25
++#define I40E_PFINT_DYN_CTLN_SW_ITR_INDX_MASK I40E_MASK(0x3, I40E_PFINT_DYN_CTLN_SW_ITR_INDX_SHIFT)
+ #define I40E_PFINT_ICR0 0x00038780 /* Reset: CORER */
+ #define I40E_PFINT_ICR0_INTEVENT_SHIFT 0
+ #define I40E_PFINT_ICR0_INTEVENT_MASK I40E_MASK(0x1, I40E_PFINT_ICR0_INTEVENT_SHIFT)
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+index 1df2f93388128..c962987d8b51b 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
++++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+@@ -1,14 +1,13 @@
+ // SPDX-License-Identifier: GPL-2.0
+ /* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+-#include <linux/prefetch.h>
+ #include <linux/bpf_trace.h>
++#include <linux/prefetch.h>
++#include <linux/sctp.h>
+ #include <net/mpls.h>
+ #include <net/xdp.h>
+-#include "i40e.h"
+-#include "i40e_trace.h"
+-#include "i40e_prototype.h"
+ #include "i40e_txrx_common.h"
++#include "i40e_trace.h"
+ #include "i40e_xsk.h"
+
+ #define I40E_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
+@@ -2644,7 +2643,22 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget,
+ return failure ? budget : (int)total_rx_packets;
+ }
+
+-static inline u32 i40e_buildreg_itr(const int type, u16 itr)
++/**
++ * i40e_buildreg_itr - build a value for writing to I40E_PFINT_DYN_CTLN register
++ * @itr_idx: interrupt throttling index
++ * @interval: interrupt throttling interval value in usecs
++ * @force_swint: force software interrupt
++ *
++ * The function builds a value for I40E_PFINT_DYN_CTLN register that
++ * is used to update interrupt throttling interval for specified ITR index
++ * and optionally enforces a software interrupt. If the @itr_idx is equal
++ * to I40E_ITR_NONE then no interval change is applied and only @force_swint
++ * parameter is taken into account. If the interval change and enforced
++ * software interrupt are not requested then the built value just enables
++ * appropriate vector interrupt.
++ **/
++static u32 i40e_buildreg_itr(enum i40e_dyn_idx itr_idx, u16 interval,
++ bool force_swint)
+ {
+ u32 val;
+
+@@ -2658,23 +2672,33 @@ static inline u32 i40e_buildreg_itr(const int type, u16 itr)
+ * an event in the PBA anyway so we need to rely on the automask
+ * to hold pending events for us until the interrupt is re-enabled
+ *
+- * The itr value is reported in microseconds, and the register
+- * value is recorded in 2 microsecond units. For this reason we
+- * only need to shift by the interval shift - 1 instead of the
+- * full value.
++ * We have to shift the given value as it is reported in microseconds
++ * and the register value is recorded in 2 microsecond units.
+ */
+- itr &= I40E_ITR_MASK;
++ interval >>= 1;
+
++ /* 1. Enable vector interrupt
++ * 2. Update the interval for the specified ITR index
++ * (I40E_ITR_NONE in the register is used to indicate that
++ * no interval update is requested)
++ */
+ val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
+- (type << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) |
+- (itr << (I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT - 1));
++ FIELD_PREP(I40E_PFINT_DYN_CTLN_ITR_INDX_MASK, itr_idx) |
++ FIELD_PREP(I40E_PFINT_DYN_CTLN_INTERVAL_MASK, interval);
++
++ /* 3. Enforce software interrupt trigger if requested
++ * (The rate of these software interrupts is limited by ITR2, which
++ * is set to 20K interrupts per second)
++ */
++ if (force_swint)
++ val |= I40E_PFINT_DYN_CTLN_SWINT_TRIG_MASK |
++ I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_MASK |
++ FIELD_PREP(I40E_PFINT_DYN_CTLN_SW_ITR_INDX_MASK,
++ I40E_SW_ITR);
+
+ return val;
+ }
+
+-/* a small macro to shorten up some long lines */
+-#define INTREG I40E_PFINT_DYN_CTLN
+-
+ /* The act of updating the ITR will cause it to immediately trigger. In order
+ * to prevent this from throwing off adaptive update statistics we defer the
+ * update so that it can only happen so often. So after either Tx or Rx are
+@@ -2693,8 +2717,10 @@ static inline u32 i40e_buildreg_itr(const int type, u16 itr)
+ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
+ struct i40e_q_vector *q_vector)
+ {
++ enum i40e_dyn_idx itr_idx = I40E_ITR_NONE;
+ struct i40e_hw *hw = &vsi->back->hw;
+- u32 intval;
++ u16 interval = 0;
++ u32 itr_val;
+
+ /* If we don't have MSIX, then we only need to re-enable icr0 */
+ if (!(vsi->back->flags & I40E_FLAG_MSIX_ENABLED)) {
+@@ -2716,8 +2742,8 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
+ */
+ if (q_vector->rx.target_itr < q_vector->rx.current_itr) {
+ /* Rx ITR needs to be reduced, this is highest priority */
+- intval = i40e_buildreg_itr(I40E_RX_ITR,
+- q_vector->rx.target_itr);
++ itr_idx = I40E_RX_ITR;
++ interval = q_vector->rx.target_itr;
+ q_vector->rx.current_itr = q_vector->rx.target_itr;
+ q_vector->itr_countdown = ITR_COUNTDOWN_START;
+ } else if ((q_vector->tx.target_itr < q_vector->tx.current_itr) ||
+@@ -2726,25 +2752,36 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
+ /* Tx ITR needs to be reduced, this is second priority
+ * Tx ITR needs to be increased more than Rx, fourth priority
+ */
+- intval = i40e_buildreg_itr(I40E_TX_ITR,
+- q_vector->tx.target_itr);
++ itr_idx = I40E_TX_ITR;
++ interval = q_vector->tx.target_itr;
+ q_vector->tx.current_itr = q_vector->tx.target_itr;
+ q_vector->itr_countdown = ITR_COUNTDOWN_START;
+ } else if (q_vector->rx.current_itr != q_vector->rx.target_itr) {
+ /* Rx ITR needs to be increased, third priority */
+- intval = i40e_buildreg_itr(I40E_RX_ITR,
+- q_vector->rx.target_itr);
++ itr_idx = I40E_RX_ITR;
++ interval = q_vector->rx.target_itr;
+ q_vector->rx.current_itr = q_vector->rx.target_itr;
+ q_vector->itr_countdown = ITR_COUNTDOWN_START;
+ } else {
+ /* No ITR update, lowest priority */
+- intval = i40e_buildreg_itr(I40E_ITR_NONE, 0);
+ if (q_vector->itr_countdown)
+ q_vector->itr_countdown--;
+ }
+
+- if (!test_bit(__I40E_VSI_DOWN, vsi->state))
+- wr32(hw, INTREG(q_vector->reg_idx), intval);
++ /* Do not update interrupt control register if VSI is down */
++ if (test_bit(__I40E_VSI_DOWN, vsi->state))
++ return;
++
++ /* Update ITR interval if necessary and enforce software interrupt
++ * if we are exiting busy poll.
++ */
++ if (q_vector->in_busy_poll) {
++ itr_val = i40e_buildreg_itr(itr_idx, interval, true);
++ q_vector->in_busy_poll = false;
++ } else {
++ itr_val = i40e_buildreg_itr(itr_idx, interval, false);
++ }
++ wr32(hw, I40E_PFINT_DYN_CTLN(q_vector->reg_idx), itr_val);
+ }
+
+ /**
+@@ -2859,6 +2896,8 @@ int i40e_napi_poll(struct napi_struct *napi, int budget)
+ */
+ if (likely(napi_complete_done(napi, work_done)))
+ i40e_update_enable_itr(vsi, q_vector);
++ else
++ q_vector->in_busy_poll = true;
+
+ return min(work_done, budget - 1);
+ }
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+index 900b0d9ede9f5..2b1d50873a4d1 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
++++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+@@ -5,6 +5,7 @@
+ #define _I40E_TXRX_H_
+
+ #include <net/xdp.h>
++#include "i40e_type.h"
+
+ /* Interrupt Throttling and Rate Limiting Goodies */
+ #define I40E_DEFAULT_IRQ_WORK 256
+@@ -57,7 +58,7 @@ static inline u16 i40e_intrl_usec_to_reg(int intrl)
+ * mentioning ITR_INDX, ITR_NONE cannot be used as an index 'n' into any
+ * register but instead is a special value meaning "don't update" ITR0/1/2.
+ */
+-enum i40e_dyn_idx_t {
++enum i40e_dyn_idx {
+ I40E_IDX_ITR0 = 0,
+ I40E_IDX_ITR1 = 1,
+ I40E_IDX_ITR2 = 2,
+@@ -67,6 +68,7 @@ enum i40e_dyn_idx_t {
+ /* these are indexes into ITRN registers */
+ #define I40E_RX_ITR I40E_IDX_ITR0
+ #define I40E_TX_ITR I40E_IDX_ITR1
++#define I40E_SW_ITR I40E_IDX_ITR2
+
+ /* Supported RSS offloads */
+ #define I40E_DEFAULT_RSS_HENA ( \
+@@ -305,7 +307,7 @@ struct i40e_rx_queue_stats {
+ u64 page_busy_count;
+ };
+
+-enum i40e_ring_state_t {
++enum i40e_ring_state {
+ __I40E_TX_FDIR_INIT_DONE,
+ __I40E_TX_XPS_INIT_DONE,
+ __I40E_RING_STATE_NBITS /* must be last */
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx_common.h b/drivers/net/ethernet/intel/i40e/i40e_txrx_common.h
+index 8c5118c8baafb..e26807fd21232 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_txrx_common.h
++++ b/drivers/net/ethernet/intel/i40e/i40e_txrx_common.h
+@@ -4,6 +4,8 @@
+ #ifndef I40E_TXRX_COMMON_
+ #define I40E_TXRX_COMMON_
+
++#include "i40e.h"
++
+ int i40e_xmit_xdp_tx_ring(struct xdp_buff *xdp, struct i40e_ring *xdp_ring);
+ void i40e_clean_programming_status(struct i40e_ring *rx_ring, u64 qword0_raw,
+ u64 qword1);
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h
+index 232131bedc3e7..4092f82bcfb12 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_type.h
++++ b/drivers/net/ethernet/intel/i40e/i40e_type.h
+@@ -4,12 +4,9 @@
+ #ifndef _I40E_TYPE_H_
+ #define _I40E_TYPE_H_
+
+-#include "i40e_osdep.h"
+-#include "i40e_register.h"
++#include <uapi/linux/if_ether.h>
+ #include "i40e_adminq.h"
+ #include "i40e_hmc.h"
+-#include "i40e_lan_hmc.h"
+-#include "i40e_devids.h"
+
+ /* I40E_MASK is a macro used on 32 bit registers */
+ #define I40E_MASK(mask, shift) ((u32)(mask) << (shift))
+@@ -43,48 +40,14 @@ typedef void (*I40E_ADMINQ_CALLBACK)(struct i40e_hw *, struct i40e_aq_desc *);
+ #define I40E_QTX_CTL_VM_QUEUE 0x1
+ #define I40E_QTX_CTL_PF_QUEUE 0x2
+
+-/* debug masks - set these bits in hw->debug_mask to control output */
+-enum i40e_debug_mask {
+- I40E_DEBUG_INIT = 0x00000001,
+- I40E_DEBUG_RELEASE = 0x00000002,
+-
+- I40E_DEBUG_LINK = 0x00000010,
+- I40E_DEBUG_PHY = 0x00000020,
+- I40E_DEBUG_HMC = 0x00000040,
+- I40E_DEBUG_NVM = 0x00000080,
+- I40E_DEBUG_LAN = 0x00000100,
+- I40E_DEBUG_FLOW = 0x00000200,
+- I40E_DEBUG_DCB = 0x00000400,
+- I40E_DEBUG_DIAG = 0x00000800,
+- I40E_DEBUG_FD = 0x00001000,
+- I40E_DEBUG_PACKAGE = 0x00002000,
+- I40E_DEBUG_IWARP = 0x00F00000,
+- I40E_DEBUG_AQ_MESSAGE = 0x01000000,
+- I40E_DEBUG_AQ_DESCRIPTOR = 0x02000000,
+- I40E_DEBUG_AQ_DESC_BUFFER = 0x04000000,
+- I40E_DEBUG_AQ_COMMAND = 0x06000000,
+- I40E_DEBUG_AQ = 0x0F000000,
+-
+- I40E_DEBUG_USER = 0xF0000000,
+-
+- I40E_DEBUG_ALL = 0xFFFFFFFF
+-};
+-
+-#define I40E_MDIO_CLAUSE22_STCODE_MASK I40E_MASK(1, \
+- I40E_GLGEN_MSCA_STCODE_SHIFT)
+-#define I40E_MDIO_CLAUSE22_OPCODE_WRITE_MASK I40E_MASK(1, \
+- I40E_GLGEN_MSCA_OPCODE_SHIFT)
+-#define I40E_MDIO_CLAUSE22_OPCODE_READ_MASK I40E_MASK(2, \
+- I40E_GLGEN_MSCA_OPCODE_SHIFT)
+-
+-#define I40E_MDIO_CLAUSE45_STCODE_MASK I40E_MASK(0, \
+- I40E_GLGEN_MSCA_STCODE_SHIFT)
+-#define I40E_MDIO_CLAUSE45_OPCODE_ADDRESS_MASK I40E_MASK(0, \
+- I40E_GLGEN_MSCA_OPCODE_SHIFT)
+-#define I40E_MDIO_CLAUSE45_OPCODE_WRITE_MASK I40E_MASK(1, \
+- I40E_GLGEN_MSCA_OPCODE_SHIFT)
+-#define I40E_MDIO_CLAUSE45_OPCODE_READ_MASK I40E_MASK(3, \
+- I40E_GLGEN_MSCA_OPCODE_SHIFT)
++#define I40E_MDIO_CLAUSE22_STCODE_MASK I40E_GLGEN_MSCA_STCODE_MASK
++#define I40E_MDIO_CLAUSE22_OPCODE_WRITE_MASK I40E_GLGEN_MSCA_OPCODE_MASK(1)
++#define I40E_MDIO_CLAUSE22_OPCODE_READ_MASK I40E_GLGEN_MSCA_OPCODE_MASK(2)
++
++#define I40E_MDIO_CLAUSE45_STCODE_MASK I40E_GLGEN_MSCA_STCODE_MASK
++#define I40E_MDIO_CLAUSE45_OPCODE_ADDRESS_MASK I40E_GLGEN_MSCA_OPCODE_MASK(0)
++#define I40E_MDIO_CLAUSE45_OPCODE_WRITE_MASK I40E_GLGEN_MSCA_OPCODE_MASK(1)
++#define I40E_MDIO_CLAUSE45_OPCODE_READ_MASK I40E_GLGEN_MSCA_OPCODE_MASK(3)
+
+ #define I40E_PHY_COM_REG_PAGE 0x1E
+ #define I40E_PHY_LED_LINK_MODE_MASK 0xF0
+@@ -525,7 +488,6 @@ struct i40e_dcbx_config {
+ /* Port hardware description */
+ struct i40e_hw {
+ u8 __iomem *hw_addr;
+- void *back;
+
+ /* subsystem structs */
+ struct i40e_phy_info phy;
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+index 082c099209995..7d47a05274548 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
++++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+@@ -2,6 +2,8 @@
+ /* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+ #include "i40e.h"
++#include "i40e_lan_hmc.h"
++#include "i40e_virtchnl_pf.h"
+
+ /*********************notification routines***********************/
+
+@@ -1628,8 +1630,8 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr)
+ {
+ struct i40e_hw *hw = &pf->hw;
+ struct i40e_vf *vf;
+- int i, v;
+ u32 reg;
++ int i;
+
+ /* If we don't have any VFs, then there is nothing to reset */
+ if (!pf->num_alloc_vfs)
+@@ -1640,11 +1642,10 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr)
+ return false;
+
+ /* Begin reset on all VFs at once */
+- for (v = 0; v < pf->num_alloc_vfs; v++) {
+- vf = &pf->vf[v];
++ for (vf = &pf->vf[0]; vf < &pf->vf[pf->num_alloc_vfs]; ++vf) {
+ /* If VF is being reset no need to trigger reset again */
+ if (!test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states))
+- i40e_trigger_vf_reset(&pf->vf[v], flr);
++ i40e_trigger_vf_reset(vf, flr);
+ }
+
+ /* HW requires some time to make sure it can flush the FIFO for a VF
+@@ -1653,14 +1654,13 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr)
+ * the VFs using a simple iterator that increments once that VF has
+ * finished resetting.
+ */
+- for (i = 0, v = 0; i < 10 && v < pf->num_alloc_vfs; i++) {
++ for (i = 0, vf = &pf->vf[0]; i < 10 && vf < &pf->vf[pf->num_alloc_vfs]; ++i) {
+ usleep_range(10000, 20000);
+
+ /* Check each VF in sequence, beginning with the VF to fail
+ * the previous check.
+ */
+- while (v < pf->num_alloc_vfs) {
+- vf = &pf->vf[v];
++ while (vf < &pf->vf[pf->num_alloc_vfs]) {
+ if (!test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states)) {
+ reg = rd32(hw, I40E_VPGEN_VFRSTAT(vf->vf_id));
+ if (!(reg & I40E_VPGEN_VFRSTAT_VFRD_MASK))
+@@ -1670,7 +1670,7 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr)
+ /* If the current VF has finished resetting, move on
+ * to the next VF in sequence.
+ */
+- v++;
++ ++vf;
+ }
+ }
+
+@@ -1680,39 +1680,39 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr)
+ /* Display a warning if at least one VF didn't manage to reset in
+ * time, but continue on with the operation.
+ */
+- if (v < pf->num_alloc_vfs)
++ if (vf < &pf->vf[pf->num_alloc_vfs])
+ dev_err(&pf->pdev->dev, "VF reset check timeout on VF %d\n",
+- pf->vf[v].vf_id);
++ vf->vf_id);
+ usleep_range(10000, 20000);
+
+ /* Begin disabling all the rings associated with VFs, but do not wait
+ * between each VF.
+ */
+- for (v = 0; v < pf->num_alloc_vfs; v++) {
++ for (vf = &pf->vf[0]; vf < &pf->vf[pf->num_alloc_vfs]; ++vf) {
+ /* On initial reset, we don't have any queues to disable */
+- if (pf->vf[v].lan_vsi_idx == 0)
++ if (vf->lan_vsi_idx == 0)
+ continue;
+
+ /* If VF is reset in another thread just continue */
+ if (test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states))
+ continue;
+
+- i40e_vsi_stop_rings_no_wait(pf->vsi[pf->vf[v].lan_vsi_idx]);
++ i40e_vsi_stop_rings_no_wait(pf->vsi[vf->lan_vsi_idx]);
+ }
+
+ /* Now that we've notified HW to disable all of the VF rings, wait
+ * until they finish.
+ */
+- for (v = 0; v < pf->num_alloc_vfs; v++) {
++ for (vf = &pf->vf[0]; vf < &pf->vf[pf->num_alloc_vfs]; ++vf) {
+ /* On initial reset, we don't have any queues to disable */
+- if (pf->vf[v].lan_vsi_idx == 0)
++ if (vf->lan_vsi_idx == 0)
+ continue;
+
+ /* If VF is reset in another thread just continue */
+ if (test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states))
+ continue;
+
+- i40e_vsi_wait_queues_disabled(pf->vsi[pf->vf[v].lan_vsi_idx]);
++ i40e_vsi_wait_queues_disabled(pf->vsi[vf->lan_vsi_idx]);
+ }
+
+ /* Hw may need up to 50ms to finish disabling the RX queues. We
+@@ -1721,12 +1721,12 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr)
+ mdelay(50);
+
+ /* Finish the reset on each VF */
+- for (v = 0; v < pf->num_alloc_vfs; v++) {
++ for (vf = &pf->vf[0]; vf < &pf->vf[pf->num_alloc_vfs]; ++vf) {
+ /* If VF is reset in another thread just continue */
+ if (test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states))
+ continue;
+
+- i40e_cleanup_reset_vf(&pf->vf[v]);
++ i40e_cleanup_reset_vf(vf);
+ }
+
+ i40e_flush(hw);
+@@ -3143,11 +3143,12 @@ static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg)
+ /* Allow to delete VF primary MAC only if it was not set
+ * administratively by PF or if VF is trusted.
+ */
+- if (ether_addr_equal(addr, vf->default_lan_addr.addr) &&
+- i40e_can_vf_change_mac(vf))
+- was_unimac_deleted = true;
+- else
+- continue;
++ if (ether_addr_equal(addr, vf->default_lan_addr.addr)) {
++ if (i40e_can_vf_change_mac(vf))
++ was_unimac_deleted = true;
++ else
++ continue;
++ }
+
+ if (i40e_del_mac_filter(vsi, al->list[i].addr)) {
+ ret = -EINVAL;
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
+index cf190762421cc..66f95e2f3146a 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
++++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
+@@ -4,7 +4,9 @@
+ #ifndef _I40E_VIRTCHNL_PF_H_
+ #define _I40E_VIRTCHNL_PF_H_
+
+-#include "i40e.h"
++#include <linux/avf/virtchnl.h>
++#include <linux/netdevice.h>
++#include "i40e_type.h"
+
+ #define I40E_MAX_VLANID 4095
+
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
+index 1f8ae6f5d9807..65f38a57b3dfe 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c
++++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
+@@ -2,11 +2,7 @@
+ /* Copyright(c) 2018 Intel Corporation. */
+
+ #include <linux/bpf_trace.h>
+-#include <linux/stringify.h>
+ #include <net/xdp_sock_drv.h>
+-#include <net/xdp.h>
+-
+-#include "i40e.h"
+ #include "i40e_txrx_common.h"
+ #include "i40e_xsk.h"
+
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.h b/drivers/net/ethernet/intel/i40e/i40e_xsk.h
+index 821df248f8bee..ef156fad52f26 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.h
++++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.h
+@@ -4,6 +4,8 @@
+ #ifndef _I40E_XSK_H_
+ #define _I40E_XSK_H_
+
++#include <linux/types.h>
++
+ /* This value should match the pragma in the loop_unrolled_for
+ * macro. Why 4? It is strictly empirical. It seems to be a good
+ * compromise between the advantage of having simultaneous outstanding
+@@ -20,7 +22,9 @@
+ #define loop_unrolled_for for
+ #endif
+
++struct i40e_ring;
+ struct i40e_vsi;
++struct net_device;
+ struct xsk_buff_pool;
+
+ int i40e_queue_pair_disable(struct i40e_vsi *vsi, int queue_pair);
+diff --git a/drivers/net/ethernet/intel/iavf/iavf_common.c b/drivers/net/ethernet/intel/iavf/iavf_common.c
+index 1afd761d80520..f7988cf5efa58 100644
+--- a/drivers/net/ethernet/intel/iavf/iavf_common.c
++++ b/drivers/net/ethernet/intel/iavf/iavf_common.c
+@@ -1,10 +1,11 @@
+ // SPDX-License-Identifier: GPL-2.0
+ /* Copyright(c) 2013 - 2018 Intel Corporation. */
+
++#include <linux/avf/virtchnl.h>
++#include <linux/bitfield.h>
+ #include "iavf_type.h"
+ #include "iavf_adminq.h"
+ #include "iavf_prototype.h"
+-#include <linux/avf/virtchnl.h>
+
+ /**
+ * iavf_set_mac_type - Sets MAC type
+diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
+index 892c6a4f03bb8..1ac97bd606e38 100644
+--- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
++++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
+@@ -1,11 +1,12 @@
+ // SPDX-License-Identifier: GPL-2.0
+ /* Copyright(c) 2013 - 2018 Intel Corporation. */
+
++#include <linux/bitfield.h>
++#include <linux/uaccess.h>
++
+ /* ethtool support for iavf */
+ #include "iavf.h"
+
+-#include <linux/uaccess.h>
+-
+ /* ethtool statistics helpers */
+
+ /**
+diff --git a/drivers/net/ethernet/intel/iavf/iavf_fdir.c b/drivers/net/ethernet/intel/iavf/iavf_fdir.c
+index 03e774bd2a5b4..65ddcd81c993e 100644
+--- a/drivers/net/ethernet/intel/iavf/iavf_fdir.c
++++ b/drivers/net/ethernet/intel/iavf/iavf_fdir.c
+@@ -3,6 +3,7 @@
+
+ /* flow director ethtool support for iavf */
+
++#include <linux/bitfield.h>
+ #include "iavf.h"
+
+ #define GTPU_PORT 2152
+diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.c b/drivers/net/ethernet/intel/iavf/iavf_txrx.c
+index 8c5f6096b0022..f998ecf743c46 100644
+--- a/drivers/net/ethernet/intel/iavf/iavf_txrx.c
++++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.c
+@@ -1,6 +1,7 @@
+ // SPDX-License-Identifier: GPL-2.0
+ /* Copyright(c) 2013 - 2018 Intel Corporation. */
+
++#include <linux/bitfield.h>
+ #include <linux/prefetch.h>
+
+ #include "iavf.h"
+diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
+index 45f3e351653db..72ca2199c9572 100644
+--- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
++++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
+@@ -592,8 +592,9 @@ struct ice_aqc_recipe_data_elem {
+ struct ice_aqc_recipe_to_profile {
+ __le16 profile_id;
+ u8 rsvd[6];
+- DECLARE_BITMAP(recipe_assoc, ICE_MAX_NUM_RECIPES);
++ __le64 recipe_assoc;
+ };
++static_assert(sizeof(struct ice_aqc_recipe_to_profile) == 16);
+
+ /* Add/Update/Remove/Get switch rules (indirect 0x02A0, 0x02A1, 0x02A2, 0x02A3)
+ */
+diff --git a/drivers/net/ethernet/intel/ice/ice_lag.c b/drivers/net/ethernet/intel/ice/ice_lag.c
+index 23e197c3d02a7..4e675c7c199fa 100644
+--- a/drivers/net/ethernet/intel/ice/ice_lag.c
++++ b/drivers/net/ethernet/intel/ice/ice_lag.c
+@@ -2000,14 +2000,14 @@ int ice_init_lag(struct ice_pf *pf)
+ /* associate recipes to profiles */
+ for (n = 0; n < ICE_PROFID_IPV6_GTPU_IPV6_TCP_INNER; n++) {
+ err = ice_aq_get_recipe_to_profile(&pf->hw, n,
+- (u8 *)&recipe_bits, NULL);
++ &recipe_bits, NULL);
+ if (err)
+ continue;
+
+ if (recipe_bits & BIT(ICE_SW_LKUP_DFLT)) {
+ recipe_bits |= BIT(lag->pf_recipe);
+ ice_aq_map_recipe_to_profile(&pf->hw, n,
+- (u8 *)&recipe_bits, NULL);
++ recipe_bits, NULL);
+ }
+ }
+
+diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
+index 7f4bc110ead44..2004120a58acd 100644
+--- a/drivers/net/ethernet/intel/ice/ice_lib.c
++++ b/drivers/net/ethernet/intel/ice/ice_lib.c
+@@ -3084,27 +3084,26 @@ ice_vsi_rebuild_set_coalesce(struct ice_vsi *vsi,
+ }
+
+ /**
+- * ice_vsi_realloc_stat_arrays - Frees unused stat structures
++ * ice_vsi_realloc_stat_arrays - Frees unused stat structures or alloc new ones
+ * @vsi: VSI pointer
+- * @prev_txq: Number of Tx rings before ring reallocation
+- * @prev_rxq: Number of Rx rings before ring reallocation
+ */
+-static void
+-ice_vsi_realloc_stat_arrays(struct ice_vsi *vsi, int prev_txq, int prev_rxq)
++static int
++ice_vsi_realloc_stat_arrays(struct ice_vsi *vsi)
+ {
++ u16 req_txq = vsi->req_txq ? vsi->req_txq : vsi->alloc_txq;
++ u16 req_rxq = vsi->req_rxq ? vsi->req_rxq : vsi->alloc_rxq;
++ struct ice_ring_stats **tx_ring_stats;
++ struct ice_ring_stats **rx_ring_stats;
+ struct ice_vsi_stats *vsi_stat;
+ struct ice_pf *pf = vsi->back;
++ u16 prev_txq = vsi->alloc_txq;
++ u16 prev_rxq = vsi->alloc_rxq;
+ int i;
+
+- if (!prev_txq || !prev_rxq)
+- return;
+- if (vsi->type == ICE_VSI_CHNL)
+- return;
+-
+ vsi_stat = pf->vsi_stats[vsi->idx];
+
+- if (vsi->num_txq < prev_txq) {
+- for (i = vsi->num_txq; i < prev_txq; i++) {
++ if (req_txq < prev_txq) {
++ for (i = req_txq; i < prev_txq; i++) {
+ if (vsi_stat->tx_ring_stats[i]) {
+ kfree_rcu(vsi_stat->tx_ring_stats[i], rcu);
+ WRITE_ONCE(vsi_stat->tx_ring_stats[i], NULL);
+@@ -3112,14 +3111,36 @@ ice_vsi_realloc_stat_arrays(struct ice_vsi *vsi, int prev_txq, int prev_rxq)
+ }
+ }
+
+- if (vsi->num_rxq < prev_rxq) {
+- for (i = vsi->num_rxq; i < prev_rxq; i++) {
++ tx_ring_stats = vsi_stat->tx_ring_stats;
++ vsi_stat->tx_ring_stats =
++ krealloc_array(vsi_stat->tx_ring_stats, req_txq,
++ sizeof(*vsi_stat->tx_ring_stats),
++ GFP_KERNEL | __GFP_ZERO);
++ if (!vsi_stat->tx_ring_stats) {
++ vsi_stat->tx_ring_stats = tx_ring_stats;
++ return -ENOMEM;
++ }
++
++ if (req_rxq < prev_rxq) {
++ for (i = req_rxq; i < prev_rxq; i++) {
+ if (vsi_stat->rx_ring_stats[i]) {
+ kfree_rcu(vsi_stat->rx_ring_stats[i], rcu);
+ WRITE_ONCE(vsi_stat->rx_ring_stats[i], NULL);
+ }
+ }
+ }
++
++ rx_ring_stats = vsi_stat->rx_ring_stats;
++ vsi_stat->rx_ring_stats =
++ krealloc_array(vsi_stat->rx_ring_stats, req_rxq,
++ sizeof(*vsi_stat->rx_ring_stats),
++ GFP_KERNEL | __GFP_ZERO);
++ if (!vsi_stat->rx_ring_stats) {
++ vsi_stat->rx_ring_stats = rx_ring_stats;
++ return -ENOMEM;
++ }
++
++ return 0;
+ }
+
+ /**
+@@ -3136,9 +3157,9 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags)
+ {
+ struct ice_vsi_cfg_params params = {};
+ struct ice_coalesce_stored *coalesce;
+- int ret, prev_txq, prev_rxq;
+- int prev_num_q_vectors = 0;
++ int prev_num_q_vectors;
+ struct ice_pf *pf;
++ int ret;
+
+ if (!vsi)
+ return -EINVAL;
+@@ -3150,6 +3171,15 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags)
+ if (WARN_ON(vsi->type == ICE_VSI_VF && !vsi->vf))
+ return -EINVAL;
+
++ ret = ice_vsi_realloc_stat_arrays(vsi);
++ if (ret)
++ goto err_vsi_cfg;
++
++ ice_vsi_decfg(vsi);
++ ret = ice_vsi_cfg_def(vsi, &params);
++ if (ret)
++ goto err_vsi_cfg;
++
+ coalesce = kcalloc(vsi->num_q_vectors,
+ sizeof(struct ice_coalesce_stored), GFP_KERNEL);
+ if (!coalesce)
+@@ -3157,14 +3187,6 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags)
+
+ prev_num_q_vectors = ice_vsi_rebuild_get_coalesce(vsi, coalesce);
+
+- prev_txq = vsi->num_txq;
+- prev_rxq = vsi->num_rxq;
+-
+- ice_vsi_decfg(vsi);
+- ret = ice_vsi_cfg_def(vsi, &params);
+- if (ret)
+- goto err_vsi_cfg;
+-
+ ret = ice_vsi_cfg_tc_lan(pf, vsi);
+ if (ret) {
+ if (vsi_flags & ICE_VSI_FLAG_INIT) {
+@@ -3176,8 +3198,6 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags)
+ return ice_schedule_reset(pf, ICE_RESET_PFR);
+ }
+
+- ice_vsi_realloc_stat_arrays(vsi, prev_txq, prev_rxq);
+-
+ ice_vsi_rebuild_set_coalesce(vsi, coalesce, prev_num_q_vectors);
+ kfree(coalesce);
+
+@@ -3185,8 +3205,8 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags)
+
+ err_vsi_cfg_tc_lan:
+ ice_vsi_decfg(vsi);
+-err_vsi_cfg:
+ kfree(coalesce);
++err_vsi_cfg:
+ return ret;
+ }
+
+diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c
+index 2f77b684ff765..4c6d58bb2690d 100644
+--- a/drivers/net/ethernet/intel/ice/ice_switch.c
++++ b/drivers/net/ethernet/intel/ice/ice_switch.c
+@@ -2032,12 +2032,12 @@ ice_update_recipe_lkup_idx(struct ice_hw *hw,
+ * ice_aq_map_recipe_to_profile - Map recipe to packet profile
+ * @hw: pointer to the HW struct
+ * @profile_id: package profile ID to associate the recipe with
+- * @r_bitmap: Recipe bitmap filled in and need to be returned as response
++ * @r_assoc: Recipe association bitmap to program for the given profile
+ * @cd: pointer to command details structure or NULL
+ * Recipe to profile association (0x0291)
+ */
+ int
+-ice_aq_map_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap,
++ice_aq_map_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u64 r_assoc,
+ struct ice_sq_cd *cd)
+ {
+ struct ice_aqc_recipe_to_profile *cmd;
+@@ -2049,7 +2049,7 @@ ice_aq_map_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap,
+ /* Set the recipe ID bit in the bitmask to let the device know which
+ * profile we are associating the recipe to
+ */
+- memcpy(cmd->recipe_assoc, r_bitmap, sizeof(cmd->recipe_assoc));
++ cmd->recipe_assoc = cpu_to_le64(r_assoc);
+
+ return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+ }
+@@ -2058,12 +2058,12 @@ ice_aq_map_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap,
+ * ice_aq_get_recipe_to_profile - Map recipe to packet profile
+ * @hw: pointer to the HW struct
+ * @profile_id: package profile ID to associate the recipe with
+- * @r_bitmap: Recipe bitmap filled in and need to be returned as response
++ * @r_assoc: Filled in with the recipe association bitmap on success
+ * @cd: pointer to command details structure or NULL
+ * Associate profile ID with given recipe (0x0293)
+ */
+ int
+-ice_aq_get_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap,
++ice_aq_get_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u64 *r_assoc,
+ struct ice_sq_cd *cd)
+ {
+ struct ice_aqc_recipe_to_profile *cmd;
+@@ -2076,7 +2076,7 @@ ice_aq_get_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap,
+
+ status = ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+ if (!status)
+- memcpy(r_bitmap, cmd->recipe_assoc, sizeof(cmd->recipe_assoc));
++ *r_assoc = le64_to_cpu(cmd->recipe_assoc);
+
+ return status;
+ }
+@@ -2121,6 +2121,7 @@ int ice_alloc_recipe(struct ice_hw *hw, u16 *rid)
+ static void ice_get_recp_to_prof_map(struct ice_hw *hw)
+ {
+ DECLARE_BITMAP(r_bitmap, ICE_MAX_NUM_RECIPES);
++ u64 recp_assoc;
+ u16 i;
+
+ for (i = 0; i < hw->switch_info->max_used_prof_index + 1; i++) {
+@@ -2128,8 +2129,9 @@ static void ice_get_recp_to_prof_map(struct ice_hw *hw)
+
+ bitmap_zero(profile_to_recipe[i], ICE_MAX_NUM_RECIPES);
+ bitmap_zero(r_bitmap, ICE_MAX_NUM_RECIPES);
+- if (ice_aq_get_recipe_to_profile(hw, i, (u8 *)r_bitmap, NULL))
++ if (ice_aq_get_recipe_to_profile(hw, i, &recp_assoc, NULL))
+ continue;
++ bitmap_from_arr64(r_bitmap, &recp_assoc, ICE_MAX_NUM_RECIPES);
+ bitmap_copy(profile_to_recipe[i], r_bitmap,
+ ICE_MAX_NUM_RECIPES);
+ for_each_set_bit(j, r_bitmap, ICE_MAX_NUM_RECIPES)
+@@ -5431,22 +5433,24 @@ ice_add_adv_recipe(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups,
+ */
+ list_for_each_entry(fvit, &rm->fv_list, list_entry) {
+ DECLARE_BITMAP(r_bitmap, ICE_MAX_NUM_RECIPES);
++ u64 recp_assoc;
+ u16 j;
+
+ status = ice_aq_get_recipe_to_profile(hw, fvit->profile_id,
+- (u8 *)r_bitmap, NULL);
++ &recp_assoc, NULL);
+ if (status)
+ goto err_unroll;
+
++ bitmap_from_arr64(r_bitmap, &recp_assoc, ICE_MAX_NUM_RECIPES);
+ bitmap_or(r_bitmap, r_bitmap, rm->r_bitmap,
+ ICE_MAX_NUM_RECIPES);
+ status = ice_acquire_change_lock(hw, ICE_RES_WRITE);
+ if (status)
+ goto err_unroll;
+
++ bitmap_to_arr64(&recp_assoc, r_bitmap, ICE_MAX_NUM_RECIPES);
+ status = ice_aq_map_recipe_to_profile(hw, fvit->profile_id,
+- (u8 *)r_bitmap,
+- NULL);
++ recp_assoc, NULL);
+ ice_release_change_lock(hw);
+
+ if (status)
+diff --git a/drivers/net/ethernet/intel/ice/ice_switch.h b/drivers/net/ethernet/intel/ice/ice_switch.h
+index db7e501b7e0a4..89ffa1b51b5ad 100644
+--- a/drivers/net/ethernet/intel/ice/ice_switch.h
++++ b/drivers/net/ethernet/intel/ice/ice_switch.h
+@@ -424,10 +424,10 @@ int ice_aq_add_recipe(struct ice_hw *hw,
+ struct ice_aqc_recipe_data_elem *s_recipe_list,
+ u16 num_recipes, struct ice_sq_cd *cd);
+ int
+-ice_aq_get_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap,
++ice_aq_get_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u64 *r_assoc,
+ struct ice_sq_cd *cd);
+ int
+-ice_aq_map_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap,
++ice_aq_map_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u64 r_assoc,
+ struct ice_sq_cd *cd);
+
+ #endif /* _ICE_SWITCH_H_ */
+diff --git a/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c b/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c
+index 80dc4bcdd3a41..b3e1bdcb80f84 100644
+--- a/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c
++++ b/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c
+@@ -26,24 +26,22 @@ static void ice_port_vlan_on(struct ice_vsi *vsi)
+ struct ice_vsi_vlan_ops *vlan_ops;
+ struct ice_pf *pf = vsi->back;
+
+- if (ice_is_dvm_ena(&pf->hw)) {
+- vlan_ops = &vsi->outer_vlan_ops;
+-
+- /* setup outer VLAN ops */
+- vlan_ops->set_port_vlan = ice_vsi_set_outer_port_vlan;
+- vlan_ops->clear_port_vlan = ice_vsi_clear_outer_port_vlan;
++ /* setup inner VLAN ops */
++ vlan_ops = &vsi->inner_vlan_ops;
+
+- /* setup inner VLAN ops */
+- vlan_ops = &vsi->inner_vlan_ops;
++ if (ice_is_dvm_ena(&pf->hw)) {
+ vlan_ops->add_vlan = noop_vlan_arg;
+ vlan_ops->del_vlan = noop_vlan_arg;
+ vlan_ops->ena_stripping = ice_vsi_ena_inner_stripping;
+ vlan_ops->dis_stripping = ice_vsi_dis_inner_stripping;
+ vlan_ops->ena_insertion = ice_vsi_ena_inner_insertion;
+ vlan_ops->dis_insertion = ice_vsi_dis_inner_insertion;
+- } else {
+- vlan_ops = &vsi->inner_vlan_ops;
+
++ /* setup outer VLAN ops */
++ vlan_ops = &vsi->outer_vlan_ops;
++ vlan_ops->set_port_vlan = ice_vsi_set_outer_port_vlan;
++ vlan_ops->clear_port_vlan = ice_vsi_clear_outer_port_vlan;
++ } else {
+ vlan_ops->set_port_vlan = ice_vsi_set_inner_port_vlan;
+ vlan_ops->clear_port_vlan = ice_vsi_clear_inner_port_vlan;
+ }
+diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.c b/drivers/net/ethernet/intel/igb/e1000_82575.c
+index 8d6e44ee1895a..64dfc362d1dc4 100644
+--- a/drivers/net/ethernet/intel/igb/e1000_82575.c
++++ b/drivers/net/ethernet/intel/igb/e1000_82575.c
+@@ -222,8 +222,7 @@ static s32 igb_init_phy_params_82575(struct e1000_hw *hw)
+ }
+
+ /* set lan id */
+- hw->bus.func = (rd32(E1000_STATUS) & E1000_STATUS_FUNC_MASK) >>
+- E1000_STATUS_FUNC_SHIFT;
++ hw->bus.func = FIELD_GET(E1000_STATUS_FUNC_MASK, rd32(E1000_STATUS));
+
+ /* Set phy->phy_addr and phy->id. */
+ ret_val = igb_get_phy_id_82575(hw);
+@@ -262,8 +261,8 @@ static s32 igb_init_phy_params_82575(struct e1000_hw *hw)
+ if (ret_val)
+ goto out;
+
+- data = (data & E1000_M88E1112_MAC_CTRL_1_MODE_MASK) >>
+- E1000_M88E1112_MAC_CTRL_1_MODE_SHIFT;
++ data = FIELD_GET(E1000_M88E1112_MAC_CTRL_1_MODE_MASK,
++ data);
+ if (data == E1000_M88E1112_AUTO_COPPER_SGMII ||
+ data == E1000_M88E1112_AUTO_COPPER_BASEX)
+ hw->mac.ops.check_for_link =
+@@ -330,8 +329,7 @@ static s32 igb_init_nvm_params_82575(struct e1000_hw *hw)
+ u32 eecd = rd32(E1000_EECD);
+ u16 size;
+
+- size = (u16)((eecd & E1000_EECD_SIZE_EX_MASK) >>
+- E1000_EECD_SIZE_EX_SHIFT);
++ size = FIELD_GET(E1000_EECD_SIZE_EX_MASK, eecd);
+
+ /* Added to a constant, "size" becomes the left-shift value
+ * for setting word_size.
+@@ -2798,7 +2796,7 @@ static s32 igb_get_thermal_sensor_data_generic(struct e1000_hw *hw)
+ return 0;
+
+ hw->nvm.ops.read(hw, ets_offset, 1, &ets_cfg);
+- if (((ets_cfg & NVM_ETS_TYPE_MASK) >> NVM_ETS_TYPE_SHIFT)
++ if (FIELD_GET(NVM_ETS_TYPE_MASK, ets_cfg)
+ != NVM_ETS_TYPE_EMC)
+ return E1000_NOT_IMPLEMENTED;
+
+@@ -2808,10 +2806,8 @@ static s32 igb_get_thermal_sensor_data_generic(struct e1000_hw *hw)
+
+ for (i = 1; i < num_sensors; i++) {
+ hw->nvm.ops.read(hw, (ets_offset + i), 1, &ets_sensor);
+- sensor_index = ((ets_sensor & NVM_ETS_DATA_INDEX_MASK) >>
+- NVM_ETS_DATA_INDEX_SHIFT);
+- sensor_location = ((ets_sensor & NVM_ETS_DATA_LOC_MASK) >>
+- NVM_ETS_DATA_LOC_SHIFT);
++ sensor_index = FIELD_GET(NVM_ETS_DATA_INDEX_MASK, ets_sensor);
++ sensor_location = FIELD_GET(NVM_ETS_DATA_LOC_MASK, ets_sensor);
+
+ if (sensor_location != 0)
+ hw->phy.ops.read_i2c_byte(hw,
+@@ -2859,20 +2855,17 @@ static s32 igb_init_thermal_sensor_thresh_generic(struct e1000_hw *hw)
+ return 0;
+
+ hw->nvm.ops.read(hw, ets_offset, 1, &ets_cfg);
+- if (((ets_cfg & NVM_ETS_TYPE_MASK) >> NVM_ETS_TYPE_SHIFT)
++ if (FIELD_GET(NVM_ETS_TYPE_MASK, ets_cfg)
+ != NVM_ETS_TYPE_EMC)
+ return E1000_NOT_IMPLEMENTED;
+
+- low_thresh_delta = ((ets_cfg & NVM_ETS_LTHRES_DELTA_MASK) >>
+- NVM_ETS_LTHRES_DELTA_SHIFT);
++ low_thresh_delta = FIELD_GET(NVM_ETS_LTHRES_DELTA_MASK, ets_cfg);
+ num_sensors = (ets_cfg & NVM_ETS_NUM_SENSORS_MASK);
+
+ for (i = 1; i <= num_sensors; i++) {
+ hw->nvm.ops.read(hw, (ets_offset + i), 1, &ets_sensor);
+- sensor_index = ((ets_sensor & NVM_ETS_DATA_INDEX_MASK) >>
+- NVM_ETS_DATA_INDEX_SHIFT);
+- sensor_location = ((ets_sensor & NVM_ETS_DATA_LOC_MASK) >>
+- NVM_ETS_DATA_LOC_SHIFT);
++ sensor_index = FIELD_GET(NVM_ETS_DATA_INDEX_MASK, ets_sensor);
++ sensor_location = FIELD_GET(NVM_ETS_DATA_LOC_MASK, ets_sensor);
+ therm_limit = ets_sensor & NVM_ETS_DATA_HTHRESH_MASK;
+
+ hw->phy.ops.write_i2c_byte(hw,
+diff --git a/drivers/net/ethernet/intel/igb/e1000_i210.c b/drivers/net/ethernet/intel/igb/e1000_i210.c
+index b9b9d35494d27..503b239868e8e 100644
+--- a/drivers/net/ethernet/intel/igb/e1000_i210.c
++++ b/drivers/net/ethernet/intel/igb/e1000_i210.c
+@@ -5,9 +5,9 @@
+ * e1000_i211
+ */
+
+-#include <linux/types.h>
++#include <linux/bitfield.h>
+ #include <linux/if_ether.h>
+-
++#include <linux/types.h>
+ #include "e1000_hw.h"
+ #include "e1000_i210.h"
+
+@@ -473,7 +473,7 @@ s32 igb_read_invm_version(struct e1000_hw *hw,
+ /* Check if we have second version location used */
+ else if ((i == 1) &&
+ ((*record & E1000_INVM_VER_FIELD_TWO) == 0)) {
+- version = (*record & E1000_INVM_VER_FIELD_ONE) >> 3;
++ version = FIELD_GET(E1000_INVM_VER_FIELD_ONE, *record);
+ status = 0;
+ break;
+ }
+@@ -483,8 +483,8 @@ s32 igb_read_invm_version(struct e1000_hw *hw,
+ else if ((((*record & E1000_INVM_VER_FIELD_ONE) == 0) &&
+ ((*record & 0x3) == 0)) || (((*record & 0x3) != 0) &&
+ (i != 1))) {
+- version = (*next_record & E1000_INVM_VER_FIELD_TWO)
+- >> 13;
++ version = FIELD_GET(E1000_INVM_VER_FIELD_TWO,
++ *next_record);
+ status = 0;
+ break;
+ }
+@@ -493,15 +493,15 @@ s32 igb_read_invm_version(struct e1000_hw *hw,
+ */
+ else if (((*record & E1000_INVM_VER_FIELD_TWO) == 0) &&
+ ((*record & 0x3) == 0)) {
+- version = (*record & E1000_INVM_VER_FIELD_ONE) >> 3;
++ version = FIELD_GET(E1000_INVM_VER_FIELD_ONE, *record);
+ status = 0;
+ break;
+ }
+ }
+
+ if (!status) {
+- invm_ver->invm_major = (version & E1000_INVM_MAJOR_MASK)
+- >> E1000_INVM_MAJOR_SHIFT;
++ invm_ver->invm_major = FIELD_GET(E1000_INVM_MAJOR_MASK,
++ version);
+ invm_ver->invm_minor = version & E1000_INVM_MINOR_MASK;
+ }
+ /* Read Image Type */
+@@ -520,7 +520,8 @@ s32 igb_read_invm_version(struct e1000_hw *hw,
+ ((*record & E1000_INVM_IMGTYPE_FIELD) == 0)) ||
+ ((((*record & 0x3) != 0) && (i != 1)))) {
+ invm_ver->invm_img_type =
+- (*next_record & E1000_INVM_IMGTYPE_FIELD) >> 23;
++ FIELD_GET(E1000_INVM_IMGTYPE_FIELD,
++ *next_record);
+ status = 0;
+ break;
+ }
+diff --git a/drivers/net/ethernet/intel/igb/e1000_mac.c b/drivers/net/ethernet/intel/igb/e1000_mac.c
+index caf91c6f52b4d..ceaec2cf08a43 100644
+--- a/drivers/net/ethernet/intel/igb/e1000_mac.c
++++ b/drivers/net/ethernet/intel/igb/e1000_mac.c
+@@ -56,7 +56,7 @@ s32 igb_get_bus_info_pcie(struct e1000_hw *hw)
+ }
+
+ reg = rd32(E1000_STATUS);
+- bus->func = (reg & E1000_STATUS_FUNC_MASK) >> E1000_STATUS_FUNC_SHIFT;
++ bus->func = FIELD_GET(E1000_STATUS_FUNC_MASK, reg);
+
+ return 0;
+ }
+diff --git a/drivers/net/ethernet/intel/igb/e1000_nvm.c b/drivers/net/ethernet/intel/igb/e1000_nvm.c
+index fa136e6e93285..2dcd64d6dec31 100644
+--- a/drivers/net/ethernet/intel/igb/e1000_nvm.c
++++ b/drivers/net/ethernet/intel/igb/e1000_nvm.c
+@@ -1,9 +1,9 @@
+ // SPDX-License-Identifier: GPL-2.0
+ /* Copyright(c) 2007 - 2018 Intel Corporation. */
+
+-#include <linux/if_ether.h>
++#include <linux/bitfield.h>
+ #include <linux/delay.h>
+-
++#include <linux/if_ether.h>
+ #include "e1000_mac.h"
+ #include "e1000_nvm.h"
+
+@@ -708,10 +708,10 @@ void igb_get_fw_version(struct e1000_hw *hw, struct e1000_fw_version *fw_vers)
+ */
+ if ((etrack_test & NVM_MAJOR_MASK) != NVM_ETRACK_VALID) {
+ hw->nvm.ops.read(hw, NVM_VERSION, 1, &fw_version);
+- fw_vers->eep_major = (fw_version & NVM_MAJOR_MASK)
+- >> NVM_MAJOR_SHIFT;
+- fw_vers->eep_minor = (fw_version & NVM_MINOR_MASK)
+- >> NVM_MINOR_SHIFT;
++ fw_vers->eep_major = FIELD_GET(NVM_MAJOR_MASK,
++ fw_version);
++ fw_vers->eep_minor = FIELD_GET(NVM_MINOR_MASK,
++ fw_version);
+ fw_vers->eep_build = (fw_version & NVM_IMAGE_ID_MASK);
+ goto etrack_id;
+ }
+@@ -753,15 +753,13 @@ void igb_get_fw_version(struct e1000_hw *hw, struct e1000_fw_version *fw_vers)
+ return;
+ }
+ hw->nvm.ops.read(hw, NVM_VERSION, 1, &fw_version);
+- fw_vers->eep_major = (fw_version & NVM_MAJOR_MASK)
+- >> NVM_MAJOR_SHIFT;
++ fw_vers->eep_major = FIELD_GET(NVM_MAJOR_MASK, fw_version);
+
+ /* check for old style version format in newer images*/
+ if ((fw_version & NVM_NEW_DEC_MASK) == 0x0) {
+ eeprom_verl = (fw_version & NVM_COMB_VER_MASK);
+ } else {
+- eeprom_verl = (fw_version & NVM_MINOR_MASK)
+- >> NVM_MINOR_SHIFT;
++ eeprom_verl = FIELD_GET(NVM_MINOR_MASK, fw_version);
+ }
+ /* Convert minor value to hex before assigning to output struct
+ * Val to be converted will not be higher than 99, per tool output
+diff --git a/drivers/net/ethernet/intel/igb/e1000_phy.c b/drivers/net/ethernet/intel/igb/e1000_phy.c
+index a018000f7db92..bed94e50a6693 100644
+--- a/drivers/net/ethernet/intel/igb/e1000_phy.c
++++ b/drivers/net/ethernet/intel/igb/e1000_phy.c
+@@ -1,9 +1,9 @@
+ // SPDX-License-Identifier: GPL-2.0
+ /* Copyright(c) 2007 - 2018 Intel Corporation. */
+
+-#include <linux/if_ether.h>
++#include <linux/bitfield.h>
+ #include <linux/delay.h>
+-
++#include <linux/if_ether.h>
+ #include "e1000_mac.h"
+ #include "e1000_phy.h"
+
+@@ -1682,8 +1682,7 @@ s32 igb_get_cable_length_m88(struct e1000_hw *hw)
+ if (ret_val)
+ goto out;
+
+- index = (phy_data & M88E1000_PSSR_CABLE_LENGTH) >>
+- M88E1000_PSSR_CABLE_LENGTH_SHIFT;
++ index = FIELD_GET(M88E1000_PSSR_CABLE_LENGTH, phy_data);
+ if (index >= ARRAY_SIZE(e1000_m88_cable_length_table) - 1) {
+ ret_val = -E1000_ERR_PHY;
+ goto out;
+@@ -1796,8 +1795,7 @@ s32 igb_get_cable_length_m88_gen2(struct e1000_hw *hw)
+ if (ret_val)
+ goto out;
+
+- index = (phy_data & M88E1000_PSSR_CABLE_LENGTH) >>
+- M88E1000_PSSR_CABLE_LENGTH_SHIFT;
++ index = FIELD_GET(M88E1000_PSSR_CABLE_LENGTH, phy_data);
+ if (index >= ARRAY_SIZE(e1000_m88_cable_length_table) - 1) {
+ ret_val = -E1000_ERR_PHY;
+ goto out;
+@@ -2578,8 +2576,7 @@ s32 igb_get_cable_length_82580(struct e1000_hw *hw)
+ if (ret_val)
+ goto out;
+
+- length = (phy_data & I82580_DSTATUS_CABLE_LENGTH) >>
+- I82580_DSTATUS_CABLE_LENGTH_SHIFT;
++ length = FIELD_GET(I82580_DSTATUS_CABLE_LENGTH, phy_data);
+
+ if (length == E1000_CABLE_LENGTH_UNDEFINED)
+ ret_val = -E1000_ERR_PHY;
+diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c
+index 4ee849985e2b8..92b2be06a6e93 100644
+--- a/drivers/net/ethernet/intel/igb/igb_ethtool.c
++++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c
+@@ -2434,7 +2434,7 @@ static int igb_get_ts_info(struct net_device *dev,
+ }
+ }
+
+-#define ETHER_TYPE_FULL_MASK ((__force __be16)~0)
++#define ETHER_TYPE_FULL_MASK cpu_to_be16(FIELD_MAX(U16_MAX))
+ static int igb_get_ethtool_nfc_entry(struct igb_adapter *adapter,
+ struct ethtool_rxnfc *cmd)
+ {
+@@ -2733,8 +2733,8 @@ static int igb_rxnfc_write_vlan_prio_filter(struct igb_adapter *adapter,
+ u32 vlapqf;
+
+ vlapqf = rd32(E1000_VLAPQF);
+- vlan_priority = (ntohs(input->filter.vlan_tci) & VLAN_PRIO_MASK)
+- >> VLAN_PRIO_SHIFT;
++ vlan_priority = FIELD_GET(VLAN_PRIO_MASK,
++ ntohs(input->filter.vlan_tci));
+ queue_index = (vlapqf >> (vlan_priority * 4)) & E1000_VLAPQF_QUEUE_MASK;
+
+ /* check whether this vlan prio is already set */
+@@ -2817,7 +2817,7 @@ static void igb_clear_vlan_prio_filter(struct igb_adapter *adapter,
+ u8 vlan_priority;
+ u32 vlapqf;
+
+- vlan_priority = (vlan_tci & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
++ vlan_priority = FIELD_GET(VLAN_PRIO_MASK, vlan_tci);
+
+ vlapqf = rd32(E1000_VLAPQF);
+ vlapqf &= ~E1000_VLAPQF_P_VALID(vlan_priority);
+diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
+index 11921141b6079..4431e7693d45f 100644
+--- a/drivers/net/ethernet/intel/igb/igb_main.c
++++ b/drivers/net/ethernet/intel/igb/igb_main.c
+@@ -7283,7 +7283,7 @@ static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
+ static int igb_set_vf_multicasts(struct igb_adapter *adapter,
+ u32 *msgbuf, u32 vf)
+ {
+- int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
++ int n = FIELD_GET(E1000_VT_MSGINFO_MASK, msgbuf[0]);
+ u16 *hash_list = (u16 *)&msgbuf[1];
+ struct vf_data_storage *vf_data = &adapter->vf_data[vf];
+ int i;
+@@ -7543,7 +7543,7 @@ static int igb_ndo_set_vf_vlan(struct net_device *netdev, int vf,
+
+ static int igb_set_vf_vlan_msg(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
+ {
+- int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
++ int add = FIELD_GET(E1000_VT_MSGINFO_MASK, msgbuf[0]);
+ int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
+ int ret;
+
+diff --git a/drivers/net/ethernet/intel/igbvf/mbx.c b/drivers/net/ethernet/intel/igbvf/mbx.c
+index a3cd7ac48d4b6..d15282ee5ea8f 100644
+--- a/drivers/net/ethernet/intel/igbvf/mbx.c
++++ b/drivers/net/ethernet/intel/igbvf/mbx.c
+@@ -1,6 +1,7 @@
+ // SPDX-License-Identifier: GPL-2.0
+ /* Copyright(c) 2009 - 2018 Intel Corporation. */
+
++#include <linux/bitfield.h>
+ #include "mbx.h"
+
+ /**
+diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c
+index 7ff2752dd763a..c5012fa36af2f 100644
+--- a/drivers/net/ethernet/intel/igbvf/netdev.c
++++ b/drivers/net/ethernet/intel/igbvf/netdev.c
+@@ -3,25 +3,25 @@
+
+ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+-#include <linux/module.h>
+-#include <linux/types.h>
+-#include <linux/init.h>
+-#include <linux/pci.h>
+-#include <linux/vmalloc.h>
+-#include <linux/pagemap.h>
++#include <linux/bitfield.h>
+ #include <linux/delay.h>
+-#include <linux/netdevice.h>
+-#include <linux/tcp.h>
+-#include <linux/ipv6.h>
+-#include <linux/slab.h>
+-#include <net/checksum.h>
+-#include <net/ip6_checksum.h>
+-#include <linux/mii.h>
+ #include <linux/ethtool.h>
+ #include <linux/if_vlan.h>
++#include <linux/init.h>
++#include <linux/ipv6.h>
++#include <linux/mii.h>
++#include <linux/module.h>
++#include <linux/netdevice.h>
++#include <linux/pagemap.h>
++#include <linux/pci.h>
+ #include <linux/prefetch.h>
+ #include <linux/sctp.h>
+-
++#include <linux/slab.h>
++#include <linux/tcp.h>
++#include <linux/types.h>
++#include <linux/vmalloc.h>
++#include <net/checksum.h>
++#include <net/ip6_checksum.h>
+ #include "igbvf.h"
+
+ char igbvf_driver_name[] = "igbvf";
+@@ -273,9 +273,8 @@ static bool igbvf_clean_rx_irq(struct igbvf_adapter *adapter,
+ * that case, it fills the header buffer and spills the rest
+ * into the page.
+ */
+- hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hs_rss.hdr_info)
+- & E1000_RXDADV_HDRBUFLEN_MASK) >>
+- E1000_RXDADV_HDRBUFLEN_SHIFT;
++ hlen = le16_get_bits(rx_desc->wb.lower.lo_dword.hs_rss.hdr_info,
++ E1000_RXDADV_HDRBUFLEN_MASK);
+ if (hlen > adapter->rx_ps_hdr_size)
+ hlen = adapter->rx_ps_hdr_size;
+
+diff --git a/drivers/net/ethernet/intel/igc/igc_i225.c b/drivers/net/ethernet/intel/igc/igc_i225.c
+index 17546a035ab19..d2562c8e8015e 100644
+--- a/drivers/net/ethernet/intel/igc/igc_i225.c
++++ b/drivers/net/ethernet/intel/igc/igc_i225.c
+@@ -1,6 +1,7 @@
+ // SPDX-License-Identifier: GPL-2.0
+ /* Copyright (c) 2018 Intel Corporation */
+
++#include <linux/bitfield.h>
+ #include <linux/delay.h>
+
+ #include "igc_hw.h"
+diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
+index fc1de116d5548..e83700ad7e622 100644
+--- a/drivers/net/ethernet/intel/igc/igc_main.c
++++ b/drivers/net/ethernet/intel/igc/igc_main.c
+@@ -1640,10 +1640,6 @@ static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb,
+
+ if (unlikely(test_bit(IGC_RING_FLAG_TX_HWTSTAMP, &tx_ring->flags) &&
+ skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
+- /* FIXME: add support for retrieving timestamps from
+- * the other timer registers before skipping the
+- * timestamping request.
+- */
+ unsigned long flags;
+ u32 tstamp_flags;
+
+diff --git a/drivers/net/ethernet/intel/igc/igc_phy.c b/drivers/net/ethernet/intel/igc/igc_phy.c
+index 53b77c969c857..d0d9e7170154c 100644
+--- a/drivers/net/ethernet/intel/igc/igc_phy.c
++++ b/drivers/net/ethernet/intel/igc/igc_phy.c
+@@ -1,6 +1,7 @@
+ // SPDX-License-Identifier: GPL-2.0
+ /* Copyright (c) 2018 Intel Corporation */
+
++#include <linux/bitfield.h>
+ #include "igc_phy.h"
+
+ /**
+diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
+index b2a0f2aaa05be..2e6e0365154a1 100644
+--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
++++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
+@@ -684,7 +684,7 @@ void ixgbe_set_lan_id_multi_port_pcie(struct ixgbe_hw *hw)
+ u32 reg;
+
+ reg = IXGBE_READ_REG(hw, IXGBE_STATUS);
+- bus->func = (reg & IXGBE_STATUS_LAN_ID) >> IXGBE_STATUS_LAN_ID_SHIFT;
++ bus->func = FIELD_GET(IXGBE_STATUS_LAN_ID, reg);
+ bus->lan_id = bus->func;
+
+ /* check for a port swap */
+@@ -695,8 +695,8 @@ void ixgbe_set_lan_id_multi_port_pcie(struct ixgbe_hw *hw)
+ /* Get MAC instance from EEPROM for configuring CS4227 */
+ if (hw->device_id == IXGBE_DEV_ID_X550EM_A_SFP) {
+ hw->eeprom.ops.read(hw, IXGBE_EEPROM_CTRL_4, &ee_ctrl_4);
+- bus->instance_id = (ee_ctrl_4 & IXGBE_EE_CTRL_4_INST_ID) >>
+- IXGBE_EE_CTRL_4_INST_ID_SHIFT;
++ bus->instance_id = FIELD_GET(IXGBE_EE_CTRL_4_INST_ID,
++ ee_ctrl_4);
+ }
+ }
+
+@@ -870,10 +870,9 @@ s32 ixgbe_init_eeprom_params_generic(struct ixgbe_hw *hw)
+ * SPI EEPROM is assumed here. This code would need to
+ * change if a future EEPROM is not SPI.
+ */
+- eeprom_size = (u16)((eec & IXGBE_EEC_SIZE) >>
+- IXGBE_EEC_SIZE_SHIFT);
++ eeprom_size = FIELD_GET(IXGBE_EEC_SIZE, eec);
+ eeprom->word_size = BIT(eeprom_size +
+- IXGBE_EEPROM_WORD_SIZE_SHIFT);
++ IXGBE_EEPROM_WORD_SIZE_SHIFT);
+ }
+
+ if (eec & IXGBE_EEC_ADDR_SIZE)
+@@ -3935,10 +3934,10 @@ s32 ixgbe_get_thermal_sensor_data_generic(struct ixgbe_hw *hw)
+ if (status)
+ return status;
+
+- sensor_index = ((ets_sensor & IXGBE_ETS_DATA_INDEX_MASK) >>
+- IXGBE_ETS_DATA_INDEX_SHIFT);
+- sensor_location = ((ets_sensor & IXGBE_ETS_DATA_LOC_MASK) >>
+- IXGBE_ETS_DATA_LOC_SHIFT);
++ sensor_index = FIELD_GET(IXGBE_ETS_DATA_INDEX_MASK,
++ ets_sensor);
++ sensor_location = FIELD_GET(IXGBE_ETS_DATA_LOC_MASK,
++ ets_sensor);
+
+ if (sensor_location != 0) {
+ status = hw->phy.ops.read_i2c_byte(hw,
+@@ -3982,8 +3981,7 @@ s32 ixgbe_init_thermal_sensor_thresh_generic(struct ixgbe_hw *hw)
+ if (status)
+ return status;
+
+- low_thresh_delta = ((ets_cfg & IXGBE_ETS_LTHRES_DELTA_MASK) >>
+- IXGBE_ETS_LTHRES_DELTA_SHIFT);
++ low_thresh_delta = FIELD_GET(IXGBE_ETS_LTHRES_DELTA_MASK, ets_cfg);
+ num_sensors = (ets_cfg & IXGBE_ETS_NUM_SENSORS_MASK);
+ if (num_sensors > IXGBE_MAX_SENSORS)
+ num_sensors = IXGBE_MAX_SENSORS;
+@@ -3997,10 +3995,10 @@ s32 ixgbe_init_thermal_sensor_thresh_generic(struct ixgbe_hw *hw)
+ ets_offset + 1 + i);
+ continue;
+ }
+- sensor_index = ((ets_sensor & IXGBE_ETS_DATA_INDEX_MASK) >>
+- IXGBE_ETS_DATA_INDEX_SHIFT);
+- sensor_location = ((ets_sensor & IXGBE_ETS_DATA_LOC_MASK) >>
+- IXGBE_ETS_DATA_LOC_SHIFT);
++ sensor_index = FIELD_GET(IXGBE_ETS_DATA_INDEX_MASK,
++ ets_sensor);
++ sensor_location = FIELD_GET(IXGBE_ETS_DATA_LOC_MASK,
++ ets_sensor);
+ therm_limit = ets_sensor & IXGBE_ETS_DATA_HTHRESH_MASK;
+
+ hw->phy.ops.write_i2c_byte(hw,
+diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
+index 13a6fca31004a..866024f2b9eeb 100644
+--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
++++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
+@@ -914,7 +914,13 @@ int ixgbe_ipsec_vf_add_sa(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf)
+ goto err_out;
+ }
+
+- xs = kzalloc(sizeof(*xs), GFP_KERNEL);
++ algo = xfrm_aead_get_byname(aes_gcm_name, IXGBE_IPSEC_AUTH_BITS, 1);
++ if (unlikely(!algo)) {
++ err = -ENOENT;
++ goto err_out;
++ }
++
++ xs = kzalloc(sizeof(*xs), GFP_ATOMIC);
+ if (unlikely(!xs)) {
+ err = -ENOMEM;
+ goto err_out;
+@@ -930,14 +936,8 @@ int ixgbe_ipsec_vf_add_sa(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf)
+ memcpy(&xs->id.daddr.a4, sam->addr, sizeof(xs->id.daddr.a4));
+ xs->xso.dev = adapter->netdev;
+
+- algo = xfrm_aead_get_byname(aes_gcm_name, IXGBE_IPSEC_AUTH_BITS, 1);
+- if (unlikely(!algo)) {
+- err = -ENOENT;
+- goto err_xs;
+- }
+-
+ aead_len = sizeof(*xs->aead) + IXGBE_IPSEC_KEY_BITS / 8;
+- xs->aead = kzalloc(aead_len, GFP_KERNEL);
++ xs->aead = kzalloc(aead_len, GFP_ATOMIC);
+ if (unlikely(!xs->aead)) {
+ err = -ENOMEM;
+ goto err_xs;
+diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+index cb23aad5953b0..f245f3df40fca 100644
+--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
++++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+@@ -11409,7 +11409,7 @@ static pci_ers_result_t ixgbe_io_error_detected(struct pci_dev *pdev,
+ if ((pf_func & 1) == (pdev->devfn & 1)) {
+ unsigned int device_id;
+
+- vf = (req_id & 0x7F) >> 1;
++ vf = FIELD_GET(0x7F, req_id);
+ e_dev_err("VF %d has caused a PCIe error\n", vf);
+ e_dev_err("TLP: dw0: %8.8x\tdw1: %8.8x\tdw2: "
+ "%8.8x\tdw3: %8.8x\n",
+diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c
+index 930dc50719364..f28140a05f091 100644
+--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c
++++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c
+@@ -276,9 +276,8 @@ s32 ixgbe_identify_phy_generic(struct ixgbe_hw *hw)
+ return 0;
+
+ if (hw->phy.nw_mng_if_sel) {
+- phy_addr = (hw->phy.nw_mng_if_sel &
+- IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD) >>
+- IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD_SHIFT;
++ phy_addr = FIELD_GET(IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD,
++ hw->phy.nw_mng_if_sel);
+ if (ixgbe_probe_phy(hw, phy_addr))
+ return 0;
+ else
+@@ -1447,8 +1446,7 @@ s32 ixgbe_reset_phy_nl(struct ixgbe_hw *hw)
+ ret_val = hw->eeprom.ops.read(hw, data_offset, &eword);
+ if (ret_val)
+ goto err_eeprom;
+- control = (eword & IXGBE_CONTROL_MASK_NL) >>
+- IXGBE_CONTROL_SHIFT_NL;
++ control = FIELD_GET(IXGBE_CONTROL_MASK_NL, eword);
+ edata = eword & IXGBE_DATA_MASK_NL;
+ switch (control) {
+ case IXGBE_DELAY_NL:
+diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
+index 198ab9d97618c..d0a6c220a12ac 100644
+--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
++++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
+@@ -363,8 +363,7 @@ int ixgbe_pci_sriov_configure(struct pci_dev *dev, int num_vfs)
+ static int ixgbe_set_vf_multicasts(struct ixgbe_adapter *adapter,
+ u32 *msgbuf, u32 vf)
+ {
+- int entries = (msgbuf[0] & IXGBE_VT_MSGINFO_MASK)
+- >> IXGBE_VT_MSGINFO_SHIFT;
++ int entries = FIELD_GET(IXGBE_VT_MSGINFO_MASK, msgbuf[0]);
+ u16 *hash_list = (u16 *)&msgbuf[1];
+ struct vf_data_storage *vfinfo = &adapter->vfinfo[vf];
+ struct ixgbe_hw *hw = &adapter->hw;
+@@ -971,7 +970,7 @@ static int ixgbe_set_vf_mac_addr(struct ixgbe_adapter *adapter,
+ static int ixgbe_set_vf_vlan_msg(struct ixgbe_adapter *adapter,
+ u32 *msgbuf, u32 vf)
+ {
+- u32 add = (msgbuf[0] & IXGBE_VT_MSGINFO_MASK) >> IXGBE_VT_MSGINFO_SHIFT;
++ u32 add = FIELD_GET(IXGBE_VT_MSGINFO_MASK, msgbuf[0]);
+ u32 vid = (msgbuf[1] & IXGBE_VLVF_VLANID_MASK);
+ u8 tcs = adapter->hw_tcs;
+
+@@ -994,8 +993,7 @@ static int ixgbe_set_vf_macvlan_msg(struct ixgbe_adapter *adapter,
+ u32 *msgbuf, u32 vf)
+ {
+ u8 *new_mac = ((u8 *)(&msgbuf[1]));
+- int index = (msgbuf[0] & IXGBE_VT_MSGINFO_MASK) >>
+- IXGBE_VT_MSGINFO_SHIFT;
++ int index = FIELD_GET(IXGBE_VT_MSGINFO_MASK, msgbuf[0]);
+ int err;
+
+ if (adapter->vfinfo[vf].pf_set_mac && !adapter->vfinfo[vf].trusted &&
+diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c
+index 15325c549d9b5..57a912e4653fc 100644
+--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c
++++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c
+@@ -187,16 +187,16 @@ s32 ixgbe_start_hw_X540(struct ixgbe_hw *hw)
+ s32 ixgbe_init_eeprom_params_X540(struct ixgbe_hw *hw)
+ {
+ struct ixgbe_eeprom_info *eeprom = &hw->eeprom;
+- u32 eec;
+- u16 eeprom_size;
+
+ if (eeprom->type == ixgbe_eeprom_uninitialized) {
++ u16 eeprom_size;
++ u32 eec;
++
+ eeprom->semaphore_delay = 10;
+ eeprom->type = ixgbe_flash;
+
+ eec = IXGBE_READ_REG(hw, IXGBE_EEC(hw));
+- eeprom_size = (u16)((eec & IXGBE_EEC_SIZE) >>
+- IXGBE_EEC_SIZE_SHIFT);
++ eeprom_size = FIELD_GET(IXGBE_EEC_SIZE, eec);
+ eeprom->word_size = BIT(eeprom_size +
+ IXGBE_EEPROM_WORD_SIZE_SHIFT);
+
+diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
+index cdc912bba8089..c1adc94a5a657 100644
+--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
++++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
+@@ -630,16 +630,16 @@ static s32 ixgbe_fc_autoneg_fw(struct ixgbe_hw *hw)
+ static s32 ixgbe_init_eeprom_params_X550(struct ixgbe_hw *hw)
+ {
+ struct ixgbe_eeprom_info *eeprom = &hw->eeprom;
+- u32 eec;
+- u16 eeprom_size;
+
+ if (eeprom->type == ixgbe_eeprom_uninitialized) {
++ u16 eeprom_size;
++ u32 eec;
++
+ eeprom->semaphore_delay = 10;
+ eeprom->type = ixgbe_flash;
+
+ eec = IXGBE_READ_REG(hw, IXGBE_EEC(hw));
+- eeprom_size = (u16)((eec & IXGBE_EEC_SIZE) >>
+- IXGBE_EEC_SIZE_SHIFT);
++ eeprom_size = FIELD_GET(IXGBE_EEC_SIZE, eec);
+ eeprom->word_size = BIT(eeprom_size +
+ IXGBE_EEPROM_WORD_SIZE_SHIFT);
+
+@@ -714,8 +714,7 @@ static s32 ixgbe_read_iosf_sb_reg_x550(struct ixgbe_hw *hw, u32 reg_addr,
+ ret = ixgbe_iosf_wait(hw, &command);
+
+ if ((command & IXGBE_SB_IOSF_CTRL_RESP_STAT_MASK) != 0) {
+- error = (command & IXGBE_SB_IOSF_CTRL_CMPL_ERR_MASK) >>
+- IXGBE_SB_IOSF_CTRL_CMPL_ERR_SHIFT;
++ error = FIELD_GET(IXGBE_SB_IOSF_CTRL_CMPL_ERR_MASK, command);
+ hw_dbg(hw, "Failed to read, error %x\n", error);
+ ret = -EIO;
+ goto out;
+@@ -1415,8 +1414,7 @@ static s32 ixgbe_write_iosf_sb_reg_x550(struct ixgbe_hw *hw, u32 reg_addr,
+ ret = ixgbe_iosf_wait(hw, &command);
+
+ if ((command & IXGBE_SB_IOSF_CTRL_RESP_STAT_MASK) != 0) {
+- error = (command & IXGBE_SB_IOSF_CTRL_CMPL_ERR_MASK) >>
+- IXGBE_SB_IOSF_CTRL_CMPL_ERR_SHIFT;
++ error = FIELD_GET(IXGBE_SB_IOSF_CTRL_CMPL_ERR_MASK, command);
+ hw_dbg(hw, "Failed to write, error %x\n", error);
+ return -EIO;
+ }
+@@ -3229,9 +3227,8 @@ static void ixgbe_read_mng_if_sel_x550em(struct ixgbe_hw *hw)
+ */
+ if (hw->mac.type == ixgbe_mac_x550em_a &&
+ hw->phy.nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_MDIO_ACT) {
+- hw->phy.mdio.prtad = (hw->phy.nw_mng_if_sel &
+- IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD) >>
+- IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD_SHIFT;
++ hw->phy.mdio.prtad = FIELD_GET(IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD,
++ hw->phy.nw_mng_if_sel);
+ }
+ }
+
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
+index 6c18d3d2442eb..2539c985f695a 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
++++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
+@@ -808,6 +808,11 @@ static int cgx_lmac_enadis_pause_frm(void *cgxd, int lmac_id,
+ if (!is_lmac_valid(cgx, lmac_id))
+ return -ENODEV;
+
++ cfg = cgx_read(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL);
++ cfg &= ~CGX_GMP_GMI_RXX_FRM_CTL_CTL_BCK;
++ cfg |= rx_pause ? CGX_GMP_GMI_RXX_FRM_CTL_CTL_BCK : 0x0;
++ cgx_write(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL, cfg);
++
+ cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL);
+ cfg &= ~CGX_SMUX_RX_FRM_CTL_CTL_BCK;
+ cfg |= rx_pause ? CGX_SMUX_RX_FRM_CTL_CTL_BCK : 0x0;
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c
+index 9181ac5de912a..19075f217d00c 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c
++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c
+@@ -160,6 +160,8 @@ static int rvu_map_cgx_lmac_pf(struct rvu *rvu)
+ continue;
+ lmac_bmap = cgx_get_lmac_bmap(rvu_cgx_pdata(cgx, rvu));
+ for_each_set_bit(iter, &lmac_bmap, rvu->hw->lmac_per_cgx) {
++ if (iter >= MAX_LMAC_COUNT)
++ continue;
+ lmac = cgx_get_lmacid(rvu_cgx_pdata(cgx, rvu),
+ iter);
+ rvu->pf2cgxlmac_map[pf] = cgxlmac_id_to_bmap(cgx, lmac);
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
+index 55639c133dd02..91a4ea529d077 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
+@@ -1669,7 +1669,7 @@ static int npc_fwdb_detect_load_prfl_img(struct rvu *rvu, uint64_t prfl_sz,
+ struct npc_coalesced_kpu_prfl *img_data = NULL;
+ int i = 0, rc = -EINVAL;
+ void __iomem *kpu_prfl_addr;
+- u16 offset;
++ u32 offset;
+
+ img_data = (struct npc_coalesced_kpu_prfl __force *)rvu->kpu_prfl_addr;
+ if (le64_to_cpu(img_data->signature) == KPU_SIGN &&
+diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
+index b40bd0e467514..3f46d5e0fb2ec 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
++++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
+@@ -1933,7 +1933,7 @@ int otx2_open(struct net_device *netdev)
+ * mcam entries are enabled to receive the packets. Hence disable the
+ * packet I/O.
+ */
+- if (err == EIO)
++ if (err == -EIO)
+ goto err_disable_rxtx;
+ else if (err)
+ goto err_tx_stop_queues;
+diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c
+index aaf1faed4133e..7bb92e2dacda6 100644
+--- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c
++++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c
+@@ -14,6 +14,7 @@
+ #include <linux/module.h>
+ #include <linux/phy.h>
+ #include <linux/platform_device.h>
++#include <linux/rtnetlink.h>
+ #include <linux/skbuff.h>
+
+ #include "mlxbf_gige.h"
+@@ -139,13 +140,10 @@ static int mlxbf_gige_open(struct net_device *netdev)
+ control |= MLXBF_GIGE_CONTROL_PORT_EN;
+ writeq(control, priv->base + MLXBF_GIGE_CONTROL);
+
+- err = mlxbf_gige_request_irqs(priv);
+- if (err)
+- return err;
+ mlxbf_gige_cache_stats(priv);
+ err = mlxbf_gige_clean_port(priv);
+ if (err)
+- goto free_irqs;
++ return err;
+
+ /* Clear driver's valid_polarity to match hardware,
+ * since the above call to clean_port() resets the
+@@ -157,7 +155,7 @@ static int mlxbf_gige_open(struct net_device *netdev)
+
+ err = mlxbf_gige_tx_init(priv);
+ if (err)
+- goto free_irqs;
++ goto phy_deinit;
+ err = mlxbf_gige_rx_init(priv);
+ if (err)
+ goto tx_deinit;
+@@ -166,6 +164,10 @@ static int mlxbf_gige_open(struct net_device *netdev)
+ napi_enable(&priv->napi);
+ netif_start_queue(netdev);
+
++ err = mlxbf_gige_request_irqs(priv);
++ if (err)
++ goto napi_deinit;
++
+ /* Set bits in INT_EN that we care about */
+ int_en = MLXBF_GIGE_INT_EN_HW_ACCESS_ERROR |
+ MLXBF_GIGE_INT_EN_TX_CHECKSUM_INPUTS |
+@@ -182,11 +184,17 @@ static int mlxbf_gige_open(struct net_device *netdev)
+
+ return 0;
+
++napi_deinit:
++ netif_stop_queue(netdev);
++ napi_disable(&priv->napi);
++ netif_napi_del(&priv->napi);
++ mlxbf_gige_rx_deinit(priv);
++
+ tx_deinit:
+ mlxbf_gige_tx_deinit(priv);
+
+-free_irqs:
+- mlxbf_gige_free_irqs(priv);
++phy_deinit:
++ phy_stop(phydev);
+ return err;
+ }
+
+@@ -487,8 +495,13 @@ static void mlxbf_gige_shutdown(struct platform_device *pdev)
+ {
+ struct mlxbf_gige *priv = platform_get_drvdata(pdev);
+
+- writeq(0, priv->base + MLXBF_GIGE_INT_EN);
+- mlxbf_gige_clean_port(priv);
++ rtnl_lock();
++ netif_device_detach(priv->netdev);
++
++ if (netif_running(priv->netdev))
++ dev_close(priv->netdev);
++
++ rtnl_unlock();
+ }
+
+ static const struct acpi_device_id __maybe_unused mlxbf_gige_acpi_match[] = {
+diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c
+index c81cdeb4d4e7e..0b6174748d2b4 100644
+--- a/drivers/net/ethernet/microchip/lan743x_main.c
++++ b/drivers/net/ethernet/microchip/lan743x_main.c
+@@ -25,6 +25,8 @@
+ #define PCS_POWER_STATE_DOWN 0x6
+ #define PCS_POWER_STATE_UP 0x4
+
++#define RFE_RD_FIFO_TH_3_DWORDS 0x3
++
+ static void pci11x1x_strap_get_status(struct lan743x_adapter *adapter)
+ {
+ u32 chip_rev;
+@@ -3223,6 +3225,21 @@ static void lan743x_full_cleanup(struct lan743x_adapter *adapter)
+ lan743x_pci_cleanup(adapter);
+ }
+
++static void pci11x1x_set_rfe_rd_fifo_threshold(struct lan743x_adapter *adapter)
++{
++ u16 rev = adapter->csr.id_rev & ID_REV_CHIP_REV_MASK_;
++
++ if (rev == ID_REV_CHIP_REV_PCI11X1X_B0_) {
++ u32 misc_ctl;
++
++ misc_ctl = lan743x_csr_read(adapter, MISC_CTL_0);
++ misc_ctl &= ~MISC_CTL_0_RFE_READ_FIFO_MASK_;
++ misc_ctl |= FIELD_PREP(MISC_CTL_0_RFE_READ_FIFO_MASK_,
++ RFE_RD_FIFO_TH_3_DWORDS);
++ lan743x_csr_write(adapter, MISC_CTL_0, misc_ctl);
++ }
++}
++
+ static int lan743x_hardware_init(struct lan743x_adapter *adapter,
+ struct pci_dev *pdev)
+ {
+@@ -3238,6 +3255,7 @@ static int lan743x_hardware_init(struct lan743x_adapter *adapter,
+ pci11x1x_strap_get_status(adapter);
+ spin_lock_init(&adapter->eth_syslock_spinlock);
+ mutex_init(&adapter->sgmii_rw_lock);
++ pci11x1x_set_rfe_rd_fifo_threshold(adapter);
+ } else {
+ adapter->max_tx_channels = LAN743X_MAX_TX_CHANNELS;
+ adapter->used_tx_channels = LAN743X_USED_TX_CHANNELS;
+diff --git a/drivers/net/ethernet/microchip/lan743x_main.h b/drivers/net/ethernet/microchip/lan743x_main.h
+index 52609fc13ad95..f0b486f85450e 100644
+--- a/drivers/net/ethernet/microchip/lan743x_main.h
++++ b/drivers/net/ethernet/microchip/lan743x_main.h
+@@ -26,6 +26,7 @@
+ #define ID_REV_CHIP_REV_MASK_ (0x0000FFFF)
+ #define ID_REV_CHIP_REV_A0_ (0x00000000)
+ #define ID_REV_CHIP_REV_B0_ (0x00000010)
++#define ID_REV_CHIP_REV_PCI11X1X_B0_ (0x000000B0)
+
+ #define FPGA_REV (0x04)
+ #define FPGA_REV_GET_MINOR_(fpga_rev) (((fpga_rev) >> 8) & 0x000000FF)
+@@ -311,6 +312,9 @@
+ #define SGMII_CTL_LINK_STATUS_SOURCE_ BIT(8)
+ #define SGMII_CTL_SGMII_POWER_DN_ BIT(1)
+
++#define MISC_CTL_0 (0x920)
++#define MISC_CTL_0_RFE_READ_FIFO_MASK_ GENMASK(6, 4)
++
+ /* Vendor Specific SGMII MMD details */
+ #define SR_VSMMD_PCS_ID1 0x0004
+ #define SR_VSMMD_PCS_ID2 0x0005
+diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
+index 48ea4aeeea5d4..e443d69e39511 100644
+--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
++++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
+@@ -601,7 +601,7 @@ static void mana_get_rxbuf_cfg(int mtu, u32 *datasize, u32 *alloc_size,
+
+ *alloc_size = mtu + MANA_RXBUF_PAD + *headroom;
+
+- *datasize = ALIGN(mtu + ETH_HLEN, MANA_RX_DATA_ALIGN);
++ *datasize = mtu + ETH_HLEN;
+ }
+
+ static int mana_pre_alloc_rxbufs(struct mana_port_context *mpc, int new_mtu)
+diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
+index 81fd31f6fac46..e6f1da66c4500 100644
+--- a/drivers/net/ethernet/realtek/r8169_main.c
++++ b/drivers/net/ethernet/realtek/r8169_main.c
+@@ -1201,17 +1201,40 @@ static void rtl8168ep_stop_cmac(struct rtl8169_private *tp)
+ RTL_W8(tp, IBCR0, RTL_R8(tp, IBCR0) & ~0x01);
+ }
+
++static void rtl_dash_loop_wait(struct rtl8169_private *tp,
++ const struct rtl_cond *c,
++ unsigned long usecs, int n, bool high)
++{
++ if (!tp->dash_enabled)
++ return;
++ rtl_loop_wait(tp, c, usecs, n, high);
++}
++
++static void rtl_dash_loop_wait_high(struct rtl8169_private *tp,
++ const struct rtl_cond *c,
++ unsigned long d, int n)
++{
++ rtl_dash_loop_wait(tp, c, d, n, true);
++}
++
++static void rtl_dash_loop_wait_low(struct rtl8169_private *tp,
++ const struct rtl_cond *c,
++ unsigned long d, int n)
++{
++ rtl_dash_loop_wait(tp, c, d, n, false);
++}
++
+ static void rtl8168dp_driver_start(struct rtl8169_private *tp)
+ {
+ r8168dp_oob_notify(tp, OOB_CMD_DRIVER_START);
+- rtl_loop_wait_high(tp, &rtl_dp_ocp_read_cond, 10000, 10);
++ rtl_dash_loop_wait_high(tp, &rtl_dp_ocp_read_cond, 10000, 10);
+ }
+
+ static void rtl8168ep_driver_start(struct rtl8169_private *tp)
+ {
+ r8168ep_ocp_write(tp, 0x01, 0x180, OOB_CMD_DRIVER_START);
+ r8168ep_ocp_write(tp, 0x01, 0x30, r8168ep_ocp_read(tp, 0x30) | 0x01);
+- rtl_loop_wait_high(tp, &rtl_ep_ocp_read_cond, 10000, 30);
++ rtl_dash_loop_wait_high(tp, &rtl_ep_ocp_read_cond, 10000, 30);
+ }
+
+ static void rtl8168_driver_start(struct rtl8169_private *tp)
+@@ -1225,7 +1248,7 @@ static void rtl8168_driver_start(struct rtl8169_private *tp)
+ static void rtl8168dp_driver_stop(struct rtl8169_private *tp)
+ {
+ r8168dp_oob_notify(tp, OOB_CMD_DRIVER_STOP);
+- rtl_loop_wait_low(tp, &rtl_dp_ocp_read_cond, 10000, 10);
++ rtl_dash_loop_wait_low(tp, &rtl_dp_ocp_read_cond, 10000, 10);
+ }
+
+ static void rtl8168ep_driver_stop(struct rtl8169_private *tp)
+@@ -1233,7 +1256,7 @@ static void rtl8168ep_driver_stop(struct rtl8169_private *tp)
+ rtl8168ep_stop_cmac(tp);
+ r8168ep_ocp_write(tp, 0x01, 0x180, OOB_CMD_DRIVER_STOP);
+ r8168ep_ocp_write(tp, 0x01, 0x30, r8168ep_ocp_read(tp, 0x30) | 0x01);
+- rtl_loop_wait_low(tp, &rtl_ep_ocp_read_cond, 10000, 10);
++ rtl_dash_loop_wait_low(tp, &rtl_ep_ocp_read_cond, 10000, 10);
+ }
+
+ static void rtl8168_driver_stop(struct rtl8169_private *tp)
+@@ -5055,6 +5078,15 @@ static int r8169_mdio_register(struct rtl8169_private *tp)
+ struct mii_bus *new_bus;
+ int ret;
+
++ /* On some boards with this chip version the BIOS is buggy and fails
++ * to reset the PHY page selector. This results in the PHY ID read
++ * accessing registers on a different page, returning a more or
++ * less random value. Fix this by resetting the page selector first.
++ */
++ if (tp->mac_version == RTL_GIGA_MAC_VER_25 ||
++ tp->mac_version == RTL_GIGA_MAC_VER_26)
++ r8169_mdio_write(tp, 0x1f, 0);
++
+ new_bus = devm_mdiobus_alloc(&pdev->dev);
+ if (!new_bus)
+ return -ENOMEM;
+diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
+index 8fec0dbbbe7bb..c6897e6ea362d 100644
+--- a/drivers/net/ethernet/renesas/ravb_main.c
++++ b/drivers/net/ethernet/renesas/ravb_main.c
+@@ -1288,25 +1288,16 @@ static int ravb_poll(struct napi_struct *napi, int budget)
+ struct net_device *ndev = napi->dev;
+ struct ravb_private *priv = netdev_priv(ndev);
+ const struct ravb_hw_info *info = priv->info;
+- bool gptp = info->gptp || info->ccc_gac;
+- struct ravb_rx_desc *desc;
+ unsigned long flags;
+ int q = napi - priv->napi;
+ int mask = BIT(q);
+ int quota = budget;
+- unsigned int entry;
++ bool unmask;
+
+- if (!gptp) {
+- entry = priv->cur_rx[q] % priv->num_rx_ring[q];
+- desc = &priv->gbeth_rx_ring[entry];
+- }
+ /* Processing RX Descriptor Ring */
+ /* Clear RX interrupt */
+ ravb_write(ndev, ~(mask | RIS0_RESERVED), RIS0);
+- if (gptp || desc->die_dt != DT_FEMPTY) {
+- if (ravb_rx(ndev, &quota, q))
+- goto out;
+- }
++ unmask = !ravb_rx(ndev, &quota, q);
+
+ /* Processing TX Descriptor Ring */
+ spin_lock_irqsave(&priv->lock, flags);
+@@ -1316,6 +1307,18 @@ static int ravb_poll(struct napi_struct *napi, int budget)
+ netif_wake_subqueue(ndev, q);
+ spin_unlock_irqrestore(&priv->lock, flags);
+
++ /* Receive error message handling */
++ priv->rx_over_errors = priv->stats[RAVB_BE].rx_over_errors;
++ if (info->nc_queues)
++ priv->rx_over_errors += priv->stats[RAVB_NC].rx_over_errors;
++ if (priv->rx_over_errors != ndev->stats.rx_over_errors)
++ ndev->stats.rx_over_errors = priv->rx_over_errors;
++ if (priv->rx_fifo_errors != ndev->stats.rx_fifo_errors)
++ ndev->stats.rx_fifo_errors = priv->rx_fifo_errors;
++
++ if (!unmask)
++ goto out;
++
+ napi_complete(napi);
+
+ /* Re-enable RX/TX interrupts */
+@@ -1329,14 +1332,6 @@ static int ravb_poll(struct napi_struct *napi, int budget)
+ }
+ spin_unlock_irqrestore(&priv->lock, flags);
+
+- /* Receive error message handling */
+- priv->rx_over_errors = priv->stats[RAVB_BE].rx_over_errors;
+- if (info->nc_queues)
+- priv->rx_over_errors += priv->stats[RAVB_NC].rx_over_errors;
+- if (priv->rx_over_errors != ndev->stats.rx_over_errors)
+- ndev->stats.rx_over_errors = priv->rx_over_errors;
+- if (priv->rx_fifo_errors != ndev->stats.rx_fifo_errors)
+- ndev->stats.rx_fifo_errors = priv->rx_fifo_errors;
+ out:
+ return budget - quota;
+ }
+diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
+index c6ff1fa0e04d8..683c34e609638 100644
+--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
++++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
+@@ -92,19 +92,41 @@ static void dwmac4_rx_queue_priority(struct mac_device_info *hw,
+ u32 prio, u32 queue)
+ {
+ void __iomem *ioaddr = hw->pcsr;
+- u32 base_register;
+- u32 value;
++ u32 clear_mask = 0;
++ u32 ctrl2, ctrl3;
++ int i;
+
+- base_register = (queue < 4) ? GMAC_RXQ_CTRL2 : GMAC_RXQ_CTRL3;
+- if (queue >= 4)
+- queue -= 4;
++ ctrl2 = readl(ioaddr + GMAC_RXQ_CTRL2);
++ ctrl3 = readl(ioaddr + GMAC_RXQ_CTRL3);
+
+- value = readl(ioaddr + base_register);
++ /* The software must ensure that the same priority
++ * is not mapped to multiple Rx queues
++ */
++ for (i = 0; i < 4; i++)
++ clear_mask |= ((prio << GMAC_RXQCTRL_PSRQX_SHIFT(i)) &
++ GMAC_RXQCTRL_PSRQX_MASK(i));
++
++ ctrl2 &= ~clear_mask;
++ ctrl3 &= ~clear_mask;
++
++ /* First assign new priorities to a queue, then
++ * clear them from other queues
++ */
++ if (queue < 4) {
++ ctrl2 |= (prio << GMAC_RXQCTRL_PSRQX_SHIFT(queue)) &
++ GMAC_RXQCTRL_PSRQX_MASK(queue);
+
+- value &= ~GMAC_RXQCTRL_PSRQX_MASK(queue);
+- value |= (prio << GMAC_RXQCTRL_PSRQX_SHIFT(queue)) &
++ writel(ctrl2, ioaddr + GMAC_RXQ_CTRL2);
++ writel(ctrl3, ioaddr + GMAC_RXQ_CTRL3);
++ } else {
++ queue -= 4;
++
++ ctrl3 |= (prio << GMAC_RXQCTRL_PSRQX_SHIFT(queue)) &
+ GMAC_RXQCTRL_PSRQX_MASK(queue);
+- writel(value, ioaddr + base_register);
++
++ writel(ctrl3, ioaddr + GMAC_RXQ_CTRL3);
++ writel(ctrl2, ioaddr + GMAC_RXQ_CTRL2);
++ }
+ }
+
+ static void dwmac4_tx_queue_priority(struct mac_device_info *hw,
+diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
+index b5509f244ecd1..24c53b7255a2e 100644
+--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
++++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
+@@ -105,17 +105,41 @@ static void dwxgmac2_rx_queue_prio(struct mac_device_info *hw, u32 prio,
+ u32 queue)
+ {
+ void __iomem *ioaddr = hw->pcsr;
+- u32 value, reg;
++ u32 clear_mask = 0;
++ u32 ctrl2, ctrl3;
++ int i;
+
+- reg = (queue < 4) ? XGMAC_RXQ_CTRL2 : XGMAC_RXQ_CTRL3;
+- if (queue >= 4)
++ ctrl2 = readl(ioaddr + XGMAC_RXQ_CTRL2);
++ ctrl3 = readl(ioaddr + XGMAC_RXQ_CTRL3);
++
++ /* The software must ensure that the same priority
++ * is not mapped to multiple Rx queues
++ */
++ for (i = 0; i < 4; i++)
++ clear_mask |= ((prio << XGMAC_PSRQ_SHIFT(i)) &
++ XGMAC_PSRQ(i));
++
++ ctrl2 &= ~clear_mask;
++ ctrl3 &= ~clear_mask;
++
++ /* First assign new priorities to a queue, then
++ * clear them from other queues
++ */
++ if (queue < 4) {
++ ctrl2 |= (prio << XGMAC_PSRQ_SHIFT(queue)) &
++ XGMAC_PSRQ(queue);
++
++ writel(ctrl2, ioaddr + XGMAC_RXQ_CTRL2);
++ writel(ctrl3, ioaddr + XGMAC_RXQ_CTRL3);
++ } else {
+ queue -= 4;
+
+- value = readl(ioaddr + reg);
+- value &= ~XGMAC_PSRQ(queue);
+- value |= (prio << XGMAC_PSRQ_SHIFT(queue)) & XGMAC_PSRQ(queue);
++ ctrl3 |= (prio << XGMAC_PSRQ_SHIFT(queue)) &
++ XGMAC_PSRQ(queue);
+
+- writel(value, ioaddr + reg);
++ writel(ctrl3, ioaddr + XGMAC_RXQ_CTRL3);
++ writel(ctrl2, ioaddr + XGMAC_RXQ_CTRL2);
++ }
+ }
+
+ static void dwxgmac2_tx_queue_prio(struct mac_device_info *hw, u32 prio,
+diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c
+index e457ac9ae6d88..ad5c213dac077 100644
+--- a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c
++++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c
+@@ -20,6 +20,8 @@
+ #include "txgbe_phy.h"
+ #include "txgbe_hw.h"
+
++#define TXGBE_I2C_CLK_DEV_NAME "i2c_dw"
++
+ static int txgbe_swnodes_register(struct txgbe *txgbe)
+ {
+ struct txgbe_nodes *nodes = &txgbe->nodes;
+@@ -551,8 +553,8 @@ static int txgbe_clock_register(struct txgbe *txgbe)
+ char clk_name[32];
+ struct clk *clk;
+
+- snprintf(clk_name, sizeof(clk_name), "i2c_dw.%d",
+- pci_dev_id(pdev));
++ snprintf(clk_name, sizeof(clk_name), "%s.%d",
++ TXGBE_I2C_CLK_DEV_NAME, pci_dev_id(pdev));
+
+ clk = clk_register_fixed_rate(NULL, clk_name, NULL, 0, 156250000);
+ if (IS_ERR(clk))
+@@ -614,7 +616,7 @@ static int txgbe_i2c_register(struct txgbe *txgbe)
+
+ info.parent = &pdev->dev;
+ info.fwnode = software_node_fwnode(txgbe->nodes.group[SWNODE_I2C]);
+- info.name = "i2c_designware";
++ info.name = TXGBE_I2C_CLK_DEV_NAME;
+ info.id = pci_dev_id(pdev);
+
+ info.res = &DEFINE_RES_IRQ(pdev->irq);
+diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
+index f81c4bcd85a2a..cbd98ea4a84af 100644
+--- a/drivers/net/phy/micrel.c
++++ b/drivers/net/phy/micrel.c
+@@ -2388,6 +2388,7 @@ static int lan8814_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr)
+ struct hwtstamp_config config;
+ int txcfg = 0, rxcfg = 0;
+ int pkt_ts_enable;
++ int tx_mod;
+
+ if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
+ return -EFAULT;
+@@ -2437,9 +2438,14 @@ static int lan8814_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr)
+ lanphy_write_page_reg(ptp_priv->phydev, 5, PTP_RX_TIMESTAMP_EN, pkt_ts_enable);
+ lanphy_write_page_reg(ptp_priv->phydev, 5, PTP_TX_TIMESTAMP_EN, pkt_ts_enable);
+
+- if (ptp_priv->hwts_tx_type == HWTSTAMP_TX_ONESTEP_SYNC)
++ tx_mod = lanphy_read_page_reg(ptp_priv->phydev, 5, PTP_TX_MOD);
++ if (ptp_priv->hwts_tx_type == HWTSTAMP_TX_ONESTEP_SYNC) {
+ lanphy_write_page_reg(ptp_priv->phydev, 5, PTP_TX_MOD,
+- PTP_TX_MOD_TX_PTP_SYNC_TS_INSERT_);
++ tx_mod | PTP_TX_MOD_TX_PTP_SYNC_TS_INSERT_);
++ } else if (ptp_priv->hwts_tx_type == HWTSTAMP_TX_ON) {
++ lanphy_write_page_reg(ptp_priv->phydev, 5, PTP_TX_MOD,
++ tx_mod & ~PTP_TX_MOD_TX_PTP_SYNC_TS_INSERT_);
++ }
+
+ if (config.rx_filter != HWTSTAMP_FILTER_NONE)
+ lan8814_config_ts_intr(ptp_priv->phydev, true);
+@@ -2497,7 +2503,7 @@ static void lan8814_txtstamp(struct mii_timestamper *mii_ts,
+ }
+ }
+
+-static void lan8814_get_sig_rx(struct sk_buff *skb, u16 *sig)
++static bool lan8814_get_sig_rx(struct sk_buff *skb, u16 *sig)
+ {
+ struct ptp_header *ptp_header;
+ u32 type;
+@@ -2507,7 +2513,11 @@ static void lan8814_get_sig_rx(struct sk_buff *skb, u16 *sig)
+ ptp_header = ptp_parse_header(skb, type);
+ skb_pull_inline(skb, ETH_HLEN);
+
++ if (!ptp_header)
++ return false;
++
+ *sig = (__force u16)(ntohs(ptp_header->sequence_id));
++ return true;
+ }
+
+ static bool lan8814_match_rx_skb(struct kszphy_ptp_priv *ptp_priv,
+@@ -2519,7 +2529,8 @@ static bool lan8814_match_rx_skb(struct kszphy_ptp_priv *ptp_priv,
+ bool ret = false;
+ u16 skb_sig;
+
+- lan8814_get_sig_rx(skb, &skb_sig);
++ if (!lan8814_get_sig_rx(skb, &skb_sig))
++ return ret;
+
+ /* Iterate over all RX timestamps and match it with the received skbs */
+ spin_lock_irqsave(&ptp_priv->rx_ts_lock, flags);
+@@ -2799,7 +2810,7 @@ static int lan8814_ptpci_adjfine(struct ptp_clock_info *ptpci, long scaled_ppm)
+ return 0;
+ }
+
+-static void lan8814_get_sig_tx(struct sk_buff *skb, u16 *sig)
++static bool lan8814_get_sig_tx(struct sk_buff *skb, u16 *sig)
+ {
+ struct ptp_header *ptp_header;
+ u32 type;
+@@ -2807,7 +2818,11 @@ static void lan8814_get_sig_tx(struct sk_buff *skb, u16 *sig)
+ type = ptp_classify_raw(skb);
+ ptp_header = ptp_parse_header(skb, type);
+
++ if (!ptp_header)
++ return false;
++
+ *sig = (__force u16)(ntohs(ptp_header->sequence_id));
++ return true;
+ }
+
+ static void lan8814_match_tx_skb(struct kszphy_ptp_priv *ptp_priv,
+@@ -2821,7 +2836,8 @@ static void lan8814_match_tx_skb(struct kszphy_ptp_priv *ptp_priv,
+
+ spin_lock_irqsave(&ptp_priv->tx_queue.lock, flags);
+ skb_queue_walk_safe(&ptp_priv->tx_queue, skb, skb_tmp) {
+- lan8814_get_sig_tx(skb, &skb_sig);
++ if (!lan8814_get_sig_tx(skb, &skb_sig))
++ continue;
+
+ if (memcmp(&skb_sig, &seq_id, sizeof(seq_id)))
+ continue;
+@@ -2875,7 +2891,8 @@ static bool lan8814_match_skb(struct kszphy_ptp_priv *ptp_priv,
+
+ spin_lock_irqsave(&ptp_priv->rx_queue.lock, flags);
+ skb_queue_walk_safe(&ptp_priv->rx_queue, skb, skb_tmp) {
+- lan8814_get_sig_rx(skb, &skb_sig);
++ if (!lan8814_get_sig_rx(skb, &skb_sig))
++ continue;
+
+ if (memcmp(&skb_sig, &rx_ts->seq_id, sizeof(rx_ts->seq_id)))
+ continue;
+diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c
+index d837c18874161..e0e9b4c53cb02 100644
+--- a/drivers/net/usb/ax88179_178a.c
++++ b/drivers/net/usb/ax88179_178a.c
+@@ -1273,6 +1273,8 @@ static void ax88179_get_mac_addr(struct usbnet *dev)
+
+ if (is_valid_ether_addr(mac)) {
+ eth_hw_addr_set(dev->net, mac);
++ if (!is_local_ether_addr(mac))
++ dev->net->addr_assign_type = NET_ADDR_PERM;
+ } else {
+ netdev_info(dev->net, "invalid MAC address, using random\n");
+ eth_hw_addr_random(dev->net);
+diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h
+index 168eda2132fb8..9dcc1506bd0b0 100644
+--- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h
++++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h
+@@ -278,7 +278,7 @@ static inline void iwl_free_rxb(struct iwl_rx_cmd_buffer *r)
+ #define IWL_MGMT_TID 15
+ #define IWL_FRAME_LIMIT 64
+ #define IWL_MAX_RX_HW_QUEUES 16
+-#define IWL_9000_MAX_RX_HW_QUEUES 6
++#define IWL_9000_MAX_RX_HW_QUEUES 1
+
+ /**
+ * enum iwl_wowlan_status - WoWLAN image/device status
+diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
+index aaa9840d0d4c5..ee9d14250a261 100644
+--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
++++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
+@@ -352,7 +352,9 @@ int iwl_mvm_mac_setup_register(struct iwl_mvm *mvm)
+ ieee80211_hw_set(hw, HAS_RATE_CONTROL);
+ }
+
+- if (iwl_mvm_has_new_rx_api(mvm))
++ /* We want to use the mac80211's reorder buffer for 9000 */
++ if (iwl_mvm_has_new_rx_api(mvm) &&
++ mvm->trans->trans_cfg->device_family > IWL_DEVICE_FAMILY_9000)
+ ieee80211_hw_set(hw, SUPPORTS_REORDERING_BUFFER);
+
+ if (fw_has_capa(&mvm->fw->ucode_capa,
+diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rfi.c b/drivers/net/wireless/intel/iwlwifi/mvm/rfi.c
+index 2ecd32bed752f..045c862a8fc4f 100644
+--- a/drivers/net/wireless/intel/iwlwifi/mvm/rfi.c
++++ b/drivers/net/wireless/intel/iwlwifi/mvm/rfi.c
+@@ -132,14 +132,18 @@ struct iwl_rfi_freq_table_resp_cmd *iwl_rfi_get_freq_table(struct iwl_mvm *mvm)
+ if (ret)
+ return ERR_PTR(ret);
+
+- if (WARN_ON_ONCE(iwl_rx_packet_payload_len(cmd.resp_pkt) != resp_size))
++ if (WARN_ON_ONCE(iwl_rx_packet_payload_len(cmd.resp_pkt) !=
++ resp_size)) {
++ iwl_free_resp(&cmd);
+ return ERR_PTR(-EIO);
++ }
+
+ resp = kmemdup(cmd.resp_pkt->data, resp_size, GFP_KERNEL);
++ iwl_free_resp(&cmd);
++
+ if (!resp)
+ return ERR_PTR(-ENOMEM);
+
+- iwl_free_resp(&cmd);
+ return resp;
+ }
+
+diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
+index bac0228b8c866..e9360b555ac93 100644
+--- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
++++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
+@@ -236,21 +236,13 @@ static void iwl_mvm_add_rtap_sniffer_config(struct iwl_mvm *mvm,
+ static void iwl_mvm_pass_packet_to_mac80211(struct iwl_mvm *mvm,
+ struct napi_struct *napi,
+ struct sk_buff *skb, int queue,
+- struct ieee80211_sta *sta,
+- struct ieee80211_link_sta *link_sta)
++ struct ieee80211_sta *sta)
+ {
+ if (unlikely(iwl_mvm_check_pn(mvm, skb, queue, sta))) {
+ kfree_skb(skb);
+ return;
+ }
+
+- if (sta && sta->valid_links && link_sta) {
+- struct ieee80211_rx_status *rx_status = IEEE80211_SKB_RXCB(skb);
+-
+- rx_status->link_valid = 1;
+- rx_status->link_id = link_sta->link_id;
+- }
+-
+ ieee80211_rx_napi(mvm->hw, sta, skb, napi);
+ }
+
+@@ -636,7 +628,7 @@ static void iwl_mvm_release_frames(struct iwl_mvm *mvm,
+ while ((skb = __skb_dequeue(skb_list))) {
+ iwl_mvm_pass_packet_to_mac80211(mvm, napi, skb,
+ reorder_buf->queue,
+- sta, NULL /* FIXME */);
++ sta);
+ reorder_buf->num_stored--;
+ }
+ }
+@@ -963,6 +955,9 @@ static bool iwl_mvm_reorder(struct iwl_mvm *mvm,
+ baid = (reorder & IWL_RX_MPDU_REORDER_BAID_MASK) >>
+ IWL_RX_MPDU_REORDER_BAID_SHIFT;
+
++ if (mvm->trans->trans_cfg->device_family == IWL_DEVICE_FAMILY_9000)
++ return false;
++
+ /*
+ * This also covers the case of receiving a Block Ack Request
+ * outside a BA session; we'll pass it to mac80211 and that
+@@ -2486,6 +2481,11 @@ void iwl_mvm_rx_mpdu_mq(struct iwl_mvm *mvm, struct napi_struct *napi,
+ if (IS_ERR(sta))
+ sta = NULL;
+ link_sta = rcu_dereference(mvm->fw_id_to_link_sta[id]);
++
++ if (sta && sta->valid_links && link_sta) {
++ rx_status->link_valid = 1;
++ rx_status->link_id = link_sta->link_id;
++ }
+ }
+ } else if (!is_multicast_ether_addr(hdr->addr2)) {
+ /*
+@@ -2621,9 +2621,14 @@ void iwl_mvm_rx_mpdu_mq(struct iwl_mvm *mvm, struct napi_struct *napi,
+
+ if (!iwl_mvm_reorder(mvm, napi, queue, sta, skb, desc) &&
+ likely(!iwl_mvm_time_sync_frame(mvm, skb, hdr->addr2)) &&
+- likely(!iwl_mvm_mei_filter_scan(mvm, skb)))
+- iwl_mvm_pass_packet_to_mac80211(mvm, napi, skb, queue, sta,
+- link_sta);
++ likely(!iwl_mvm_mei_filter_scan(mvm, skb))) {
++ if (mvm->trans->trans_cfg->device_family == IWL_DEVICE_FAMILY_9000 &&
++ (desc->mac_flags2 & IWL_RX_MPDU_MFLG2_AMSDU) &&
++ !(desc->amsdu_info & IWL_RX_MPDU_AMSDU_LAST_SUBFRAME))
++ rx_status->flag |= RX_FLAG_AMSDU_MORE;
++
++ iwl_mvm_pass_packet_to_mac80211(mvm, napi, skb, queue, sta);
++ }
+ out:
+ rcu_read_unlock();
+ }
+diff --git a/drivers/net/wwan/t7xx/t7xx_cldma.c b/drivers/net/wwan/t7xx/t7xx_cldma.c
+index 9f43f256db1d0..f0a4783baf1f3 100644
+--- a/drivers/net/wwan/t7xx/t7xx_cldma.c
++++ b/drivers/net/wwan/t7xx/t7xx_cldma.c
+@@ -106,7 +106,7 @@ bool t7xx_cldma_tx_addr_is_set(struct t7xx_cldma_hw *hw_info, unsigned int qno)
+ {
+ u32 offset = REG_CLDMA_UL_START_ADDRL_0 + qno * ADDR_SIZE;
+
+- return ioread64(hw_info->ap_pdn_base + offset);
++ return ioread64_lo_hi(hw_info->ap_pdn_base + offset);
+ }
+
+ void t7xx_cldma_hw_set_start_addr(struct t7xx_cldma_hw *hw_info, unsigned int qno, u64 address,
+@@ -117,7 +117,7 @@ void t7xx_cldma_hw_set_start_addr(struct t7xx_cldma_hw *hw_info, unsigned int qn
+
+ reg = tx_rx == MTK_RX ? hw_info->ap_ao_base + REG_CLDMA_DL_START_ADDRL_0 :
+ hw_info->ap_pdn_base + REG_CLDMA_UL_START_ADDRL_0;
+- iowrite64(address, reg + offset);
++ iowrite64_lo_hi(address, reg + offset);
+ }
+
+ void t7xx_cldma_hw_resume_queue(struct t7xx_cldma_hw *hw_info, unsigned int qno,
+diff --git a/drivers/net/wwan/t7xx/t7xx_hif_cldma.c b/drivers/net/wwan/t7xx/t7xx_hif_cldma.c
+index cc70360364b7d..554ba4669cc8d 100644
+--- a/drivers/net/wwan/t7xx/t7xx_hif_cldma.c
++++ b/drivers/net/wwan/t7xx/t7xx_hif_cldma.c
+@@ -139,8 +139,9 @@ static int t7xx_cldma_gpd_rx_from_q(struct cldma_queue *queue, int budget, bool
+ return -ENODEV;
+ }
+
+- gpd_addr = ioread64(hw_info->ap_pdn_base + REG_CLDMA_DL_CURRENT_ADDRL_0 +
+- queue->index * sizeof(u64));
++ gpd_addr = ioread64_lo_hi(hw_info->ap_pdn_base +
++ REG_CLDMA_DL_CURRENT_ADDRL_0 +
++ queue->index * sizeof(u64));
+ if (req->gpd_addr == gpd_addr || hwo_polling_count++ >= 100)
+ return 0;
+
+@@ -318,8 +319,8 @@ static void t7xx_cldma_txq_empty_hndl(struct cldma_queue *queue)
+ struct t7xx_cldma_hw *hw_info = &md_ctrl->hw_info;
+
+ /* Check current processing TGPD, 64-bit address is in a table by Q index */
+- ul_curr_addr = ioread64(hw_info->ap_pdn_base + REG_CLDMA_UL_CURRENT_ADDRL_0 +
+- queue->index * sizeof(u64));
++ ul_curr_addr = ioread64_lo_hi(hw_info->ap_pdn_base + REG_CLDMA_UL_CURRENT_ADDRL_0 +
++ queue->index * sizeof(u64));
+ if (req->gpd_addr != ul_curr_addr) {
+ spin_unlock_irqrestore(&md_ctrl->cldma_lock, flags);
+ dev_err(md_ctrl->dev, "CLDMA%d queue %d is not empty\n",
+diff --git a/drivers/net/wwan/t7xx/t7xx_pcie_mac.c b/drivers/net/wwan/t7xx/t7xx_pcie_mac.c
+index 76da4c15e3de1..f071ec7ff23d5 100644
+--- a/drivers/net/wwan/t7xx/t7xx_pcie_mac.c
++++ b/drivers/net/wwan/t7xx/t7xx_pcie_mac.c
+@@ -75,7 +75,7 @@ static void t7xx_pcie_mac_atr_tables_dis(void __iomem *pbase, enum t7xx_atr_src_
+ for (i = 0; i < ATR_TABLE_NUM_PER_ATR; i++) {
+ offset = ATR_PORT_OFFSET * port + ATR_TABLE_OFFSET * i;
+ reg = pbase + ATR_PCIE_WIN0_T0_ATR_PARAM_SRC_ADDR + offset;
+- iowrite64(0, reg);
++ iowrite64_lo_hi(0, reg);
+ }
+ }
+
+@@ -112,17 +112,17 @@ static int t7xx_pcie_mac_atr_cfg(struct t7xx_pci_dev *t7xx_dev, struct t7xx_atr_
+
+ reg = pbase + ATR_PCIE_WIN0_T0_TRSL_ADDR + offset;
+ value = cfg->trsl_addr & ATR_PCIE_WIN0_ADDR_ALGMT;
+- iowrite64(value, reg);
++ iowrite64_lo_hi(value, reg);
+
+ reg = pbase + ATR_PCIE_WIN0_T0_TRSL_PARAM + offset;
+ iowrite32(cfg->trsl_id, reg);
+
+ reg = pbase + ATR_PCIE_WIN0_T0_ATR_PARAM_SRC_ADDR + offset;
+ value = (cfg->src_addr & ATR_PCIE_WIN0_ADDR_ALGMT) | (atr_size << 1) | BIT(0);
+- iowrite64(value, reg);
++ iowrite64_lo_hi(value, reg);
+
+ /* Ensure ATR is set */
+- ioread64(reg);
++ ioread64_lo_hi(reg);
+ return 0;
+ }
+
+diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
+index ad29f370034e4..8d2aee88526c6 100644
+--- a/drivers/net/xen-netfront.c
++++ b/drivers/net/xen-netfront.c
+@@ -285,6 +285,7 @@ static struct sk_buff *xennet_alloc_one_rx_buffer(struct netfront_queue *queue)
+ return NULL;
+ }
+ skb_add_rx_frag(skb, 0, page, 0, 0, PAGE_SIZE);
++ skb_mark_for_recycle(skb);
+
+ /* Align ip header to a 16 bytes boundary */
+ skb_reserve(skb, NET_IP_ALIGN);
+diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c
+index 3bf27052832f3..4d57a4e341054 100644
+--- a/drivers/of/dynamic.c
++++ b/drivers/of/dynamic.c
+@@ -9,6 +9,7 @@
+
+ #define pr_fmt(fmt) "OF: " fmt
+
++#include <linux/device.h>
+ #include <linux/of.h>
+ #include <linux/spinlock.h>
+ #include <linux/slab.h>
+@@ -667,6 +668,17 @@ void of_changeset_destroy(struct of_changeset *ocs)
+ {
+ struct of_changeset_entry *ce, *cen;
+
++ /*
++ * When a device is deleted, the device links to/from it are also queued
++ * for deletion. Until these device links are freed, the devices
++ * themselves aren't freed. If the device being deleted is due to an
++ * overlay change, this device might be holding a reference to a device
++ * node that will be freed. So, wait until all already pending device
++ * links are deleted before freeing a device node. This ensures we don't
++ * free any device node that has a non-zero reference count.
++ */
++ device_link_wait_removal();
++
+ list_for_each_entry_safe_reverse(ce, cen, &ocs->entries, node)
+ __of_changeset_entry_destroy(ce);
+ }
+diff --git a/drivers/of/module.c b/drivers/of/module.c
+index 0e8aa974f0f2b..f58e624953a20 100644
+--- a/drivers/of/module.c
++++ b/drivers/of/module.c
+@@ -16,6 +16,14 @@ ssize_t of_modalias(const struct device_node *np, char *str, ssize_t len)
+ ssize_t csize;
+ ssize_t tsize;
+
++ /*
++ * Prevent a kernel oops in vsnprintf() -- it only allows passing a
++ * NULL ptr when the length is also 0. Also filter out the negative
++ * lengths...
++ */
++ if ((len > 0 && !str) || len < 0)
++ return -EINVAL;
++
+ /* Name & Type */
+ /* %p eats all alphanum characters, so %c must be used here */
+ csize = snprintf(str, len, "of:N%pOFn%c%s", np, 'T',
+diff --git a/drivers/perf/riscv_pmu.c b/drivers/perf/riscv_pmu.c
+index c78a6fd6c57f6..b4efdddb2ad91 100644
+--- a/drivers/perf/riscv_pmu.c
++++ b/drivers/perf/riscv_pmu.c
+@@ -313,6 +313,10 @@ static int riscv_pmu_event_init(struct perf_event *event)
+ u64 event_config = 0;
+ uint64_t cmask;
+
++ /* driver does not support branch stack sampling */
++ if (has_branch_stack(event))
++ return -EOPNOTSUPP;
++
+ hwc->flags = 0;
+ mapped_event = rvpmu->event_map(event, &event_config);
+ if (mapped_event < 0) {
+diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
+index cd783290bde5e..1148b4ecabdde 100644
+--- a/drivers/s390/net/qeth_core_main.c
++++ b/drivers/s390/net/qeth_core_main.c
+@@ -1179,6 +1179,20 @@ static int qeth_check_irb_error(struct qeth_card *card, struct ccw_device *cdev,
+ }
+ }
+
++/**
++ * qeth_irq() - qeth interrupt handler
++ * @cdev: ccw device
++ * @intparm: expect pointer to iob
++ * @irb: Interruption Response Block
++ *
++ * In the good path:
++ * corresponding qeth channel is locked with last used iob as active_cmd.
++ * But this function is also called for error interrupts.
++ *
++ * Caller ensures that:
++ * Interrupts are disabled; ccw device lock is held;
++ *
++ */
+ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm,
+ struct irb *irb)
+ {
+@@ -1220,11 +1234,10 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm,
+ iob = (struct qeth_cmd_buffer *) (addr_t)intparm;
+ }
+
+- qeth_unlock_channel(card, channel);
+-
+ rc = qeth_check_irb_error(card, cdev, irb);
+ if (rc) {
+ /* IO was terminated, free its resources. */
++ qeth_unlock_channel(card, channel);
+ if (iob)
+ qeth_cancel_cmd(iob, rc);
+ return;
+@@ -1268,6 +1281,7 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm,
+ rc = qeth_get_problem(card, cdev, irb);
+ if (rc) {
+ card->read_or_write_problem = 1;
++ qeth_unlock_channel(card, channel);
+ if (iob)
+ qeth_cancel_cmd(iob, rc);
+ qeth_clear_ipacmd_list(card);
+@@ -1276,6 +1290,26 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm,
+ }
+ }
+
++ if (scsw_cmd_is_valid_cc(&irb->scsw) && irb->scsw.cmd.cc == 1 && iob) {
++ /* channel command hasn't started: retry.
++ * active_cmd is still set to last iob
++ */
++ QETH_CARD_TEXT(card, 2, "irqcc1");
++ rc = ccw_device_start_timeout(cdev, __ccw_from_cmd(iob),
++ (addr_t)iob, 0, 0, iob->timeout);
++ if (rc) {
++ QETH_DBF_MESSAGE(2,
++ "ccw retry on %x failed, rc = %i\n",
++ CARD_DEVID(card), rc);
++ QETH_CARD_TEXT_(card, 2, " err%d", rc);
++ qeth_unlock_channel(card, channel);
++ qeth_cancel_cmd(iob, rc);
++ }
++ return;
++ }
++
++ qeth_unlock_channel(card, channel);
++
+ if (iob) {
+ /* sanity check: */
+ if (irb->scsw.cmd.count > iob->length) {
+diff --git a/drivers/scsi/myrb.c b/drivers/scsi/myrb.c
+index ca2e932dd9b70..f684eb5e04898 100644
+--- a/drivers/scsi/myrb.c
++++ b/drivers/scsi/myrb.c
+@@ -1775,9 +1775,9 @@ static ssize_t raid_state_show(struct device *dev,
+
+ name = myrb_devstate_name(ldev_info->state);
+ if (name)
+- ret = snprintf(buf, 32, "%s\n", name);
++ ret = snprintf(buf, 64, "%s\n", name);
+ else
+- ret = snprintf(buf, 32, "Invalid (%02X)\n",
++ ret = snprintf(buf, 64, "Invalid (%02X)\n",
+ ldev_info->state);
+ } else {
+ struct myrb_pdev_state *pdev_info = sdev->hostdata;
+@@ -1796,9 +1796,9 @@ static ssize_t raid_state_show(struct device *dev,
+ else
+ name = myrb_devstate_name(pdev_info->state);
+ if (name)
+- ret = snprintf(buf, 32, "%s\n", name);
++ ret = snprintf(buf, 64, "%s\n", name);
+ else
+- ret = snprintf(buf, 32, "Invalid (%02X)\n",
++ ret = snprintf(buf, 64, "Invalid (%02X)\n",
+ pdev_info->state);
+ }
+ return ret;
+@@ -1886,11 +1886,11 @@ static ssize_t raid_level_show(struct device *dev,
+
+ name = myrb_raidlevel_name(ldev_info->raid_level);
+ if (!name)
+- return snprintf(buf, 32, "Invalid (%02X)\n",
++ return snprintf(buf, 64, "Invalid (%02X)\n",
+ ldev_info->state);
+- return snprintf(buf, 32, "%s\n", name);
++ return snprintf(buf, 64, "%s\n", name);
+ }
+- return snprintf(buf, 32, "Physical Drive\n");
++ return snprintf(buf, 64, "Physical Drive\n");
+ }
+ static DEVICE_ATTR_RO(raid_level);
+
+@@ -1903,15 +1903,15 @@ static ssize_t rebuild_show(struct device *dev,
+ unsigned char status;
+
+ if (sdev->channel < myrb_logical_channel(sdev->host))
+- return snprintf(buf, 32, "physical device - not rebuilding\n");
++ return snprintf(buf, 64, "physical device - not rebuilding\n");
+
+ status = myrb_get_rbld_progress(cb, &rbld_buf);
+
+ if (rbld_buf.ldev_num != sdev->id ||
+ status != MYRB_STATUS_SUCCESS)
+- return snprintf(buf, 32, "not rebuilding\n");
++ return snprintf(buf, 64, "not rebuilding\n");
+
+- return snprintf(buf, 32, "rebuilding block %u of %u\n",
++ return snprintf(buf, 64, "rebuilding block %u of %u\n",
+ rbld_buf.ldev_size - rbld_buf.blocks_left,
+ rbld_buf.ldev_size);
+ }
+diff --git a/drivers/scsi/myrs.c b/drivers/scsi/myrs.c
+index a1eec65a9713f..e824be9d9bbb9 100644
+--- a/drivers/scsi/myrs.c
++++ b/drivers/scsi/myrs.c
+@@ -947,9 +947,9 @@ static ssize_t raid_state_show(struct device *dev,
+
+ name = myrs_devstate_name(ldev_info->dev_state);
+ if (name)
+- ret = snprintf(buf, 32, "%s\n", name);
++ ret = snprintf(buf, 64, "%s\n", name);
+ else
+- ret = snprintf(buf, 32, "Invalid (%02X)\n",
++ ret = snprintf(buf, 64, "Invalid (%02X)\n",
+ ldev_info->dev_state);
+ } else {
+ struct myrs_pdev_info *pdev_info;
+@@ -958,9 +958,9 @@ static ssize_t raid_state_show(struct device *dev,
+ pdev_info = sdev->hostdata;
+ name = myrs_devstate_name(pdev_info->dev_state);
+ if (name)
+- ret = snprintf(buf, 32, "%s\n", name);
++ ret = snprintf(buf, 64, "%s\n", name);
+ else
+- ret = snprintf(buf, 32, "Invalid (%02X)\n",
++ ret = snprintf(buf, 64, "Invalid (%02X)\n",
+ pdev_info->dev_state);
+ }
+ return ret;
+@@ -1066,13 +1066,13 @@ static ssize_t raid_level_show(struct device *dev,
+ ldev_info = sdev->hostdata;
+ name = myrs_raid_level_name(ldev_info->raid_level);
+ if (!name)
+- return snprintf(buf, 32, "Invalid (%02X)\n",
++ return snprintf(buf, 64, "Invalid (%02X)\n",
+ ldev_info->dev_state);
+
+ } else
+ name = myrs_raid_level_name(MYRS_RAID_PHYSICAL);
+
+- return snprintf(buf, 32, "%s\n", name);
++ return snprintf(buf, 64, "%s\n", name);
+ }
+ static DEVICE_ATTR_RO(raid_level);
+
+@@ -1086,7 +1086,7 @@ static ssize_t rebuild_show(struct device *dev,
+ unsigned char status;
+
+ if (sdev->channel < cs->ctlr_info->physchan_present)
+- return snprintf(buf, 32, "physical device - not rebuilding\n");
++ return snprintf(buf, 64, "physical device - not rebuilding\n");
+
+ ldev_info = sdev->hostdata;
+ ldev_num = ldev_info->ldev_num;
+@@ -1098,11 +1098,11 @@ static ssize_t rebuild_show(struct device *dev,
+ return -EIO;
+ }
+ if (ldev_info->rbld_active) {
+- return snprintf(buf, 32, "rebuilding block %zu of %zu\n",
++ return snprintf(buf, 64, "rebuilding block %zu of %zu\n",
+ (size_t)ldev_info->rbld_lba,
+ (size_t)ldev_info->cfg_devsize);
+ } else
+- return snprintf(buf, 32, "not rebuilding\n");
++ return snprintf(buf, 64, "not rebuilding\n");
+ }
+
+ static ssize_t rebuild_store(struct device *dev,
+@@ -1190,7 +1190,7 @@ static ssize_t consistency_check_show(struct device *dev,
+ unsigned short ldev_num;
+
+ if (sdev->channel < cs->ctlr_info->physchan_present)
+- return snprintf(buf, 32, "physical device - not checking\n");
++ return snprintf(buf, 64, "physical device - not checking\n");
+
+ ldev_info = sdev->hostdata;
+ if (!ldev_info)
+@@ -1198,11 +1198,11 @@ static ssize_t consistency_check_show(struct device *dev,
+ ldev_num = ldev_info->ldev_num;
+ myrs_get_ldev_info(cs, ldev_num, ldev_info);
+ if (ldev_info->cc_active)
+- return snprintf(buf, 32, "checking block %zu of %zu\n",
++ return snprintf(buf, 64, "checking block %zu of %zu\n",
+ (size_t)ldev_info->cc_lba,
+ (size_t)ldev_info->cfg_devsize);
+ else
+- return snprintf(buf, 32, "not checking\n");
++ return snprintf(buf, 64, "not checking\n");
+ }
+
+ static ssize_t consistency_check_store(struct device *dev,
+diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
+index e80c33cdad2b9..c62f677084b4c 100644
+--- a/drivers/scsi/sd.c
++++ b/drivers/scsi/sd.c
+@@ -3754,7 +3754,7 @@ static int sd_probe(struct device *dev)
+
+ error = device_add_disk(dev, gd, NULL);
+ if (error) {
+- put_device(&sdkp->disk_dev);
++ device_unregister(&sdkp->disk_dev);
+ put_disk(gd);
+ goto out;
+ }
+diff --git a/drivers/spi/spi-pci1xxxx.c b/drivers/spi/spi-pci1xxxx.c
+index 3638e974f5d49..06bf58b7e5d72 100644
+--- a/drivers/spi/spi-pci1xxxx.c
++++ b/drivers/spi/spi-pci1xxxx.c
+@@ -275,6 +275,8 @@ static int pci1xxxx_spi_probe(struct pci_dev *pdev, const struct pci_device_id *
+ spi_bus->spi_int[iter] = devm_kzalloc(&pdev->dev,
+ sizeof(struct pci1xxxx_spi_internal),
+ GFP_KERNEL);
++ if (!spi_bus->spi_int[iter])
++ return -ENOMEM;
+ spi_sub_ptr = spi_bus->spi_int[iter];
+ spi_sub_ptr->spi_host = devm_spi_alloc_host(dev, sizeof(struct spi_controller));
+ if (!spi_sub_ptr->spi_host)
+diff --git a/drivers/spi/spi-s3c64xx.c b/drivers/spi/spi-s3c64xx.c
+index 0e48ffd499b9f..652eadbefe24c 100644
+--- a/drivers/spi/spi-s3c64xx.c
++++ b/drivers/spi/spi-s3c64xx.c
+@@ -3,19 +3,20 @@
+ // Copyright (c) 2009 Samsung Electronics Co., Ltd.
+ // Jaswinder Singh <jassi.brar@samsung.com>
+
+-#include <linux/init.h>
+-#include <linux/module.h>
+-#include <linux/interrupt.h>
+-#include <linux/delay.h>
++#include <linux/bitops.h>
++#include <linux/bits.h>
+ #include <linux/clk.h>
++#include <linux/delay.h>
+ #include <linux/dma-mapping.h>
+ #include <linux/dmaengine.h>
++#include <linux/init.h>
++#include <linux/interrupt.h>
++#include <linux/module.h>
++#include <linux/of.h>
++#include <linux/platform_data/spi-s3c64xx.h>
+ #include <linux/platform_device.h>
+ #include <linux/pm_runtime.h>
+ #include <linux/spi/spi.h>
+-#include <linux/of.h>
+-
+-#include <linux/platform_data/spi-s3c64xx.h>
+
+ #define MAX_SPI_PORTS 12
+ #define S3C64XX_SPI_QUIRK_CS_AUTO (1 << 1)
+@@ -76,6 +77,7 @@
+ #define S3C64XX_SPI_INT_RX_FIFORDY_EN (1<<1)
+ #define S3C64XX_SPI_INT_TX_FIFORDY_EN (1<<0)
+
++#define S3C64XX_SPI_ST_TX_FIFO_LVL_SHIFT 6
+ #define S3C64XX_SPI_ST_RX_OVERRUN_ERR (1<<5)
+ #define S3C64XX_SPI_ST_RX_UNDERRUN_ERR (1<<4)
+ #define S3C64XX_SPI_ST_TX_OVERRUN_ERR (1<<3)
+@@ -106,9 +108,11 @@
+ #define FIFO_LVL_MASK(i) ((i)->port_conf->fifo_lvl_mask[i->port_id])
+ #define S3C64XX_SPI_ST_TX_DONE(v, i) (((v) & \
+ (1 << (i)->port_conf->tx_st_done)) ? 1 : 0)
+-#define TX_FIFO_LVL(v, i) (((v) >> 6) & FIFO_LVL_MASK(i))
+-#define RX_FIFO_LVL(v, i) (((v) >> (i)->port_conf->rx_lvl_offset) & \
+- FIFO_LVL_MASK(i))
++#define TX_FIFO_LVL(v, sdd) (((v) & (sdd)->tx_fifomask) >> \
++ __ffs((sdd)->tx_fifomask))
++#define RX_FIFO_LVL(v, sdd) (((v) & (sdd)->rx_fifomask) >> \
++ __ffs((sdd)->rx_fifomask))
++#define FIFO_DEPTH(i) ((FIFO_LVL_MASK(i) >> 1) + 1)
+
+ #define S3C64XX_SPI_MAX_TRAILCNT 0x3ff
+ #define S3C64XX_SPI_TRAILCNT_OFF 19
+@@ -133,6 +137,10 @@ struct s3c64xx_spi_dma_data {
+ * struct s3c64xx_spi_port_config - SPI Controller hardware info
+ * @fifo_lvl_mask: Bit-mask for {TX|RX}_FIFO_LVL bits in SPI_STATUS register.
+ * @rx_lvl_offset: Bit offset of RX_FIFO_LVL bits in SPI_STATUS regiter.
++ * @rx_fifomask: SPI_STATUS.RX_FIFO_LVL mask. Shifted mask defining the field's
++ * length and position.
++ * @tx_fifomask: SPI_STATUS.TX_FIFO_LVL mask. Shifted mask defining the field's
++ * length and position.
+ * @tx_st_done: Bit offset of TX_DONE bit in SPI_STATUS regiter.
+ * @clk_div: Internal clock divider
+ * @quirks: Bitmask of known quirks
+@@ -150,6 +158,8 @@ struct s3c64xx_spi_dma_data {
+ struct s3c64xx_spi_port_config {
+ int fifo_lvl_mask[MAX_SPI_PORTS];
+ int rx_lvl_offset;
++ u32 rx_fifomask;
++ u32 tx_fifomask;
+ int tx_st_done;
+ int quirks;
+ int clk_div;
+@@ -179,6 +189,11 @@ struct s3c64xx_spi_port_config {
+ * @tx_dma: Local transmit DMA data (e.g. chan and direction)
+ * @port_conf: Local SPI port configuartion data
+ * @port_id: Port identification number
++ * @fifo_depth: depth of the FIFO.
++ * @rx_fifomask: SPI_STATUS.RX_FIFO_LVL mask. Shifted mask defining the field's
++ * length and position.
++ * @tx_fifomask: SPI_STATUS.TX_FIFO_LVL mask. Shifted mask defining the field's
++ * length and position.
+ */
+ struct s3c64xx_spi_driver_data {
+ void __iomem *regs;
+@@ -198,6 +213,9 @@ struct s3c64xx_spi_driver_data {
+ struct s3c64xx_spi_dma_data tx_dma;
+ const struct s3c64xx_spi_port_config *port_conf;
+ unsigned int port_id;
++ unsigned int fifo_depth;
++ u32 rx_fifomask;
++ u32 tx_fifomask;
+ };
+
+ static void s3c64xx_flush_fifo(struct s3c64xx_spi_driver_data *sdd)
+@@ -405,12 +423,10 @@ static bool s3c64xx_spi_can_dma(struct spi_controller *host,
+ {
+ struct s3c64xx_spi_driver_data *sdd = spi_controller_get_devdata(host);
+
+- if (sdd->rx_dma.ch && sdd->tx_dma.ch) {
+- return xfer->len > (FIFO_LVL_MASK(sdd) >> 1) + 1;
+- } else {
+- return false;
+- }
++ if (sdd->rx_dma.ch && sdd->tx_dma.ch)
++ return xfer->len >= sdd->fifo_depth;
+
++ return false;
+ }
+
+ static int s3c64xx_enable_datapath(struct s3c64xx_spi_driver_data *sdd,
+@@ -495,9 +511,7 @@ static u32 s3c64xx_spi_wait_for_timeout(struct s3c64xx_spi_driver_data *sdd,
+ void __iomem *regs = sdd->regs;
+ unsigned long val = 1;
+ u32 status;
+-
+- /* max fifo depth available */
+- u32 max_fifo = (FIFO_LVL_MASK(sdd) >> 1) + 1;
++ u32 max_fifo = sdd->fifo_depth;
+
+ if (timeout_ms)
+ val = msecs_to_loops(timeout_ms);
+@@ -604,7 +618,7 @@ static int s3c64xx_wait_for_pio(struct s3c64xx_spi_driver_data *sdd,
+ * For any size less than the fifo size the below code is
+ * executed atleast once.
+ */
+- loops = xfer->len / ((FIFO_LVL_MASK(sdd) >> 1) + 1);
++ loops = xfer->len / sdd->fifo_depth;
+ buf = xfer->rx_buf;
+ do {
+ /* wait for data to be received in the fifo */
+@@ -741,7 +755,7 @@ static int s3c64xx_spi_transfer_one(struct spi_controller *host,
+ struct spi_transfer *xfer)
+ {
+ struct s3c64xx_spi_driver_data *sdd = spi_controller_get_devdata(host);
+- const unsigned int fifo_len = (FIFO_LVL_MASK(sdd) >> 1) + 1;
++ const unsigned int fifo_len = sdd->fifo_depth;
+ const void *tx_buf = NULL;
+ void *rx_buf = NULL;
+ int target_len = 0, origin_len = 0;
+@@ -769,10 +783,9 @@ static int s3c64xx_spi_transfer_one(struct spi_controller *host,
+ return status;
+ }
+
+- if (!is_polling(sdd) && (xfer->len > fifo_len) &&
++ if (!is_polling(sdd) && xfer->len >= fifo_len &&
+ sdd->rx_dma.ch && sdd->tx_dma.ch) {
+ use_dma = 1;
+-
+ } else if (xfer->len >= fifo_len) {
+ tx_buf = xfer->tx_buf;
+ rx_buf = xfer->rx_buf;
+@@ -1146,6 +1159,23 @@ static inline const struct s3c64xx_spi_port_config *s3c64xx_spi_get_port_config(
+ return (const struct s3c64xx_spi_port_config *)platform_get_device_id(pdev)->driver_data;
+ }
+
++static void s3c64xx_spi_set_fifomask(struct s3c64xx_spi_driver_data *sdd)
++{
++ const struct s3c64xx_spi_port_config *port_conf = sdd->port_conf;
++
++ if (port_conf->rx_fifomask)
++ sdd->rx_fifomask = port_conf->rx_fifomask;
++ else
++ sdd->rx_fifomask = FIFO_LVL_MASK(sdd) <<
++ port_conf->rx_lvl_offset;
++
++ if (port_conf->tx_fifomask)
++ sdd->tx_fifomask = port_conf->tx_fifomask;
++ else
++ sdd->tx_fifomask = FIFO_LVL_MASK(sdd) <<
++ S3C64XX_SPI_ST_TX_FIFO_LVL_SHIFT;
++}
++
+ static int s3c64xx_spi_probe(struct platform_device *pdev)
+ {
+ struct resource *mem_res;
+@@ -1191,6 +1221,10 @@ static int s3c64xx_spi_probe(struct platform_device *pdev)
+ sdd->port_id = pdev->id;
+ }
+
++ sdd->fifo_depth = FIFO_DEPTH(sdd);
++
++ s3c64xx_spi_set_fifomask(sdd);
++
+ sdd->cur_bpw = 8;
+
+ sdd->tx_dma.direction = DMA_MEM_TO_DEV;
+@@ -1280,7 +1314,7 @@ static int s3c64xx_spi_probe(struct platform_device *pdev)
+ dev_dbg(&pdev->dev, "Samsung SoC SPI Driver loaded for Bus SPI-%d with %d Targets attached\n",
+ sdd->port_id, host->num_chipselect);
+ dev_dbg(&pdev->dev, "\tIOmem=[%pR]\tFIFO %dbytes\n",
+- mem_res, (FIFO_LVL_MASK(sdd) >> 1) + 1);
++ mem_res, sdd->fifo_depth);
+
+ pm_runtime_mark_last_busy(&pdev->dev);
+ pm_runtime_put_autosuspend(&pdev->dev);
+diff --git a/drivers/usb/typec/ucsi/ucsi_glink.c b/drivers/usb/typec/ucsi/ucsi_glink.c
+index 4853141cd10c8..894622b6556a6 100644
+--- a/drivers/usb/typec/ucsi/ucsi_glink.c
++++ b/drivers/usb/typec/ucsi/ucsi_glink.c
+@@ -254,6 +254,20 @@ static void pmic_glink_ucsi_notify(struct work_struct *work)
+ static void pmic_glink_ucsi_register(struct work_struct *work)
+ {
+ struct pmic_glink_ucsi *ucsi = container_of(work, struct pmic_glink_ucsi, register_work);
++ int orientation;
++ int i;
++
++ for (i = 0; i < PMIC_GLINK_MAX_PORTS; i++) {
++ if (!ucsi->port_orientation[i])
++ continue;
++ orientation = gpiod_get_value(ucsi->port_orientation[i]);
++
++ if (orientation >= 0) {
++ typec_switch_set(ucsi->port_switch[i],
++ orientation ? TYPEC_ORIENTATION_REVERSE
++ : TYPEC_ORIENTATION_NORMAL);
++ }
++ }
+
+ ucsi_register(ucsi->ucsi);
+ }
+diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
+index fc8eb8d86ca25..5acb2cb79d4bf 100644
+--- a/fs/btrfs/extent_io.c
++++ b/fs/btrfs/extent_io.c
+@@ -2410,12 +2410,65 @@ int try_release_extent_mapping(struct page *page, gfp_t mask)
+ return try_release_extent_state(tree, page, mask);
+ }
+
++struct btrfs_fiemap_entry {
++ u64 offset;
++ u64 phys;
++ u64 len;
++ u32 flags;
++};
++
++/*
++ * Tell the caller of emit_fiemap_extent() that it needs to unlock the file
++ * range from the inode's io tree, unlock the subvolume tree search path, flush
++ * the fiemap cache and relock the file range and re-search the subvolume tree.
++ * The value here is something negative that can't be confused with a valid
++ * errno value and different from 1 because that's also a return value from
++ * fiemap_fill_next_extent() and also it's often used to mean some btree search
++ * did not find a key, so make it some distinct negative value.
++ */
++#define BTRFS_FIEMAP_FLUSH_CACHE (-(MAX_ERRNO + 1))
++
+ /*
+- * To cache previous fiemap extent
++ * Used to:
+ *
+- * Will be used for merging fiemap extent
++ * - Cache the next entry to be emitted to the fiemap buffer, so that we can
++ * merge extents that are contiguous and can be grouped as a single one;
++ *
++ * - Store extents ready to be written to the fiemap buffer in an intermediary
++ * buffer. This intermediary buffer is to ensure that in case the fiemap
++ * buffer is memory mapped to the fiemap target file, we don't deadlock
++ * during btrfs_page_mkwrite(). This is because during fiemap we are locking
++ * an extent range in order to prevent races with delalloc flushing and
++ * ordered extent completion, which is needed in order to reliably detect
++ * delalloc in holes and prealloc extents. And this can lead to a deadlock
++ * if the fiemap buffer is memory mapped to the file we are running fiemap
++ * against (a silly, useless in practice scenario, but possible) because
++ * btrfs_page_mkwrite() will try to lock the same extent range.
+ */
+ struct fiemap_cache {
++ /* An array of ready fiemap entries. */
++ struct btrfs_fiemap_entry *entries;
++ /* Number of entries in the entries array. */
++ int entries_size;
++ /* Index of the next entry in the entries array to write to. */
++ int entries_pos;
++ /*
++ * Once the entries array is full, this indicates what's the offset for
++ * the next file extent item we must search for in the inode's subvolume
++ * tree after unlocking the extent range in the inode's io tree and
++ * releasing the search path.
++ */
++ u64 next_search_offset;
++ /*
++ * This matches struct fiemap_extent_info::fi_mapped_extents, we use it
++ * to count ourselves emitted extents and stop instead of relying on
++ * fiemap_fill_next_extent() because we buffer ready fiemap entries at
++ * the @entries array, and we want to stop as soon as we hit the max
++ * amount of extents to map, not just to save time but also to make the
++ * logic at extent_fiemap() simpler.
++ */
++ unsigned int extents_mapped;
++ /* Fields for the cached extent (unsubmitted, not ready, extent). */
+ u64 offset;
+ u64 phys;
+ u64 len;
+@@ -2423,6 +2476,28 @@ struct fiemap_cache {
+ bool cached;
+ };
+
++static int flush_fiemap_cache(struct fiemap_extent_info *fieinfo,
++ struct fiemap_cache *cache)
++{
++ for (int i = 0; i < cache->entries_pos; i++) {
++ struct btrfs_fiemap_entry *entry = &cache->entries[i];
++ int ret;
++
++ ret = fiemap_fill_next_extent(fieinfo, entry->offset,
++ entry->phys, entry->len,
++ entry->flags);
++ /*
++ * Ignore 1 (reached max entries) because we keep track of that
++ * ourselves in emit_fiemap_extent().
++ */
++ if (ret < 0)
++ return ret;
++ }
++ cache->entries_pos = 0;
++
++ return 0;
++}
++
+ /*
+ * Helper to submit fiemap extent.
+ *
+@@ -2437,8 +2512,8 @@ static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo,
+ struct fiemap_cache *cache,
+ u64 offset, u64 phys, u64 len, u32 flags)
+ {
++ struct btrfs_fiemap_entry *entry;
+ u64 cache_end;
+- int ret = 0;
+
+ /* Set at the end of extent_fiemap(). */
+ ASSERT((flags & FIEMAP_EXTENT_LAST) == 0);
+@@ -2451,7 +2526,9 @@ static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo,
+ * find an extent that starts at an offset behind the end offset of the
+ * previous extent we processed. This happens if fiemap is called
+ * without FIEMAP_FLAG_SYNC and there are ordered extents completing
+- * while we call btrfs_next_leaf() (through fiemap_next_leaf_item()).
++ * after we had to unlock the file range, release the search path, emit
++ * the fiemap extents stored in the buffer (cache->entries array) and
++ * then lock the remainder of the range and re-search the btree.
+ *
+ * For example we are in leaf X processing its last item, which is the
+ * file extent item for file range [512K, 1M[, and after
+@@ -2564,11 +2641,35 @@ static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo,
+
+ emit:
+ /* Not mergeable, need to submit cached one */
+- ret = fiemap_fill_next_extent(fieinfo, cache->offset, cache->phys,
+- cache->len, cache->flags);
+- cache->cached = false;
+- if (ret)
+- return ret;
++
++ if (cache->entries_pos == cache->entries_size) {
++ /*
++ * We will need to re-search from the end offset of the last
++ * stored extent and not from the current offset, because after
++ * unlocking the range and releasing the path, if there's a hole
++ * between that end offset and this current offset, a new extent
++ * may have been inserted due to a new write, so we don't want
++ * to miss it.
++ */
++ entry = &cache->entries[cache->entries_size - 1];
++ cache->next_search_offset = entry->offset + entry->len;
++ cache->cached = false;
++
++ return BTRFS_FIEMAP_FLUSH_CACHE;
++ }
++
++ entry = &cache->entries[cache->entries_pos];
++ entry->offset = cache->offset;
++ entry->phys = cache->phys;
++ entry->len = cache->len;
++ entry->flags = cache->flags;
++ cache->entries_pos++;
++ cache->extents_mapped++;
++
++ if (cache->extents_mapped == fieinfo->fi_extents_max) {
++ cache->cached = false;
++ return 1;
++ }
+ assign:
+ cache->cached = true;
+ cache->offset = offset;
+@@ -2694,8 +2795,8 @@ static int fiemap_search_slot(struct btrfs_inode *inode, struct btrfs_path *path
+ * neighbour leaf).
+ * We also need the private clone because holding a read lock on an
+ * extent buffer of the subvolume's b+tree will make lockdep unhappy
+- * when we call fiemap_fill_next_extent(), because that may cause a page
+- * fault when filling the user space buffer with fiemap data.
++ * when we check if extents are shared, as backref walking may need to
++ * lock the same leaf we are processing.
+ */
+ clone = btrfs_clone_extent_buffer(path->nodes[0]);
+ if (!clone)
+@@ -2735,34 +2836,16 @@ static int fiemap_process_hole(struct btrfs_inode *inode,
+ * it beyond i_size.
+ */
+ while (cur_offset < end && cur_offset < i_size) {
+- struct extent_state *cached_state = NULL;
+ u64 delalloc_start;
+ u64 delalloc_end;
+ u64 prealloc_start;
+- u64 lockstart;
+- u64 lockend;
+ u64 prealloc_len = 0;
+ bool delalloc;
+
+- lockstart = round_down(cur_offset, inode->root->fs_info->sectorsize);
+- lockend = round_up(end, inode->root->fs_info->sectorsize);
+-
+- /*
+- * We are only locking for the delalloc range because that's the
+- * only thing that can change here. With fiemap we have a lock
+- * on the inode, so no buffered or direct writes can happen.
+- *
+- * However mmaps and normal page writeback will cause this to
+- * change arbitrarily. We have to lock the extent lock here to
+- * make sure that nobody messes with the tree while we're doing
+- * btrfs_find_delalloc_in_range.
+- */
+- lock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
+ delalloc = btrfs_find_delalloc_in_range(inode, cur_offset, end,
+ delalloc_cached_state,
+ &delalloc_start,
+ &delalloc_end);
+- unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
+ if (!delalloc)
+ break;
+
+@@ -2930,6 +3013,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
+ u64 start, u64 len)
+ {
+ const u64 ino = btrfs_ino(inode);
++ struct extent_state *cached_state = NULL;
+ struct extent_state *delalloc_cached_state = NULL;
+ struct btrfs_path *path;
+ struct fiemap_cache cache = { 0 };
+@@ -2942,18 +3026,23 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
+ bool stopped = false;
+ int ret;
+
++ cache.entries_size = PAGE_SIZE / sizeof(struct btrfs_fiemap_entry);
++ cache.entries = kmalloc_array(cache.entries_size,
++ sizeof(struct btrfs_fiemap_entry),
++ GFP_KERNEL);
+ backref_ctx = btrfs_alloc_backref_share_check_ctx();
+ path = btrfs_alloc_path();
+- if (!backref_ctx || !path) {
++ if (!cache.entries || !backref_ctx || !path) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
++restart:
+ range_start = round_down(start, sectorsize);
+ range_end = round_up(start + len, sectorsize);
+ prev_extent_end = range_start;
+
+- btrfs_inode_lock(inode, BTRFS_ILOCK_SHARED);
++ lock_extent(&inode->io_tree, range_start, range_end, &cached_state);
+
+ ret = fiemap_find_last_extent_offset(inode, path, &last_extent_end);
+ if (ret < 0)
+@@ -3079,7 +3168,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
+ if (ret < 0) {
+ goto out_unlock;
+ } else if (ret > 0) {
+- /* fiemap_fill_next_extent() told us to stop. */
++ /* emit_fiemap_extent() told us to stop. */
+ stopped = true;
+ break;
+ }
+@@ -3102,16 +3191,6 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
+ }
+
+ check_eof_delalloc:
+- /*
+- * Release (and free) the path before emitting any final entries to
+- * fiemap_fill_next_extent() to keep lockdep happy. This is because
+- * once we find no more file extent items exist, we may have a
+- * non-cloned leaf, and fiemap_fill_next_extent() can trigger page
+- * faults when copying data to the user space buffer.
+- */
+- btrfs_free_path(path);
+- path = NULL;
+-
+ if (!stopped && prev_extent_end < range_end) {
+ ret = fiemap_process_hole(inode, fieinfo, &cache,
+ &delalloc_cached_state, backref_ctx,
+@@ -3125,28 +3204,16 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
+ const u64 i_size = i_size_read(&inode->vfs_inode);
+
+ if (prev_extent_end < i_size) {
+- struct extent_state *cached_state = NULL;
+ u64 delalloc_start;
+ u64 delalloc_end;
+- u64 lockstart;
+- u64 lockend;
+ bool delalloc;
+
+- lockstart = round_down(prev_extent_end, sectorsize);
+- lockend = round_up(i_size, sectorsize);
+-
+- /*
+- * See the comment in fiemap_process_hole as to why
+- * we're doing the locking here.
+- */
+- lock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
+ delalloc = btrfs_find_delalloc_in_range(inode,
+ prev_extent_end,
+ i_size - 1,
+ &delalloc_cached_state,
+ &delalloc_start,
+ &delalloc_end);
+- unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
+ if (!delalloc)
+ cache.flags |= FIEMAP_EXTENT_LAST;
+ } else {
+@@ -3154,12 +3221,39 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
+ }
+ }
+
+- ret = emit_last_fiemap_cache(fieinfo, &cache);
+-
+ out_unlock:
+- btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
++ unlock_extent(&inode->io_tree, range_start, range_end, &cached_state);
++
++ if (ret == BTRFS_FIEMAP_FLUSH_CACHE) {
++ btrfs_release_path(path);
++ ret = flush_fiemap_cache(fieinfo, &cache);
++ if (ret)
++ goto out;
++ len -= cache.next_search_offset - start;
++ start = cache.next_search_offset;
++ goto restart;
++ } else if (ret < 0) {
++ goto out;
++ }
++
++ /*
++ * Must free the path before emitting to the fiemap buffer because we
++ * may have a non-cloned leaf and if the fiemap buffer is memory mapped
++ * to a file, a write into it (through btrfs_page_mkwrite()) may trigger
++ * waiting for an ordered extent that in order to complete needs to
++ * modify that leaf, therefore leading to a deadlock.
++ */
++ btrfs_free_path(path);
++ path = NULL;
++
++ ret = flush_fiemap_cache(fieinfo, &cache);
++ if (ret)
++ goto out;
++
++ ret = emit_last_fiemap_cache(fieinfo, &cache);
+ out:
+ free_extent_state(delalloc_cached_state);
++ kfree(cache.entries);
+ btrfs_free_backref_share_ctx(backref_ctx);
+ btrfs_free_path(path);
+ return ret;
+diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
+index ca79c2b8adc46..1ac14223ffb50 100644
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -7813,6 +7813,7 @@ struct iomap_dio *btrfs_dio_write(struct kiocb *iocb, struct iov_iter *iter,
+ static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
+ u64 start, u64 len)
+ {
++ struct btrfs_inode *btrfs_inode = BTRFS_I(inode);
+ int ret;
+
+ ret = fiemap_prep(inode, fieinfo, start, &len, 0);
+@@ -7838,7 +7839,26 @@ static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
+ return ret;
+ }
+
+- return extent_fiemap(BTRFS_I(inode), fieinfo, start, len);
++ btrfs_inode_lock(btrfs_inode, BTRFS_ILOCK_SHARED);
++
++ /*
++ * We did an initial flush to avoid holding the inode's lock while
++ * triggering writeback and waiting for the completion of IO and ordered
++ * extents. Now after we locked the inode we do it again, because it's
++ * possible a new write may have happened in between those two steps.
++ */
++ if (fieinfo->fi_flags & FIEMAP_FLAG_SYNC) {
++ ret = btrfs_wait_ordered_range(inode, 0, LLONG_MAX);
++ if (ret) {
++ btrfs_inode_unlock(btrfs_inode, BTRFS_ILOCK_SHARED);
++ return ret;
++ }
++ }
++
++ ret = extent_fiemap(btrfs_inode, fieinfo, start, len);
++ btrfs_inode_unlock(btrfs_inode, BTRFS_ILOCK_SHARED);
++
++ return ret;
+ }
+
+ static int btrfs_writepages(struct address_space *mapping,
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index 522596060252f..c7e52d980cd75 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -2886,12 +2886,9 @@ static void
+ nfsd4_cb_recall_any_release(struct nfsd4_callback *cb)
+ {
+ struct nfs4_client *clp = cb->cb_clp;
+- struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+
+- spin_lock(&nn->client_lock);
+ clear_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags);
+- put_client_renew_locked(clp);
+- spin_unlock(&nn->client_lock);
++ drop_client(clp);
+ }
+
+ static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = {
+@@ -6273,7 +6270,7 @@ deleg_reaper(struct nfsd_net *nn)
+ list_add(&clp->cl_ra_cblist, &cblist);
+
+ /* release in nfsd4_cb_recall_any_release */
+- atomic_inc(&clp->cl_rpc_users);
++ kref_get(&clp->cl_nfsdfs.cl_ref);
+ set_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags);
+ clp->cl_ra_time = ktime_get_boottime_seconds();
+ }
+diff --git a/fs/pipe.c b/fs/pipe.c
+index a234035cc375d..ba4376341ddd2 100644
+--- a/fs/pipe.c
++++ b/fs/pipe.c
+@@ -425,6 +425,18 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
+ bool was_empty = false;
+ bool wake_next_writer = false;
+
++ /*
++ * Reject writing to watch queue pipes before the point where we lock
++ * the pipe.
++ * Otherwise, lockdep would be unhappy if the caller already has another
++ * pipe locked.
++ * If we had to support locking a normal pipe and a notification pipe at
++ * the same time, we could set up lockdep annotations for that, but
++ * since we don't actually need that, it's simpler to just bail here.
++ */
++ if (pipe_has_watch_queue(pipe))
++ return -EXDEV;
++
+ /* Null write succeeds. */
+ if (unlikely(total_len == 0))
+ return 0;
+@@ -437,11 +449,6 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
+ goto out;
+ }
+
+- if (pipe_has_watch_queue(pipe)) {
+- ret = -EXDEV;
+- goto out;
+- }
+-
+ /*
+ * If it wasn't empty we try to merge new data into
+ * the last buffer.
+diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c
+index 15e1215bc4e5a..1a9e705d65002 100644
+--- a/fs/smb/client/cached_dir.c
++++ b/fs/smb/client/cached_dir.c
+@@ -401,6 +401,7 @@ smb2_close_cached_fid(struct kref *ref)
+ {
+ struct cached_fid *cfid = container_of(ref, struct cached_fid,
+ refcount);
++ int rc;
+
+ spin_lock(&cfid->cfids->cfid_list_lock);
+ if (cfid->on_list) {
+@@ -414,9 +415,10 @@ smb2_close_cached_fid(struct kref *ref)
+ cfid->dentry = NULL;
+
+ if (cfid->is_open) {
+- SMB2_close(0, cfid->tcon, cfid->fid.persistent_fid,
++ rc = SMB2_close(0, cfid->tcon, cfid->fid.persistent_fid,
+ cfid->fid.volatile_fid);
+- atomic_dec(&cfid->tcon->num_remote_opens);
++ if (rc != -EBUSY && rc != -EAGAIN)
++ atomic_dec(&cfid->tcon->num_remote_opens);
+ }
+
+ free_cached_dir(cfid);
+diff --git a/fs/smb/client/cifs_debug.c b/fs/smb/client/cifs_debug.c
+index 7206167f4184a..6c85edb8635d0 100644
+--- a/fs/smb/client/cifs_debug.c
++++ b/fs/smb/client/cifs_debug.c
+@@ -250,6 +250,8 @@ static int cifs_debug_files_proc_show(struct seq_file *m, void *v)
+ spin_lock(&cifs_tcp_ses_lock);
+ list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) {
+ list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
++ if (cifs_ses_exiting(ses))
++ continue;
+ list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
+ spin_lock(&tcon->open_file_lock);
+ list_for_each_entry(cfile, &tcon->openFileList, tlist) {
+@@ -654,6 +656,8 @@ static ssize_t cifs_stats_proc_write(struct file *file,
+ }
+ #endif /* CONFIG_CIFS_STATS2 */
+ list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
++ if (cifs_ses_exiting(ses))
++ continue;
+ list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
+ atomic_set(&tcon->num_smbs_sent, 0);
+ spin_lock(&tcon->stat_lock);
+@@ -732,6 +736,8 @@ static int cifs_stats_proc_show(struct seq_file *m, void *v)
+ }
+ #endif /* STATS2 */
+ list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
++ if (cifs_ses_exiting(ses))
++ continue;
+ list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
+ i++;
+ seq_printf(m, "\n%d) %s", i, tcon->tree_name);
+diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c
+index 2131638f26d0b..fcb93a66e47cb 100644
+--- a/fs/smb/client/cifsfs.c
++++ b/fs/smb/client/cifsfs.c
+@@ -159,6 +159,7 @@ struct workqueue_struct *decrypt_wq;
+ struct workqueue_struct *fileinfo_put_wq;
+ struct workqueue_struct *cifsoplockd_wq;
+ struct workqueue_struct *deferredclose_wq;
++struct workqueue_struct *serverclose_wq;
+ __u32 cifs_lock_secret;
+
+ /*
+@@ -1877,6 +1878,13 @@ init_cifs(void)
+ goto out_destroy_cifsoplockd_wq;
+ }
+
++ serverclose_wq = alloc_workqueue("serverclose",
++ WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
++ if (!serverclose_wq) {
++ rc = -ENOMEM;
++ goto out_destroy_serverclose_wq;
++ }
++
+ rc = cifs_init_inodecache();
+ if (rc)
+ goto out_destroy_deferredclose_wq;
+@@ -1951,6 +1959,8 @@ init_cifs(void)
+ destroy_workqueue(decrypt_wq);
+ out_destroy_cifsiod_wq:
+ destroy_workqueue(cifsiod_wq);
++out_destroy_serverclose_wq:
++ destroy_workqueue(serverclose_wq);
+ out_clean_proc:
+ cifs_proc_clean();
+ return rc;
+@@ -1980,6 +1990,7 @@ exit_cifs(void)
+ destroy_workqueue(cifsoplockd_wq);
+ destroy_workqueue(decrypt_wq);
+ destroy_workqueue(fileinfo_put_wq);
++ destroy_workqueue(serverclose_wq);
+ destroy_workqueue(cifsiod_wq);
+ cifs_proc_clean();
+ }
+diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h
+index 35a12413bbee6..a878b1e5aa313 100644
+--- a/fs/smb/client/cifsglob.h
++++ b/fs/smb/client/cifsglob.h
+@@ -425,10 +425,10 @@ struct smb_version_operations {
+ /* set fid protocol-specific info */
+ void (*set_fid)(struct cifsFileInfo *, struct cifs_fid *, __u32);
+ /* close a file */
+- void (*close)(const unsigned int, struct cifs_tcon *,
++ int (*close)(const unsigned int, struct cifs_tcon *,
+ struct cifs_fid *);
+ /* close a file, returning file attributes and timestamps */
+- void (*close_getattr)(const unsigned int xid, struct cifs_tcon *tcon,
++ int (*close_getattr)(const unsigned int xid, struct cifs_tcon *tcon,
+ struct cifsFileInfo *pfile_info);
+ /* send a flush request to the server */
+ int (*flush)(const unsigned int, struct cifs_tcon *, struct cifs_fid *);
+@@ -1408,6 +1408,7 @@ struct cifsFileInfo {
+ bool invalidHandle:1; /* file closed via session abend */
+ bool swapfile:1;
+ bool oplock_break_cancelled:1;
++ bool offload:1; /* offload final part of _put to a wq */
+ unsigned int oplock_epoch; /* epoch from the lease break */
+ __u32 oplock_level; /* oplock/lease level from the lease break */
+ int count;
+@@ -1416,6 +1417,7 @@ struct cifsFileInfo {
+ struct cifs_search_info srch_inf;
+ struct work_struct oplock_break; /* work for oplock breaks */
+ struct work_struct put; /* work for the final part of _put */
++ struct work_struct serverclose; /* work for serverclose */
+ struct delayed_work deferred;
+ bool deferred_close_scheduled; /* Flag to indicate close is scheduled */
+ char *symlink_target;
+@@ -2073,6 +2075,7 @@ extern struct workqueue_struct *decrypt_wq;
+ extern struct workqueue_struct *fileinfo_put_wq;
+ extern struct workqueue_struct *cifsoplockd_wq;
+ extern struct workqueue_struct *deferredclose_wq;
++extern struct workqueue_struct *serverclose_wq;
+ extern __u32 cifs_lock_secret;
+
+ extern mempool_t *cifs_mid_poolp;
+@@ -2278,4 +2281,14 @@ struct smb2_compound_vars {
+ struct smb2_file_link_info link_info;
+ };
+
++static inline bool cifs_ses_exiting(struct cifs_ses *ses)
++{
++ bool ret;
++
++ spin_lock(&ses->ses_lock);
++ ret = ses->ses_status == SES_EXITING;
++ spin_unlock(&ses->ses_lock);
++ return ret;
++}
++
+ #endif /* _CIFS_GLOB_H */
+diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c
+index 4c958129181d3..97776dd12b6b8 100644
+--- a/fs/smb/client/connect.c
++++ b/fs/smb/client/connect.c
+@@ -178,6 +178,8 @@ cifs_signal_cifsd_for_reconnect(struct TCP_Server_Info *server,
+
+ spin_lock(&cifs_tcp_ses_lock);
+ list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) {
++ if (cifs_ses_exiting(ses))
++ continue;
+ spin_lock(&ses->chan_lock);
+ for (i = 0; i < ses->chan_count; i++) {
+ if (!ses->chans[i].server)
+@@ -3981,13 +3983,14 @@ cifs_set_vol_auth(struct smb3_fs_context *ctx, struct cifs_ses *ses)
+ }
+
+ static struct cifs_tcon *
+-cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid)
++__cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid)
+ {
+ int rc;
+ struct cifs_tcon *master_tcon = cifs_sb_master_tcon(cifs_sb);
+ struct cifs_ses *ses;
+ struct cifs_tcon *tcon = NULL;
+ struct smb3_fs_context *ctx;
++ char *origin_fullpath = NULL;
+
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ if (ctx == NULL)
+@@ -4011,6 +4014,7 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid)
+ ctx->sign = master_tcon->ses->sign;
+ ctx->seal = master_tcon->seal;
+ ctx->witness = master_tcon->use_witness;
++ ctx->dfs_root_ses = master_tcon->ses->dfs_root_ses;
+
+ rc = cifs_set_vol_auth(ctx, master_tcon->ses);
+ if (rc) {
+@@ -4030,12 +4034,39 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid)
+ goto out;
+ }
+
++#ifdef CONFIG_CIFS_DFS_UPCALL
++ spin_lock(&master_tcon->tc_lock);
++ if (master_tcon->origin_fullpath) {
++ spin_unlock(&master_tcon->tc_lock);
++ origin_fullpath = dfs_get_path(cifs_sb, cifs_sb->ctx->source);
++ if (IS_ERR(origin_fullpath)) {
++ tcon = ERR_CAST(origin_fullpath);
++ origin_fullpath = NULL;
++ cifs_put_smb_ses(ses);
++ goto out;
++ }
++ } else {
++ spin_unlock(&master_tcon->tc_lock);
++ }
++#endif
++
+ tcon = cifs_get_tcon(ses, ctx);
+ if (IS_ERR(tcon)) {
+ cifs_put_smb_ses(ses);
+ goto out;
+ }
+
++#ifdef CONFIG_CIFS_DFS_UPCALL
++ if (origin_fullpath) {
++ spin_lock(&tcon->tc_lock);
++ tcon->origin_fullpath = origin_fullpath;
++ spin_unlock(&tcon->tc_lock);
++ origin_fullpath = NULL;
++ queue_delayed_work(dfscache_wq, &tcon->dfs_cache_work,
++ dfs_cache_get_ttl() * HZ);
++ }
++#endif
++
+ #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
+ if (cap_unix(ses))
+ reset_cifs_unix_caps(0, tcon, NULL, ctx);
+@@ -4044,11 +4075,23 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid)
+ out:
+ kfree(ctx->username);
+ kfree_sensitive(ctx->password);
++ kfree(origin_fullpath);
+ kfree(ctx);
+
+ return tcon;
+ }
+
++static struct cifs_tcon *
++cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid)
++{
++ struct cifs_tcon *ret;
++
++ cifs_mount_lock();
++ ret = __cifs_construct_tcon(cifs_sb, fsuid);
++ cifs_mount_unlock();
++ return ret;
++}
++
+ struct cifs_tcon *
+ cifs_sb_master_tcon(struct cifs_sb_info *cifs_sb)
+ {
+diff --git a/fs/smb/client/dir.c b/fs/smb/client/dir.c
+index 580a27a3a7e62..855468a32904e 100644
+--- a/fs/smb/client/dir.c
++++ b/fs/smb/client/dir.c
+@@ -189,6 +189,7 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned
+ int disposition;
+ struct TCP_Server_Info *server = tcon->ses->server;
+ struct cifs_open_parms oparms;
++ int rdwr_for_fscache = 0;
+
+ *oplock = 0;
+ if (tcon->ses->server->oplocks)
+@@ -200,6 +201,10 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned
+ return PTR_ERR(full_path);
+ }
+
++ /* If we're caching, we need to be able to fill in around partial writes. */
++ if (cifs_fscache_enabled(inode) && (oflags & O_ACCMODE) == O_WRONLY)
++ rdwr_for_fscache = 1;
++
+ #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
+ if (tcon->unix_ext && cap_unix(tcon->ses) && !tcon->broken_posix_open &&
+ (CIFS_UNIX_POSIX_PATH_OPS_CAP &
+@@ -276,6 +281,8 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned
+ desired_access |= GENERIC_READ; /* is this too little? */
+ if (OPEN_FMODE(oflags) & FMODE_WRITE)
+ desired_access |= GENERIC_WRITE;
++ if (rdwr_for_fscache == 1)
++ desired_access |= GENERIC_READ;
+
+ disposition = FILE_OVERWRITE_IF;
+ if ((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
+@@ -304,6 +311,7 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned
+ if (!tcon->unix_ext && (mode & S_IWUGO) == 0)
+ create_options |= CREATE_OPTION_READONLY;
+
++retry_open:
+ oparms = (struct cifs_open_parms) {
+ .tcon = tcon,
+ .cifs_sb = cifs_sb,
+@@ -317,8 +325,15 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned
+ rc = server->ops->open(xid, &oparms, oplock, buf);
+ if (rc) {
+ cifs_dbg(FYI, "cifs_create returned 0x%x\n", rc);
++ if (rc == -EACCES && rdwr_for_fscache == 1) {
++ desired_access &= ~GENERIC_READ;
++ rdwr_for_fscache = 2;
++ goto retry_open;
++ }
+ goto out;
+ }
++ if (rdwr_for_fscache == 2)
++ cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE);
+
+ #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
+ /*
+diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c
+index c711d5eb2987e..53a8c633221b9 100644
+--- a/fs/smb/client/file.c
++++ b/fs/smb/client/file.c
+@@ -206,12 +206,12 @@ cifs_mark_open_files_invalid(struct cifs_tcon *tcon)
+ */
+ }
+
+-static inline int cifs_convert_flags(unsigned int flags)
++static inline int cifs_convert_flags(unsigned int flags, int rdwr_for_fscache)
+ {
+ if ((flags & O_ACCMODE) == O_RDONLY)
+ return GENERIC_READ;
+ else if ((flags & O_ACCMODE) == O_WRONLY)
+- return GENERIC_WRITE;
++ return rdwr_for_fscache == 1 ? (GENERIC_READ | GENERIC_WRITE) : GENERIC_WRITE;
+ else if ((flags & O_ACCMODE) == O_RDWR) {
+ /* GENERIC_ALL is too much permission to request
+ can cause unnecessary access denied on create */
+@@ -348,11 +348,16 @@ static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_
+ int create_options = CREATE_NOT_DIR;
+ struct TCP_Server_Info *server = tcon->ses->server;
+ struct cifs_open_parms oparms;
++ int rdwr_for_fscache = 0;
+
+ if (!server->ops->open)
+ return -ENOSYS;
+
+- desired_access = cifs_convert_flags(f_flags);
++ /* If we're caching, we need to be able to fill in around partial writes. */
++ if (cifs_fscache_enabled(inode) && (f_flags & O_ACCMODE) == O_WRONLY)
++ rdwr_for_fscache = 1;
++
++ desired_access = cifs_convert_flags(f_flags, rdwr_for_fscache);
+
+ /*********************************************************************
+ * open flag mapping table:
+@@ -389,6 +394,7 @@ static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_
+ if (f_flags & O_DIRECT)
+ create_options |= CREATE_NO_BUFFER;
+
++retry_open:
+ oparms = (struct cifs_open_parms) {
+ .tcon = tcon,
+ .cifs_sb = cifs_sb,
+@@ -400,8 +406,16 @@ static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_
+ };
+
+ rc = server->ops->open(xid, &oparms, oplock, buf);
+- if (rc)
++ if (rc) {
++ if (rc == -EACCES && rdwr_for_fscache == 1) {
++ desired_access = cifs_convert_flags(f_flags, 0);
++ rdwr_for_fscache = 2;
++ goto retry_open;
++ }
+ return rc;
++ }
++ if (rdwr_for_fscache == 2)
++ cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE);
+
+ /* TODO: Add support for calling posix query info but with passing in fid */
+ if (tcon->unix_ext)
+@@ -445,6 +459,7 @@ cifs_down_write(struct rw_semaphore *sem)
+ }
+
+ static void cifsFileInfo_put_work(struct work_struct *work);
++void serverclose_work(struct work_struct *work);
+
+ struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
+ struct tcon_link *tlink, __u32 oplock,
+@@ -491,6 +506,7 @@ struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
+ cfile->tlink = cifs_get_tlink(tlink);
+ INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
+ INIT_WORK(&cfile->put, cifsFileInfo_put_work);
++ INIT_WORK(&cfile->serverclose, serverclose_work);
+ INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close);
+ mutex_init(&cfile->fh_mutex);
+ spin_lock_init(&cfile->file_info_lock);
+@@ -582,6 +598,40 @@ static void cifsFileInfo_put_work(struct work_struct *work)
+ cifsFileInfo_put_final(cifs_file);
+ }
+
++void serverclose_work(struct work_struct *work)
++{
++ struct cifsFileInfo *cifs_file = container_of(work,
++ struct cifsFileInfo, serverclose);
++
++ struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
++
++ struct TCP_Server_Info *server = tcon->ses->server;
++ int rc = 0;
++ int retries = 0;
++ int MAX_RETRIES = 4;
++
++ do {
++ if (server->ops->close_getattr)
++ rc = server->ops->close_getattr(0, tcon, cifs_file);
++ else if (server->ops->close)
++ rc = server->ops->close(0, tcon, &cifs_file->fid);
++
++ if (rc == -EBUSY || rc == -EAGAIN) {
++ retries++;
++ msleep(250);
++ }
++ } while ((rc == -EBUSY || rc == -EAGAIN) && (retries < MAX_RETRIES)
++ );
++
++ if (retries == MAX_RETRIES)
++ pr_warn("Serverclose failed %d times, giving up\n", MAX_RETRIES);
++
++ if (cifs_file->offload)
++ queue_work(fileinfo_put_wq, &cifs_file->put);
++ else
++ cifsFileInfo_put_final(cifs_file);
++}
++
+ /**
+ * cifsFileInfo_put - release a reference of file priv data
+ *
+@@ -622,10 +672,13 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
+ struct cifs_fid fid = {};
+ struct cifs_pending_open open;
+ bool oplock_break_cancelled;
++ bool serverclose_offloaded = false;
+
+ spin_lock(&tcon->open_file_lock);
+ spin_lock(&cifsi->open_file_lock);
+ spin_lock(&cifs_file->file_info_lock);
++
++ cifs_file->offload = offload;
+ if (--cifs_file->count > 0) {
+ spin_unlock(&cifs_file->file_info_lock);
+ spin_unlock(&cifsi->open_file_lock);
+@@ -667,13 +720,20 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
+ if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
+ struct TCP_Server_Info *server = tcon->ses->server;
+ unsigned int xid;
++ int rc = 0;
+
+ xid = get_xid();
+ if (server->ops->close_getattr)
+- server->ops->close_getattr(xid, tcon, cifs_file);
++ rc = server->ops->close_getattr(xid, tcon, cifs_file);
+ else if (server->ops->close)
+- server->ops->close(xid, tcon, &cifs_file->fid);
++ rc = server->ops->close(xid, tcon, &cifs_file->fid);
+ _free_xid(xid);
++
++ if (rc == -EBUSY || rc == -EAGAIN) {
++ // Server close failed, hence offloading it as an async op
++ queue_work(serverclose_wq, &cifs_file->serverclose);
++ serverclose_offloaded = true;
++ }
+ }
+
+ if (oplock_break_cancelled)
+@@ -681,10 +741,15 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
+
+ cifs_del_pending_open(&open);
+
+- if (offload)
+- queue_work(fileinfo_put_wq, &cifs_file->put);
+- else
+- cifsFileInfo_put_final(cifs_file);
++ // if serverclose has been offloaded to wq (on failure), it will
++ // handle offloading put as well. If serverclose not offloaded,
++ // we need to handle offloading put here.
++ if (!serverclose_offloaded) {
++ if (offload)
++ queue_work(fileinfo_put_wq, &cifs_file->put);
++ else
++ cifsFileInfo_put_final(cifs_file);
++ }
+ }
+
+ int cifs_open(struct inode *inode, struct file *file)
+@@ -834,11 +899,11 @@ int cifs_open(struct inode *inode, struct file *file)
+ use_cache:
+ fscache_use_cookie(cifs_inode_cookie(file_inode(file)),
+ file->f_mode & FMODE_WRITE);
+- if (file->f_flags & O_DIRECT &&
+- (!((file->f_flags & O_ACCMODE) != O_RDONLY) ||
+- file->f_flags & O_APPEND))
+- cifs_invalidate_cache(file_inode(file),
+- FSCACHE_INVAL_DIO_WRITE);
++ if (!(file->f_flags & O_DIRECT))
++ goto out;
++ if ((file->f_flags & (O_ACCMODE | O_APPEND)) == O_RDONLY)
++ goto out;
++ cifs_invalidate_cache(file_inode(file), FSCACHE_INVAL_DIO_WRITE);
+
+ out:
+ free_dentry_path(page);
+@@ -903,6 +968,7 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
+ int disposition = FILE_OPEN;
+ int create_options = CREATE_NOT_DIR;
+ struct cifs_open_parms oparms;
++ int rdwr_for_fscache = 0;
+
+ xid = get_xid();
+ mutex_lock(&cfile->fh_mutex);
+@@ -966,7 +1032,11 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
+ }
+ #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
+
+- desired_access = cifs_convert_flags(cfile->f_flags);
++ /* If we're caching, we need to be able to fill in around partial writes. */
++ if (cifs_fscache_enabled(inode) && (cfile->f_flags & O_ACCMODE) == O_WRONLY)
++ rdwr_for_fscache = 1;
++
++ desired_access = cifs_convert_flags(cfile->f_flags, rdwr_for_fscache);
+
+ /* O_SYNC also has bit for O_DSYNC so following check picks up either */
+ if (cfile->f_flags & O_SYNC)
+@@ -978,6 +1048,7 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
+ if (server->ops->get_lease_key)
+ server->ops->get_lease_key(inode, &cfile->fid);
+
++retry_open:
+ oparms = (struct cifs_open_parms) {
+ .tcon = tcon,
+ .cifs_sb = cifs_sb,
+@@ -1003,6 +1074,11 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
+ /* indicate that we need to relock the file */
+ oparms.reconnect = true;
+ }
++ if (rc == -EACCES && rdwr_for_fscache == 1) {
++ desired_access = cifs_convert_flags(cfile->f_flags, 0);
++ rdwr_for_fscache = 2;
++ goto retry_open;
++ }
+
+ if (rc) {
+ mutex_unlock(&cfile->fh_mutex);
+@@ -1011,6 +1087,9 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
+ goto reopen_error_exit;
+ }
+
++ if (rdwr_for_fscache == 2)
++ cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE);
++
+ #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
+ reopen_success:
+ #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
+diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c
+index e4a6b240d2263..58567ae617b9f 100644
+--- a/fs/smb/client/fs_context.c
++++ b/fs/smb/client/fs_context.c
+@@ -37,7 +37,7 @@
+ #include "rfc1002pdu.h"
+ #include "fs_context.h"
+
+-static DEFINE_MUTEX(cifs_mount_mutex);
++DEFINE_MUTEX(cifs_mount_mutex);
+
+ static const match_table_t cifs_smb_version_tokens = {
+ { Smb_1, SMB1_VERSION_STRING },
+@@ -752,9 +752,9 @@ static int smb3_get_tree(struct fs_context *fc)
+
+ if (err)
+ return err;
+- mutex_lock(&cifs_mount_mutex);
++ cifs_mount_lock();
+ ret = smb3_get_tree_common(fc);
+- mutex_unlock(&cifs_mount_mutex);
++ cifs_mount_unlock();
+ return ret;
+ }
+
+diff --git a/fs/smb/client/fs_context.h b/fs/smb/client/fs_context.h
+index cf46916286d02..8cfc25b609b6b 100644
+--- a/fs/smb/client/fs_context.h
++++ b/fs/smb/client/fs_context.h
+@@ -293,4 +293,16 @@ extern void smb3_update_mnt_flags(struct cifs_sb_info *cifs_sb);
+ #define MAX_CACHED_FIDS 16
+ extern char *cifs_sanitize_prepath(char *prepath, gfp_t gfp);
+
++extern struct mutex cifs_mount_mutex;
++
++static inline void cifs_mount_lock(void)
++{
++ mutex_lock(&cifs_mount_mutex);
++}
++
++static inline void cifs_mount_unlock(void)
++{
++ mutex_unlock(&cifs_mount_mutex);
++}
++
+ #endif
+diff --git a/fs/smb/client/fscache.c b/fs/smb/client/fscache.c
+index e5cad149f5a2d..a4ee801b29394 100644
+--- a/fs/smb/client/fscache.c
++++ b/fs/smb/client/fscache.c
+@@ -12,6 +12,16 @@
+ #include "cifs_fs_sb.h"
+ #include "cifsproto.h"
+
++/*
++ * Key for fscache inode. [!] Contents must match comparisons in cifs_find_inode().
++ */
++struct cifs_fscache_inode_key {
++
++ __le64 uniqueid; /* server inode number */
++ __le64 createtime; /* creation time on server */
++ u8 type; /* S_IFMT file type */
++} __packed;
++
+ static void cifs_fscache_fill_volume_coherency(
+ struct cifs_tcon *tcon,
+ struct cifs_fscache_volume_coherency_data *cd)
+@@ -97,15 +107,19 @@ void cifs_fscache_release_super_cookie(struct cifs_tcon *tcon)
+ void cifs_fscache_get_inode_cookie(struct inode *inode)
+ {
+ struct cifs_fscache_inode_coherency_data cd;
++ struct cifs_fscache_inode_key key;
+ struct cifsInodeInfo *cifsi = CIFS_I(inode);
+ struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+ struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
+
++ key.uniqueid = cpu_to_le64(cifsi->uniqueid);
++ key.createtime = cpu_to_le64(cifsi->createtime);
++ key.type = (inode->i_mode & S_IFMT) >> 12;
+ cifs_fscache_fill_coherency(&cifsi->netfs.inode, &cd);
+
+ cifsi->netfs.cache =
+ fscache_acquire_cookie(tcon->fscache, 0,
+- &cifsi->uniqueid, sizeof(cifsi->uniqueid),
++ &key, sizeof(key),
+ &cd, sizeof(cd),
+ i_size_read(&cifsi->netfs.inode));
+ if (cifsi->netfs.cache)
+diff --git a/fs/smb/client/fscache.h b/fs/smb/client/fscache.h
+index a3d73720914f8..1f2ea9f5cc9a8 100644
+--- a/fs/smb/client/fscache.h
++++ b/fs/smb/client/fscache.h
+@@ -109,6 +109,11 @@ static inline void cifs_readahead_to_fscache(struct inode *inode,
+ __cifs_readahead_to_fscache(inode, pos, len);
+ }
+
++static inline bool cifs_fscache_enabled(struct inode *inode)
++{
++ return fscache_cookie_enabled(cifs_inode_cookie(inode));
++}
++
+ #else /* CONFIG_CIFS_FSCACHE */
+ static inline
+ void cifs_fscache_fill_coherency(struct inode *inode,
+@@ -124,6 +129,7 @@ static inline void cifs_fscache_release_inode_cookie(struct inode *inode) {}
+ static inline void cifs_fscache_unuse_inode_cookie(struct inode *inode, bool update) {}
+ static inline struct fscache_cookie *cifs_inode_cookie(struct inode *inode) { return NULL; }
+ static inline void cifs_invalidate_cache(struct inode *inode, unsigned int flags) {}
++static inline bool cifs_fscache_enabled(struct inode *inode) { return false; }
+
+ static inline int cifs_fscache_query_occupancy(struct inode *inode,
+ pgoff_t first, unsigned int nr_pages,
+diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c
+index cb9e719e67ae2..fa6330d586e89 100644
+--- a/fs/smb/client/inode.c
++++ b/fs/smb/client/inode.c
+@@ -1390,6 +1390,8 @@ cifs_find_inode(struct inode *inode, void *opaque)
+ {
+ struct cifs_fattr *fattr = opaque;
+
++ /* [!] The compared values must be the same in struct cifs_fscache_inode_key. */
++
+ /* don't match inode with different uniqueid */
+ if (CIFS_I(inode)->uniqueid != fattr->cf_uniqueid)
+ return 0;
+diff --git a/fs/smb/client/ioctl.c b/fs/smb/client/ioctl.c
+index 73ededa8eba5c..204dd7c47126e 100644
+--- a/fs/smb/client/ioctl.c
++++ b/fs/smb/client/ioctl.c
+@@ -246,7 +246,9 @@ static int cifs_dump_full_key(struct cifs_tcon *tcon, struct smb3_full_key_debug
+ spin_lock(&cifs_tcp_ses_lock);
+ list_for_each_entry(server_it, &cifs_tcp_ses_list, tcp_ses_list) {
+ list_for_each_entry(ses_it, &server_it->smb_ses_list, smb_ses_list) {
+- if (ses_it->Suid == out.session_id) {
++ spin_lock(&ses_it->ses_lock);
++ if (ses_it->ses_status != SES_EXITING &&
++ ses_it->Suid == out.session_id) {
+ ses = ses_it;
+ /*
+ * since we are using the session outside the crit
+@@ -254,9 +256,11 @@ static int cifs_dump_full_key(struct cifs_tcon *tcon, struct smb3_full_key_debug
+ * so increment its refcount
+ */
+ cifs_smb_ses_inc_refcount(ses);
++ spin_unlock(&ses_it->ses_lock);
+ found = true;
+ goto search_end;
+ }
++ spin_unlock(&ses_it->ses_lock);
+ }
+ }
+ search_end:
+diff --git a/fs/smb/client/misc.c b/fs/smb/client/misc.c
+index c2137ea3c2538..ef573e3f8e52a 100644
+--- a/fs/smb/client/misc.c
++++ b/fs/smb/client/misc.c
+@@ -489,6 +489,8 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv)
+ /* look up tcon based on tid & uid */
+ spin_lock(&cifs_tcp_ses_lock);
+ list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) {
++ if (cifs_ses_exiting(ses))
++ continue;
+ list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
+ if (tcon->tid != buf->Tid)
+ continue;
+diff --git a/fs/smb/client/smb1ops.c b/fs/smb/client/smb1ops.c
+index 64e25233e85de..1aebcf95c1951 100644
+--- a/fs/smb/client/smb1ops.c
++++ b/fs/smb/client/smb1ops.c
+@@ -753,11 +753,11 @@ cifs_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock)
+ cinode->can_cache_brlcks = CIFS_CACHE_WRITE(cinode);
+ }
+
+-static void
++static int
+ cifs_close_file(const unsigned int xid, struct cifs_tcon *tcon,
+ struct cifs_fid *fid)
+ {
+- CIFSSMBClose(xid, tcon, fid->netfid);
++ return CIFSSMBClose(xid, tcon, fid->netfid);
+ }
+
+ static int
+diff --git a/fs/smb/client/smb2misc.c b/fs/smb/client/smb2misc.c
+index 82b84a4941dd2..cc72be5a93a93 100644
+--- a/fs/smb/client/smb2misc.c
++++ b/fs/smb/client/smb2misc.c
+@@ -622,6 +622,8 @@ smb2_is_valid_lease_break(char *buffer, struct TCP_Server_Info *server)
+ /* look up tcon based on tid & uid */
+ spin_lock(&cifs_tcp_ses_lock);
+ list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) {
++ if (cifs_ses_exiting(ses))
++ continue;
+ list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
+ spin_lock(&tcon->open_file_lock);
+ cifs_stats_inc(
+@@ -697,6 +699,8 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server)
+ /* look up tcon based on tid & uid */
+ spin_lock(&cifs_tcp_ses_lock);
+ list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) {
++ if (cifs_ses_exiting(ses))
++ continue;
+ list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
+
+ spin_lock(&tcon->open_file_lock);
+diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c
+index 978a9f409857a..04fea874d0a33 100644
+--- a/fs/smb/client/smb2ops.c
++++ b/fs/smb/client/smb2ops.c
+@@ -1392,14 +1392,14 @@ smb2_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock)
+ memcpy(cfile->fid.create_guid, fid->create_guid, 16);
+ }
+
+-static void
++static int
+ smb2_close_file(const unsigned int xid, struct cifs_tcon *tcon,
+ struct cifs_fid *fid)
+ {
+- SMB2_close(xid, tcon, fid->persistent_fid, fid->volatile_fid);
++ return SMB2_close(xid, tcon, fid->persistent_fid, fid->volatile_fid);
+ }
+
+-static void
++static int
+ smb2_close_getattr(const unsigned int xid, struct cifs_tcon *tcon,
+ struct cifsFileInfo *cfile)
+ {
+@@ -1410,7 +1410,7 @@ smb2_close_getattr(const unsigned int xid, struct cifs_tcon *tcon,
+ rc = __SMB2_close(xid, tcon, cfile->fid.persistent_fid,
+ cfile->fid.volatile_fid, &file_inf);
+ if (rc)
+- return;
++ return rc;
+
+ inode = d_inode(cfile->dentry);
+
+@@ -1439,6 +1439,7 @@ smb2_close_getattr(const unsigned int xid, struct cifs_tcon *tcon,
+
+ /* End of file and Attributes should not have to be updated on close */
+ spin_unlock(&inode->i_lock);
++ return rc;
+ }
+
+ static int
+@@ -2429,6 +2430,8 @@ smb2_is_network_name_deleted(char *buf, struct TCP_Server_Info *server)
+
+ spin_lock(&cifs_tcp_ses_lock);
+ list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) {
++ if (cifs_ses_exiting(ses))
++ continue;
+ list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
+ if (tcon->tid == le32_to_cpu(shdr->Id.SyncId.TreeId)) {
+ spin_lock(&tcon->tc_lock);
+diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c
+index 4d7d0bdf7a472..94bd4c6d2d682 100644
+--- a/fs/smb/client/smb2pdu.c
++++ b/fs/smb/client/smb2pdu.c
+@@ -3549,9 +3549,9 @@ __SMB2_close(const unsigned int xid, struct cifs_tcon *tcon,
+ memcpy(&pbuf->network_open_info,
+ &rsp->network_open_info,
+ sizeof(pbuf->network_open_info));
++ atomic_dec(&tcon->num_remote_opens);
+ }
+
+- atomic_dec(&tcon->num_remote_opens);
+ close_exit:
+ SMB2_close_free(&rqst);
+ free_rsp_buf(resp_buftype, rsp);
+diff --git a/fs/smb/server/ksmbd_netlink.h b/fs/smb/server/ksmbd_netlink.h
+index 0ebf91ffa2361..4464a62228cf3 100644
+--- a/fs/smb/server/ksmbd_netlink.h
++++ b/fs/smb/server/ksmbd_netlink.h
+@@ -166,7 +166,8 @@ struct ksmbd_share_config_response {
+ __u16 force_uid;
+ __u16 force_gid;
+ __s8 share_name[KSMBD_REQ_MAX_SHARE_NAME];
+- __u32 reserved[112]; /* Reserved room */
++ __u32 reserved[111]; /* Reserved room */
++ __u32 payload_sz;
+ __u32 veto_list_sz;
+ __s8 ____payload[];
+ };
+diff --git a/fs/smb/server/mgmt/share_config.c b/fs/smb/server/mgmt/share_config.c
+index 328a412259dc1..a2f0a2edceb8a 100644
+--- a/fs/smb/server/mgmt/share_config.c
++++ b/fs/smb/server/mgmt/share_config.c
+@@ -158,7 +158,12 @@ static struct ksmbd_share_config *share_config_request(struct unicode_map *um,
+ share->name = kstrdup(name, GFP_KERNEL);
+
+ if (!test_share_config_flag(share, KSMBD_SHARE_FLAG_PIPE)) {
+- share->path = kstrdup(ksmbd_share_config_path(resp),
++ int path_len = PATH_MAX;
++
++ if (resp->payload_sz)
++ path_len = resp->payload_sz - resp->veto_list_sz;
++
++ share->path = kstrndup(ksmbd_share_config_path(resp), path_len,
+ GFP_KERNEL);
+ if (share->path)
+ share->path_sz = strlen(share->path);
+diff --git a/fs/smb/server/smb2ops.c b/fs/smb/server/smb2ops.c
+index 27a9dce3e03ab..8600f32c981a1 100644
+--- a/fs/smb/server/smb2ops.c
++++ b/fs/smb/server/smb2ops.c
+@@ -228,6 +228,11 @@ void init_smb3_0_server(struct ksmbd_conn *conn)
+ conn->cli_cap & SMB2_GLOBAL_CAP_ENCRYPTION)
+ conn->vals->capabilities |= SMB2_GLOBAL_CAP_ENCRYPTION;
+
++ if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION ||
++ (!(server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION_OFF) &&
++ conn->cli_cap & SMB2_GLOBAL_CAP_ENCRYPTION))
++ conn->vals->capabilities |= SMB2_GLOBAL_CAP_ENCRYPTION;
++
+ if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB3_MULTICHANNEL)
+ conn->vals->capabilities |= SMB2_GLOBAL_CAP_MULTI_CHANNEL;
+ }
+@@ -275,11 +280,6 @@ int init_smb3_11_server(struct ksmbd_conn *conn)
+ conn->vals->capabilities |= SMB2_GLOBAL_CAP_LEASING |
+ SMB2_GLOBAL_CAP_DIRECTORY_LEASING;
+
+- if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION ||
+- (!(server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION_OFF) &&
+- conn->cli_cap & SMB2_GLOBAL_CAP_ENCRYPTION))
+- conn->vals->capabilities |= SMB2_GLOBAL_CAP_ENCRYPTION;
+-
+ if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB3_MULTICHANNEL)
+ conn->vals->capabilities |= SMB2_GLOBAL_CAP_MULTI_CHANNEL;
+
+diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c
+index 199c31c275e5b..924f08326eef4 100644
+--- a/fs/smb/server/smb2pdu.c
++++ b/fs/smb/server/smb2pdu.c
+@@ -5631,8 +5631,9 @@ static int smb2_rename(struct ksmbd_work *work,
+ if (!file_info->ReplaceIfExists)
+ flags = RENAME_NOREPLACE;
+
+- smb_break_all_levII_oplock(work, fp, 0);
+ rc = ksmbd_vfs_rename(work, &fp->filp->f_path, new_name, flags);
++ if (!rc)
++ smb_break_all_levII_oplock(work, fp, 0);
+ out:
+ kfree(new_name);
+ return rc;
+diff --git a/fs/smb/server/transport_ipc.c b/fs/smb/server/transport_ipc.c
+index f29bb03f0dc47..8752ac82c557b 100644
+--- a/fs/smb/server/transport_ipc.c
++++ b/fs/smb/server/transport_ipc.c
+@@ -65,6 +65,7 @@ struct ipc_msg_table_entry {
+ struct hlist_node ipc_table_hlist;
+
+ void *response;
++ unsigned int msg_sz;
+ };
+
+ static struct delayed_work ipc_timer_work;
+@@ -275,6 +276,7 @@ static int handle_response(int type, void *payload, size_t sz)
+ }
+
+ memcpy(entry->response, payload, sz);
++ entry->msg_sz = sz;
+ wake_up_interruptible(&entry->wait);
+ ret = 0;
+ break;
+@@ -453,6 +455,34 @@ static int ipc_msg_send(struct ksmbd_ipc_msg *msg)
+ return ret;
+ }
+
++static int ipc_validate_msg(struct ipc_msg_table_entry *entry)
++{
++ unsigned int msg_sz = entry->msg_sz;
++
++ if (entry->type == KSMBD_EVENT_RPC_REQUEST) {
++ struct ksmbd_rpc_command *resp = entry->response;
++
++ msg_sz = sizeof(struct ksmbd_rpc_command) + resp->payload_sz;
++ } else if (entry->type == KSMBD_EVENT_SPNEGO_AUTHEN_REQUEST) {
++ struct ksmbd_spnego_authen_response *resp = entry->response;
++
++ msg_sz = sizeof(struct ksmbd_spnego_authen_response) +
++ resp->session_key_len + resp->spnego_blob_len;
++ } else if (entry->type == KSMBD_EVENT_SHARE_CONFIG_REQUEST) {
++ struct ksmbd_share_config_response *resp = entry->response;
++
++ if (resp->payload_sz) {
++ if (resp->payload_sz < resp->veto_list_sz)
++ return -EINVAL;
++
++ msg_sz = sizeof(struct ksmbd_share_config_response) +
++ resp->payload_sz;
++ }
++ }
++
++ return entry->msg_sz != msg_sz ? -EINVAL : 0;
++}
++
+ static void *ipc_msg_send_request(struct ksmbd_ipc_msg *msg, unsigned int handle)
+ {
+ struct ipc_msg_table_entry entry;
+@@ -477,6 +507,13 @@ static void *ipc_msg_send_request(struct ksmbd_ipc_msg *msg, unsigned int handle
+ ret = wait_event_interruptible_timeout(entry.wait,
+ entry.response != NULL,
+ IPC_WAIT_TIMEOUT);
++ if (entry.response) {
++ ret = ipc_validate_msg(&entry);
++ if (ret) {
++ kvfree(entry.response);
++ entry.response = NULL;
++ }
++ }
+ out:
+ down_write(&ipc_msg_table_lock);
+ hash_del(&entry.ipc_table_hlist);
+diff --git a/fs/vboxsf/super.c b/fs/vboxsf/super.c
+index 1fb8f4df60cbb..9848af78215bf 100644
+--- a/fs/vboxsf/super.c
++++ b/fs/vboxsf/super.c
+@@ -151,7 +151,7 @@ static int vboxsf_fill_super(struct super_block *sb, struct fs_context *fc)
+ if (!sbi->nls) {
+ vbg_err("vboxsf: Count not load '%s' nls\n", nls_name);
+ err = -EINVAL;
+- goto fail_free;
++ goto fail_destroy_idr;
+ }
+ }
+
+@@ -224,6 +224,7 @@ static int vboxsf_fill_super(struct super_block *sb, struct fs_context *fc)
+ ida_simple_remove(&vboxsf_bdi_ida, sbi->bdi_id);
+ if (sbi->nls)
+ unload_nls(sbi->nls);
++fail_destroy_idr:
+ idr_destroy(&sbi->ino_idr);
+ kfree(sbi);
+ return err;
+diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h
+index 31029f4f7be85..c4aabbf002f7c 100644
+--- a/include/kvm/arm_pmu.h
++++ b/include/kvm/arm_pmu.h
+@@ -86,7 +86,7 @@ void kvm_vcpu_pmu_resync_el0(void);
+ */
+ #define kvm_pmu_update_vcpu_events(vcpu) \
+ do { \
+- if (!has_vhe() && kvm_vcpu_has_pmu(vcpu)) \
++ if (!has_vhe() && kvm_arm_support_pmu_v3()) \
+ vcpu->arch.pmu.events = *kvm_get_pmu_events(); \
+ } while (0)
+
+diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h
+index d0807ad43f933..6e950594215a0 100644
+--- a/include/linux/avf/virtchnl.h
++++ b/include/linux/avf/virtchnl.h
+@@ -4,6 +4,11 @@
+ #ifndef _VIRTCHNL_H_
+ #define _VIRTCHNL_H_
+
++#include <linux/bitops.h>
++#include <linux/bits.h>
++#include <linux/overflow.h>
++#include <uapi/linux/if_ether.h>
++
+ /* Description:
+ * This header file describes the Virtual Function (VF) - Physical Function
+ * (PF) communication protocol used by the drivers for all devices starting
+diff --git a/include/linux/bpf.h b/include/linux/bpf.h
+index 9b08d792fa95a..2ebb5d4d43dc6 100644
+--- a/include/linux/bpf.h
++++ b/include/linux/bpf.h
+@@ -1524,12 +1524,26 @@ struct bpf_link {
+ enum bpf_link_type type;
+ const struct bpf_link_ops *ops;
+ struct bpf_prog *prog;
+- struct work_struct work;
++ /* rcu is used before freeing, work can be used to schedule that
++ * RCU-based freeing before that, so they never overlap
++ */
++ union {
++ struct rcu_head rcu;
++ struct work_struct work;
++ };
+ };
+
+ struct bpf_link_ops {
+ void (*release)(struct bpf_link *link);
++ /* deallocate link resources callback, called without RCU grace period
++ * waiting
++ */
+ void (*dealloc)(struct bpf_link *link);
++ /* deallocate link resources callback, called after RCU grace period;
++ * if underlying BPF program is sleepable we go through tasks trace
++ * RCU GP and then "classic" RCU GP
++ */
++ void (*dealloc_deferred)(struct bpf_link *link);
+ int (*detach)(struct bpf_link *link);
+ int (*update_prog)(struct bpf_link *link, struct bpf_prog *new_prog,
+ struct bpf_prog *old_prog);
+diff --git a/include/linux/device.h b/include/linux/device.h
+index 99496a0a5ddb3..a070160fbcb8e 100644
+--- a/include/linux/device.h
++++ b/include/linux/device.h
+@@ -1250,6 +1250,7 @@ void device_link_del(struct device_link *link);
+ void device_link_remove(void *consumer, struct device *supplier);
+ void device_links_supplier_sync_state_pause(void);
+ void device_links_supplier_sync_state_resume(void);
++void device_link_wait_removal(void);
+
+ /* Create alias, so I can be autoloaded. */
+ #define MODULE_ALIAS_CHARDEV(major,minor) \
+diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
+index 731beb3198c4f..8215e193178aa 100644
+--- a/include/linux/io_uring_types.h
++++ b/include/linux/io_uring_types.h
+@@ -250,7 +250,6 @@ struct io_ring_ctx {
+
+ struct io_submit_state submit_state;
+
+- struct io_buffer_list *io_bl;
+ struct xarray io_bl_xa;
+
+ struct io_hash_table cancel_table_locked;
+diff --git a/include/linux/secretmem.h b/include/linux/secretmem.h
+index 35f3a4a8ceb1e..acf7e1a3f3def 100644
+--- a/include/linux/secretmem.h
++++ b/include/linux/secretmem.h
+@@ -13,10 +13,10 @@ static inline bool folio_is_secretmem(struct folio *folio)
+ /*
+ * Using folio_mapping() is quite slow because of the actual call
+ * instruction.
+- * We know that secretmem pages are not compound and LRU so we can
++ * We know that secretmem pages are not compound, so we can
+ * save a couple of cycles here.
+ */
+- if (folio_test_large(folio) || !folio_test_lru(folio))
++ if (folio_test_large(folio))
+ return false;
+
+ mapping = (struct address_space *)
+diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
+index 2922059908cc5..9e61f6df6bc55 100644
+--- a/include/linux/skbuff.h
++++ b/include/linux/skbuff.h
+@@ -736,8 +736,6 @@ typedef unsigned char *sk_buff_data_t;
+ * @list: queue head
+ * @ll_node: anchor in an llist (eg socket defer_list)
+ * @sk: Socket we are owned by
+- * @ip_defrag_offset: (aka @sk) alternate use of @sk, used in
+- * fragmentation management
+ * @dev: Device we arrived on/are leaving by
+ * @dev_scratch: (aka @dev) alternate use of @dev when @dev would be %NULL
+ * @cb: Control buffer. Free for use by every layer. Put private vars here
+@@ -860,10 +858,7 @@ struct sk_buff {
+ struct llist_node ll_node;
+ };
+
+- union {
+- struct sock *sk;
+- int ip_defrag_offset;
+- };
++ struct sock *sk;
+
+ union {
+ ktime_t tstamp;
+diff --git a/include/linux/udp.h b/include/linux/udp.h
+index d04188714dca1..94e63b2695406 100644
+--- a/include/linux/udp.h
++++ b/include/linux/udp.h
+@@ -140,6 +140,24 @@ static inline void udp_cmsg_recv(struct msghdr *msg, struct sock *sk,
+ }
+ }
+
++DECLARE_STATIC_KEY_FALSE(udp_encap_needed_key);
++#if IS_ENABLED(CONFIG_IPV6)
++DECLARE_STATIC_KEY_FALSE(udpv6_encap_needed_key);
++#endif
++
++static inline bool udp_encap_needed(void)
++{
++ if (static_branch_unlikely(&udp_encap_needed_key))
++ return true;
++
++#if IS_ENABLED(CONFIG_IPV6)
++ if (static_branch_unlikely(&udpv6_encap_needed_key))
++ return true;
++#endif
++
++ return false;
++}
++
+ static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb)
+ {
+ if (!skb_is_gso(skb))
+@@ -153,6 +171,16 @@ static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb)
+ !udp_test_bit(ACCEPT_FRAGLIST, sk))
+ return true;
+
++ /* GSO packets lacking the SKB_GSO_UDP_TUNNEL/_CSUM bits might still
++ * land in a tunnel as the socket check in udp_gro_receive cannot be
++ * foolproof.
++ */
++ if (udp_encap_needed() &&
++ READ_ONCE(udp_sk(sk)->encap_rcv) &&
++ !(skb_shinfo(skb)->gso_type &
++ (SKB_GSO_UDP_TUNNEL | SKB_GSO_UDP_TUNNEL_CSUM)))
++ return true;
++
+ return false;
+ }
+
+diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
+index 0d231024570a3..03e68a8e229f5 100644
+--- a/include/net/bluetooth/hci.h
++++ b/include/net/bluetooth/hci.h
+@@ -176,6 +176,15 @@ enum {
+ */
+ HCI_QUIRK_USE_BDADDR_PROPERTY,
+
++ /* When this quirk is set, the Bluetooth Device Address provided by
++ * the 'local-bd-address' fwnode property is incorrectly specified in
++ * big-endian order.
++ *
++ * This quirk can be set before hci_register_dev is called or
++ * during the hdev->setup vendor callback.
++ */
++ HCI_QUIRK_BDADDR_PROPERTY_BROKEN,
++
+ /* When this quirk is set, the duplicate filtering during
+ * scanning is based on Bluetooth devices addresses. To allow
+ * RSSI based updates, restart scanning if needed.
+diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
+index 01a73bf74fa19..6ecac01115d9c 100644
+--- a/include/net/inet_connection_sock.h
++++ b/include/net/inet_connection_sock.h
+@@ -173,6 +173,7 @@ void inet_csk_init_xmit_timers(struct sock *sk,
+ void (*delack_handler)(struct timer_list *),
+ void (*keepalive_handler)(struct timer_list *));
+ void inet_csk_clear_xmit_timers(struct sock *sk);
++void inet_csk_clear_xmit_timers_sync(struct sock *sk);
+
+ static inline void inet_csk_schedule_ack(struct sock *sk)
+ {
+diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
+index 4d43adf186064..cd526fd31b458 100644
+--- a/include/net/mana/mana.h
++++ b/include/net/mana/mana.h
+@@ -39,7 +39,6 @@ enum TRI_STATE {
+ #define COMP_ENTRY_SIZE 64
+
+ #define RX_BUFFERS_PER_QUEUE 512
+-#define MANA_RX_DATA_ALIGN 64
+
+ #define MAX_SEND_BUFFERS_PER_QUEUE 256
+
+diff --git a/include/net/sock.h b/include/net/sock.h
+index e70c903b04f30..25780942ec8bf 100644
+--- a/include/net/sock.h
++++ b/include/net/sock.h
+@@ -1808,6 +1808,13 @@ static inline void sock_owned_by_me(const struct sock *sk)
+ #endif
+ }
+
++static inline void sock_not_owned_by_me(const struct sock *sk)
++{
++#ifdef CONFIG_LOCKDEP
++ WARN_ON_ONCE(lockdep_sock_is_held(sk) && debug_locks);
++#endif
++}
++
+ static inline bool sock_owned_by_user(const struct sock *sk)
+ {
+ sock_owned_by_me(sk);
+diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
+index aed10bae50acb..2c0a9a98272ca 100644
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -148,6 +148,7 @@ static bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
+ static void io_queue_sqe(struct io_kiocb *req);
+
+ struct kmem_cache *req_cachep;
++static struct workqueue_struct *iou_wq __ro_after_init;
+
+ static int __read_mostly sysctl_io_uring_disabled;
+ static int __read_mostly sysctl_io_uring_group = -1;
+@@ -343,7 +344,6 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
+ err:
+ kfree(ctx->cancel_table.hbs);
+ kfree(ctx->cancel_table_locked.hbs);
+- kfree(ctx->io_bl);
+ xa_destroy(&ctx->io_bl_xa);
+ kfree(ctx);
+ return NULL;
+@@ -2934,7 +2934,6 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
+ io_wq_put_hash(ctx->hash_map);
+ kfree(ctx->cancel_table.hbs);
+ kfree(ctx->cancel_table_locked.hbs);
+- kfree(ctx->io_bl);
+ xa_destroy(&ctx->io_bl_xa);
+ kfree(ctx);
+ }
+@@ -3182,7 +3181,7 @@ static __cold void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
+ * noise and overhead, there's no discernable change in runtime
+ * over using system_wq.
+ */
+- queue_work(system_unbound_wq, &ctx->exit_work);
++ queue_work(iou_wq, &ctx->exit_work);
+ }
+
+ static int io_uring_release(struct inode *inode, struct file *file)
+@@ -3430,14 +3429,15 @@ static void *io_uring_validate_mmap_request(struct file *file,
+ ptr = ctx->sq_sqes;
+ break;
+ case IORING_OFF_PBUF_RING: {
++ struct io_buffer_list *bl;
+ unsigned int bgid;
+
+ bgid = (offset & ~IORING_OFF_MMAP_MASK) >> IORING_OFF_PBUF_SHIFT;
+- rcu_read_lock();
+- ptr = io_pbuf_get_address(ctx, bgid);
+- rcu_read_unlock();
+- if (!ptr)
+- return ERR_PTR(-EINVAL);
++ bl = io_pbuf_get_bl(ctx, bgid);
++ if (IS_ERR(bl))
++ return bl;
++ ptr = bl->buf_ring;
++ io_put_bl(ctx, bl);
+ break;
+ }
+ default:
+@@ -4666,6 +4666,8 @@ static int __init io_uring_init(void)
+ offsetof(struct io_kiocb, cmd.data),
+ sizeof_field(struct io_kiocb, cmd.data), NULL);
+
++ iou_wq = alloc_workqueue("iou_exit", WQ_UNBOUND, 64);
++
+ #ifdef CONFIG_SYSCTL
+ register_sysctl_init("kernel", kernel_io_uring_disabled_table);
+ #endif
+diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c
+index e8516f3bbbaaa..26a00920042c4 100644
+--- a/io_uring/kbuf.c
++++ b/io_uring/kbuf.c
+@@ -17,8 +17,6 @@
+
+ #define IO_BUFFER_LIST_BUF_PER_PAGE (PAGE_SIZE / sizeof(struct io_uring_buf))
+
+-#define BGID_ARRAY 64
+-
+ /* BIDs are addressed by a 16-bit field in a CQE */
+ #define MAX_BIDS_PER_BGID (1 << 16)
+
+@@ -31,13 +29,9 @@ struct io_provide_buf {
+ __u16 bid;
+ };
+
+-static struct io_buffer_list *__io_buffer_get_list(struct io_ring_ctx *ctx,
+- struct io_buffer_list *bl,
+- unsigned int bgid)
++static inline struct io_buffer_list *__io_buffer_get_list(struct io_ring_ctx *ctx,
++ unsigned int bgid)
+ {
+- if (bl && bgid < BGID_ARRAY)
+- return &bl[bgid];
+-
+ return xa_load(&ctx->io_bl_xa, bgid);
+ }
+
+@@ -53,7 +47,7 @@ static inline struct io_buffer_list *io_buffer_get_list(struct io_ring_ctx *ctx,
+ {
+ lockdep_assert_held(&ctx->uring_lock);
+
+- return __io_buffer_get_list(ctx, ctx->io_bl, bgid);
++ return __io_buffer_get_list(ctx, bgid);
+ }
+
+ static int io_buffer_add_list(struct io_ring_ctx *ctx,
+@@ -65,11 +59,7 @@ static int io_buffer_add_list(struct io_ring_ctx *ctx,
+ * always under the ->uring_lock, but the RCU lookup from mmap does.
+ */
+ bl->bgid = bgid;
+- smp_store_release(&bl->is_ready, 1);
+-
+- if (bgid < BGID_ARRAY)
+- return 0;
+-
++ atomic_set(&bl->refs, 1);
+ return xa_err(xa_store(&ctx->io_bl_xa, bgid, bl, GFP_KERNEL));
+ }
+
+@@ -215,24 +205,6 @@ void __user *io_buffer_select(struct io_kiocb *req, size_t *len,
+ return ret;
+ }
+
+-static __cold int io_init_bl_list(struct io_ring_ctx *ctx)
+-{
+- struct io_buffer_list *bl;
+- int i;
+-
+- bl = kcalloc(BGID_ARRAY, sizeof(struct io_buffer_list), GFP_KERNEL);
+- if (!bl)
+- return -ENOMEM;
+-
+- for (i = 0; i < BGID_ARRAY; i++) {
+- INIT_LIST_HEAD(&bl[i].buf_list);
+- bl[i].bgid = i;
+- }
+-
+- smp_store_release(&ctx->io_bl, bl);
+- return 0;
+-}
+-
+ /*
+ * Mark the given mapped range as free for reuse
+ */
+@@ -301,22 +273,22 @@ static int __io_remove_buffers(struct io_ring_ctx *ctx,
+ return i;
+ }
+
++void io_put_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl)
++{
++ if (atomic_dec_and_test(&bl->refs)) {
++ __io_remove_buffers(ctx, bl, -1U);
++ kfree_rcu(bl, rcu);
++ }
++}
++
+ void io_destroy_buffers(struct io_ring_ctx *ctx)
+ {
+ struct io_buffer_list *bl;
+ unsigned long index;
+- int i;
+-
+- for (i = 0; i < BGID_ARRAY; i++) {
+- if (!ctx->io_bl)
+- break;
+- __io_remove_buffers(ctx, &ctx->io_bl[i], -1U);
+- }
+
+ xa_for_each(&ctx->io_bl_xa, index, bl) {
+ xa_erase(&ctx->io_bl_xa, bl->bgid);
+- __io_remove_buffers(ctx, bl, -1U);
+- kfree_rcu(bl, rcu);
++ io_put_bl(ctx, bl);
+ }
+
+ while (!list_empty(&ctx->io_buffers_pages)) {
+@@ -485,12 +457,6 @@ int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags)
+
+ io_ring_submit_lock(ctx, issue_flags);
+
+- if (unlikely(p->bgid < BGID_ARRAY && !ctx->io_bl)) {
+- ret = io_init_bl_list(ctx);
+- if (ret)
+- goto err;
+- }
+-
+ bl = io_buffer_get_list(ctx, p->bgid);
+ if (unlikely(!bl)) {
+ bl = kzalloc(sizeof(*bl), GFP_KERNEL_ACCOUNT);
+@@ -503,14 +469,9 @@ int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags)
+ if (ret) {
+ /*
+ * Doesn't need rcu free as it was never visible, but
+- * let's keep it consistent throughout. Also can't
+- * be a lower indexed array group, as adding one
+- * where lookup failed cannot happen.
++ * let's keep it consistent throughout.
+ */
+- if (p->bgid >= BGID_ARRAY)
+- kfree_rcu(bl, rcu);
+- else
+- WARN_ON_ONCE(1);
++ kfree_rcu(bl, rcu);
+ goto err;
+ }
+ }
+@@ -675,12 +636,6 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
+ if (reg.ring_entries >= 65536)
+ return -EINVAL;
+
+- if (unlikely(reg.bgid < BGID_ARRAY && !ctx->io_bl)) {
+- int ret = io_init_bl_list(ctx);
+- if (ret)
+- return ret;
+- }
+-
+ bl = io_buffer_get_list(ctx, reg.bgid);
+ if (bl) {
+ /* if mapped buffer ring OR classic exists, don't allow */
+@@ -729,31 +684,40 @@ int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
+ if (!bl->is_mapped)
+ return -EINVAL;
+
+- __io_remove_buffers(ctx, bl, -1U);
+- if (bl->bgid >= BGID_ARRAY) {
+- xa_erase(&ctx->io_bl_xa, bl->bgid);
+- kfree_rcu(bl, rcu);
+- }
++ xa_erase(&ctx->io_bl_xa, bl->bgid);
++ io_put_bl(ctx, bl);
+ return 0;
+ }
+
+-void *io_pbuf_get_address(struct io_ring_ctx *ctx, unsigned long bgid)
++struct io_buffer_list *io_pbuf_get_bl(struct io_ring_ctx *ctx,
++ unsigned long bgid)
+ {
+ struct io_buffer_list *bl;
++ bool ret;
+
+- bl = __io_buffer_get_list(ctx, smp_load_acquire(&ctx->io_bl), bgid);
+-
+- if (!bl || !bl->is_mmap)
+- return NULL;
+ /*
+- * Ensure the list is fully setup. Only strictly needed for RCU lookup
+- * via mmap, and in that case only for the array indexed groups. For
+- * the xarray lookups, it's either visible and ready, or not at all.
++ * We have to be a bit careful here - we're inside mmap and cannot grab
++ * the uring_lock. This means the buffer_list could be simultaneously
++ * going away, if someone is trying to be sneaky. Look it up under rcu
++ * so we know it's not going away, and attempt to grab a reference to
++ * it. If the ref is already zero, then fail the mapping. If successful,
++ * the caller will call io_put_bl() to drop the reference at the
++ * end. This may then safely free the buffer_list (and drop the pages)
++ * at that point, vm_insert_pages() would've already grabbed the
++ * necessary vma references.
+ */
+- if (!smp_load_acquire(&bl->is_ready))
+- return NULL;
+-
+- return bl->buf_ring;
++ rcu_read_lock();
++ bl = xa_load(&ctx->io_bl_xa, bgid);
++ /* must be a mmap'able buffer ring and have pages */
++ ret = false;
++ if (bl && bl->is_mmap)
++ ret = atomic_inc_not_zero(&bl->refs);
++ rcu_read_unlock();
++
++ if (ret)
++ return bl;
++
++ return ERR_PTR(-EINVAL);
+ }
+
+ /*
+diff --git a/io_uring/kbuf.h b/io_uring/kbuf.h
+index 3d0cb6b8c1ed2..8d7929369501d 100644
+--- a/io_uring/kbuf.h
++++ b/io_uring/kbuf.h
+@@ -25,12 +25,12 @@ struct io_buffer_list {
+ __u16 head;
+ __u16 mask;
+
++ atomic_t refs;
++
+ /* ring mapped provided buffers */
+ __u8 is_mapped;
+ /* ring mapped provided buffers, but mmap'ed by application */
+ __u8 is_mmap;
+- /* bl is visible from an RCU point of view for lookup */
+- __u8 is_ready;
+ };
+
+ struct io_buffer {
+@@ -60,7 +60,9 @@ unsigned int __io_put_kbuf(struct io_kiocb *req, unsigned issue_flags);
+
+ void io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags);
+
+-void *io_pbuf_get_address(struct io_ring_ctx *ctx, unsigned long bgid);
++void io_put_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl);
++struct io_buffer_list *io_pbuf_get_bl(struct io_ring_ctx *ctx,
++ unsigned long bgid);
+
+ static inline void io_kbuf_recycle_ring(struct io_kiocb *req)
+ {
+diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
+index 4b7d186c7622d..4902a7487f076 100644
+--- a/kernel/bpf/syscall.c
++++ b/kernel/bpf/syscall.c
+@@ -2866,17 +2866,46 @@ void bpf_link_inc(struct bpf_link *link)
+ atomic64_inc(&link->refcnt);
+ }
+
++static void bpf_link_defer_dealloc_rcu_gp(struct rcu_head *rcu)
++{
++ struct bpf_link *link = container_of(rcu, struct bpf_link, rcu);
++
++ /* free bpf_link and its containing memory */
++ link->ops->dealloc_deferred(link);
++}
++
++static void bpf_link_defer_dealloc_mult_rcu_gp(struct rcu_head *rcu)
++{
++ if (rcu_trace_implies_rcu_gp())
++ bpf_link_defer_dealloc_rcu_gp(rcu);
++ else
++ call_rcu(rcu, bpf_link_defer_dealloc_rcu_gp);
++}
++
+ /* bpf_link_free is guaranteed to be called from process context */
+ static void bpf_link_free(struct bpf_link *link)
+ {
++ bool sleepable = false;
++
+ bpf_link_free_id(link->id);
+ if (link->prog) {
++ sleepable = link->prog->aux->sleepable;
+ /* detach BPF program, clean up used resources */
+ link->ops->release(link);
+ bpf_prog_put(link->prog);
+ }
+- /* free bpf_link and its containing memory */
+- link->ops->dealloc(link);
++ if (link->ops->dealloc_deferred) {
++ /* schedule BPF link deallocation; if underlying BPF program
++ * is sleepable, we need to first wait for RCU tasks trace
++ * sync, then go through "classic" RCU grace period
++ */
++ if (sleepable)
++ call_rcu_tasks_trace(&link->rcu, bpf_link_defer_dealloc_mult_rcu_gp);
++ else
++ call_rcu(&link->rcu, bpf_link_defer_dealloc_rcu_gp);
++ }
++ if (link->ops->dealloc)
++ link->ops->dealloc(link);
+ }
+
+ static void bpf_link_put_deferred(struct work_struct *work)
+@@ -3381,7 +3410,7 @@ static int bpf_raw_tp_link_fill_link_info(const struct bpf_link *link,
+
+ static const struct bpf_link_ops bpf_raw_tp_link_lops = {
+ .release = bpf_raw_tp_link_release,
+- .dealloc = bpf_raw_tp_link_dealloc,
++ .dealloc_deferred = bpf_raw_tp_link_dealloc,
+ .show_fdinfo = bpf_raw_tp_link_show_fdinfo,
+ .fill_link_info = bpf_raw_tp_link_fill_link_info,
+ };
+diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
+index 396c4c66932f2..c9fc734989c68 100644
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -6637,6 +6637,11 @@ static int check_stack_access_within_bounds(
+ err = check_stack_slot_within_bounds(env, min_off, state, type);
+ if (!err && max_off > 0)
+ err = -EINVAL; /* out of stack access into non-negative offsets */
++ if (!err && access_size < 0)
++ /* access_size should not be negative (or overflow an int); other checks
++ * along the way should have prevented such an access.
++ */
++ err = -EFAULT; /* invalid negative access size; integer overflow? */
+
+ if (err) {
+ if (tnum_is_const(reg->var_off)) {
+diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
+index 1d76f3b014aee..1e79084a9d9d2 100644
+--- a/kernel/trace/bpf_trace.c
++++ b/kernel/trace/bpf_trace.c
+@@ -2639,7 +2639,7 @@ static int bpf_kprobe_multi_link_fill_link_info(const struct bpf_link *link,
+
+ static const struct bpf_link_ops bpf_kprobe_multi_link_lops = {
+ .release = bpf_kprobe_multi_link_release,
+- .dealloc = bpf_kprobe_multi_link_dealloc,
++ .dealloc_deferred = bpf_kprobe_multi_link_dealloc,
+ .fill_link_info = bpf_kprobe_multi_link_fill_link_info,
+ };
+
+@@ -3065,6 +3065,9 @@ static void bpf_uprobe_multi_link_release(struct bpf_link *link)
+
+ umulti_link = container_of(link, struct bpf_uprobe_multi_link, link);
+ bpf_uprobe_unregister(&umulti_link->path, umulti_link->uprobes, umulti_link->cnt);
++ if (umulti_link->task)
++ put_task_struct(umulti_link->task);
++ path_put(&umulti_link->path);
+ }
+
+ static void bpf_uprobe_multi_link_dealloc(struct bpf_link *link)
+@@ -3072,16 +3075,13 @@ static void bpf_uprobe_multi_link_dealloc(struct bpf_link *link)
+ struct bpf_uprobe_multi_link *umulti_link;
+
+ umulti_link = container_of(link, struct bpf_uprobe_multi_link, link);
+- if (umulti_link->task)
+- put_task_struct(umulti_link->task);
+- path_put(&umulti_link->path);
+ kvfree(umulti_link->uprobes);
+ kfree(umulti_link);
+ }
+
+ static const struct bpf_link_ops bpf_uprobe_multi_link_lops = {
+ .release = bpf_uprobe_multi_link_release,
+- .dealloc = bpf_uprobe_multi_link_dealloc,
++ .dealloc_deferred = bpf_uprobe_multi_link_dealloc,
+ };
+
+ static int uprobe_prog_run(struct bpf_uprobe *uprobe,
+diff --git a/mm/memory.c b/mm/memory.c
+index 78e05d3e9e4ac..e44d4d887cf6d 100644
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -5674,6 +5674,10 @@ int follow_phys(struct vm_area_struct *vma,
+ goto out;
+ pte = ptep_get(ptep);
+
++ /* Never return PFNs of anon folios in COW mappings. */
++ if (vm_normal_folio(vma, address, pte))
++ goto unlock;
++
+ if ((flags & FOLL_WRITE) && !pte_write(pte))
+ goto unlock;
+
+diff --git a/net/9p/client.c b/net/9p/client.c
+index e265a0ca6bddd..f7e90b4769bba 100644
+--- a/net/9p/client.c
++++ b/net/9p/client.c
+@@ -1583,7 +1583,7 @@ p9_client_read_once(struct p9_fid *fid, u64 offset, struct iov_iter *to,
+ received = rsize;
+ }
+
+- p9_debug(P9_DEBUG_9P, "<<< RREAD count %d\n", count);
++ p9_debug(P9_DEBUG_9P, "<<< RREAD count %d\n", received);
+
+ if (non_zc) {
+ int n = copy_to_iter(dataptr, received, to);
+@@ -1609,9 +1609,6 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err)
+ int total = 0;
+ *err = 0;
+
+- p9_debug(P9_DEBUG_9P, ">>> TWRITE fid %d offset %llu count %zd\n",
+- fid->fid, offset, iov_iter_count(from));
+-
+ while (iov_iter_count(from)) {
+ int count = iov_iter_count(from);
+ int rsize = fid->iounit;
+@@ -1623,6 +1620,9 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err)
+ if (count < rsize)
+ rsize = count;
+
++ p9_debug(P9_DEBUG_9P, ">>> TWRITE fid %d offset %llu count %d (/%d)\n",
++ fid->fid, offset, rsize, count);
++
+ /* Don't bother zerocopy for small IO (< 1024) */
+ if (clnt->trans_mod->zc_request && rsize > 1024) {
+ req = p9_client_zc_rpc(clnt, P9_TWRITE, NULL, from, 0,
+@@ -1650,7 +1650,7 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err)
+ written = rsize;
+ }
+
+- p9_debug(P9_DEBUG_9P, "<<< RWRITE count %d\n", count);
++ p9_debug(P9_DEBUG_9P, "<<< RWRITE count %d\n", written);
+
+ p9_req_put(clnt, req);
+ iov_iter_revert(from, count - written - iov_iter_count(from));
+diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c
+index c5462486dbca1..282ec581c0720 100644
+--- a/net/ax25/ax25_dev.c
++++ b/net/ax25/ax25_dev.c
+@@ -105,7 +105,7 @@ void ax25_dev_device_down(struct net_device *dev)
+ spin_lock_bh(&ax25_dev_lock);
+
+ #ifdef CONFIG_AX25_DAMA_SLAVE
+- ax25_ds_del_timer(ax25_dev);
++ timer_shutdown_sync(&ax25_dev->dama.slave_timer);
+ #endif
+
+ /*
+diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c
+index 233453807b509..ce3ff2fa72e58 100644
+--- a/net/bluetooth/hci_debugfs.c
++++ b/net/bluetooth/hci_debugfs.c
+@@ -218,10 +218,12 @@ static int conn_info_min_age_set(void *data, u64 val)
+ {
+ struct hci_dev *hdev = data;
+
+- if (val == 0 || val > hdev->conn_info_max_age)
++ hci_dev_lock(hdev);
++ if (val == 0 || val > hdev->conn_info_max_age) {
++ hci_dev_unlock(hdev);
+ return -EINVAL;
++ }
+
+- hci_dev_lock(hdev);
+ hdev->conn_info_min_age = val;
+ hci_dev_unlock(hdev);
+
+@@ -246,10 +248,12 @@ static int conn_info_max_age_set(void *data, u64 val)
+ {
+ struct hci_dev *hdev = data;
+
+- if (val == 0 || val < hdev->conn_info_min_age)
++ hci_dev_lock(hdev);
++ if (val == 0 || val < hdev->conn_info_min_age) {
++ hci_dev_unlock(hdev);
+ return -EINVAL;
++ }
+
+- hci_dev_lock(hdev);
+ hdev->conn_info_max_age = val;
+ hci_dev_unlock(hdev);
+
+@@ -567,10 +571,12 @@ static int sniff_min_interval_set(void *data, u64 val)
+ {
+ struct hci_dev *hdev = data;
+
+- if (val == 0 || val % 2 || val > hdev->sniff_max_interval)
++ hci_dev_lock(hdev);
++ if (val == 0 || val % 2 || val > hdev->sniff_max_interval) {
++ hci_dev_unlock(hdev);
+ return -EINVAL;
++ }
+
+- hci_dev_lock(hdev);
+ hdev->sniff_min_interval = val;
+ hci_dev_unlock(hdev);
+
+@@ -595,10 +601,12 @@ static int sniff_max_interval_set(void *data, u64 val)
+ {
+ struct hci_dev *hdev = data;
+
+- if (val == 0 || val % 2 || val < hdev->sniff_min_interval)
++ hci_dev_lock(hdev);
++ if (val == 0 || val % 2 || val < hdev->sniff_min_interval) {
++ hci_dev_unlock(hdev);
+ return -EINVAL;
++ }
+
+- hci_dev_lock(hdev);
+ hdev->sniff_max_interval = val;
+ hci_dev_unlock(hdev);
+
+@@ -850,10 +858,12 @@ static int conn_min_interval_set(void *data, u64 val)
+ {
+ struct hci_dev *hdev = data;
+
+- if (val < 0x0006 || val > 0x0c80 || val > hdev->le_conn_max_interval)
++ hci_dev_lock(hdev);
++ if (val < 0x0006 || val > 0x0c80 || val > hdev->le_conn_max_interval) {
++ hci_dev_unlock(hdev);
+ return -EINVAL;
++ }
+
+- hci_dev_lock(hdev);
+ hdev->le_conn_min_interval = val;
+ hci_dev_unlock(hdev);
+
+@@ -878,10 +888,12 @@ static int conn_max_interval_set(void *data, u64 val)
+ {
+ struct hci_dev *hdev = data;
+
+- if (val < 0x0006 || val > 0x0c80 || val < hdev->le_conn_min_interval)
++ hci_dev_lock(hdev);
++ if (val < 0x0006 || val > 0x0c80 || val < hdev->le_conn_min_interval) {
++ hci_dev_unlock(hdev);
+ return -EINVAL;
++ }
+
+- hci_dev_lock(hdev);
+ hdev->le_conn_max_interval = val;
+ hci_dev_unlock(hdev);
+
+@@ -990,10 +1002,12 @@ static int adv_min_interval_set(void *data, u64 val)
+ {
+ struct hci_dev *hdev = data;
+
+- if (val < 0x0020 || val > 0x4000 || val > hdev->le_adv_max_interval)
++ hci_dev_lock(hdev);
++ if (val < 0x0020 || val > 0x4000 || val > hdev->le_adv_max_interval) {
++ hci_dev_unlock(hdev);
+ return -EINVAL;
++ }
+
+- hci_dev_lock(hdev);
+ hdev->le_adv_min_interval = val;
+ hci_dev_unlock(hdev);
+
+@@ -1018,10 +1032,12 @@ static int adv_max_interval_set(void *data, u64 val)
+ {
+ struct hci_dev *hdev = data;
+
+- if (val < 0x0020 || val > 0x4000 || val < hdev->le_adv_min_interval)
++ hci_dev_lock(hdev);
++ if (val < 0x0020 || val > 0x4000 || val < hdev->le_adv_min_interval) {
++ hci_dev_unlock(hdev);
+ return -EINVAL;
++ }
+
+- hci_dev_lock(hdev);
+ hdev->le_adv_max_interval = val;
+ hci_dev_unlock(hdev);
+
+diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
+index 2bb8ab9302a97..bb0e5902a3e60 100644
+--- a/net/bluetooth/hci_event.c
++++ b/net/bluetooth/hci_event.c
+@@ -3219,6 +3219,31 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data,
+ if (test_bit(HCI_ENCRYPT, &hdev->flags))
+ set_bit(HCI_CONN_ENCRYPT, &conn->flags);
+
++ /* "Link key request" completed ahead of "connect request" completion */
++ if (ev->encr_mode == 1 && !test_bit(HCI_CONN_ENCRYPT, &conn->flags) &&
++ ev->link_type == ACL_LINK) {
++ struct link_key *key;
++ struct hci_cp_read_enc_key_size cp;
++
++ key = hci_find_link_key(hdev, &ev->bdaddr);
++ if (key) {
++ set_bit(HCI_CONN_ENCRYPT, &conn->flags);
++
++ if (!(hdev->commands[20] & 0x10)) {
++ conn->enc_key_size = HCI_LINK_KEY_SIZE;
++ } else {
++ cp.handle = cpu_to_le16(conn->handle);
++ if (hci_send_cmd(hdev, HCI_OP_READ_ENC_KEY_SIZE,
++ sizeof(cp), &cp)) {
++ bt_dev_err(hdev, "sending read key size failed");
++ conn->enc_key_size = HCI_LINK_KEY_SIZE;
++ }
++ }
++
++ hci_encrypt_cfm(conn, ev->status);
++ }
++ }
++
+ /* Get remote features */
+ if (conn->type == ACL_LINK) {
+ struct hci_cp_read_remote_features cp;
+diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
+index 9b241eabca3e8..d6c0633bfe5bf 100644
+--- a/net/bluetooth/hci_sync.c
++++ b/net/bluetooth/hci_sync.c
+@@ -3292,7 +3292,10 @@ static void hci_dev_get_bd_addr_from_property(struct hci_dev *hdev)
+ if (ret < 0 || !bacmp(&ba, BDADDR_ANY))
+ return;
+
+- bacpy(&hdev->public_addr, &ba);
++ if (test_bit(HCI_QUIRK_BDADDR_PROPERTY_BROKEN, &hdev->quirks))
++ baswap(&hdev->public_addr, &ba);
++ else
++ bacpy(&hdev->public_addr, &ba);
+ }
+
+ struct hci_init_stage {
+diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
+index aa23479b20b2a..ed62c1026fe93 100644
+--- a/net/bridge/netfilter/ebtables.c
++++ b/net/bridge/netfilter/ebtables.c
+@@ -1111,6 +1111,8 @@ static int do_replace(struct net *net, sockptr_t arg, unsigned int len)
+ struct ebt_table_info *newinfo;
+ struct ebt_replace tmp;
+
++ if (len < sizeof(tmp))
++ return -EINVAL;
+ if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
+ return -EFAULT;
+
+@@ -1423,6 +1425,8 @@ static int update_counters(struct net *net, sockptr_t arg, unsigned int len)
+ {
+ struct ebt_replace hlp;
+
++ if (len < sizeof(hlp))
++ return -EINVAL;
+ if (copy_from_sockptr(&hlp, arg, sizeof(hlp)))
+ return -EFAULT;
+
+@@ -2352,6 +2356,8 @@ static int compat_update_counters(struct net *net, sockptr_t arg,
+ {
+ struct compat_ebt_replace hlp;
+
++ if (len < sizeof(hlp))
++ return -EINVAL;
+ if (copy_from_sockptr(&hlp, arg, sizeof(hlp)))
+ return -EFAULT;
+
+diff --git a/net/core/gro.c b/net/core/gro.c
+index 0759277dc14ee..cefddf65f7db0 100644
+--- a/net/core/gro.c
++++ b/net/core/gro.c
+@@ -195,8 +195,9 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
+ }
+
+ merge:
+- /* sk owenrship - if any - completely transferred to the aggregated packet */
++ /* sk ownership - if any - completely transferred to the aggregated packet */
+ skb->destructor = NULL;
++ skb->sk = NULL;
+ delta_truesize = skb->truesize;
+ if (offset > headlen) {
+ unsigned int eat = offset - headlen;
+diff --git a/net/core/sock_map.c b/net/core/sock_map.c
+index 27d733c0f65e1..8598466a38057 100644
+--- a/net/core/sock_map.c
++++ b/net/core/sock_map.c
+@@ -411,6 +411,9 @@ static int __sock_map_delete(struct bpf_stab *stab, struct sock *sk_test,
+ struct sock *sk;
+ int err = 0;
+
++ if (irqs_disabled())
++ return -EOPNOTSUPP; /* locks here are hardirq-unsafe */
++
+ spin_lock_bh(&stab->lock);
+ sk = *psk;
+ if (!sk_test || sk_test == sk)
+@@ -933,6 +936,9 @@ static long sock_hash_delete_elem(struct bpf_map *map, void *key)
+ struct bpf_shtab_elem *elem;
+ int ret = -ENOENT;
+
++ if (irqs_disabled())
++ return -EOPNOTSUPP; /* locks here are hardirq-unsafe */
++
+ hash = sock_hash_bucket_hash(key, key_size);
+ bucket = sock_hash_select_bucket(htab, hash);
+
+diff --git a/net/hsr/hsr_slave.c b/net/hsr/hsr_slave.c
+index e5742f2a2d522..1b6457f357bdb 100644
+--- a/net/hsr/hsr_slave.c
++++ b/net/hsr/hsr_slave.c
+@@ -220,7 +220,8 @@ void hsr_del_port(struct hsr_port *port)
+ netdev_update_features(master->dev);
+ dev_set_mtu(master->dev, hsr_get_max_mtu(hsr));
+ netdev_rx_handler_unregister(port->dev);
+- dev_set_promiscuity(port->dev, -1);
++ if (!port->hsr->fwd_offloaded)
++ dev_set_promiscuity(port->dev, -1);
+ netdev_upper_dev_unlink(port->dev, master->dev);
+ }
+
+diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
+index 762817d6c8d70..a018981b45142 100644
+--- a/net/ipv4/inet_connection_sock.c
++++ b/net/ipv4/inet_connection_sock.c
+@@ -289,6 +289,7 @@ static bool inet_bhash2_addr_any_conflict(const struct sock *sk, int port, int l
+ struct sock_reuseport *reuseport_cb;
+ struct inet_bind_hashbucket *head2;
+ struct inet_bind2_bucket *tb2;
++ bool conflict = false;
+ bool reuseport_cb_ok;
+
+ rcu_read_lock();
+@@ -301,18 +302,20 @@ static bool inet_bhash2_addr_any_conflict(const struct sock *sk, int port, int l
+
+ spin_lock(&head2->lock);
+
+- inet_bind_bucket_for_each(tb2, &head2->chain)
+- if (inet_bind2_bucket_match_addr_any(tb2, net, port, l3mdev, sk))
+- break;
++ inet_bind_bucket_for_each(tb2, &head2->chain) {
++ if (!inet_bind2_bucket_match_addr_any(tb2, net, port, l3mdev, sk))
++ continue;
+
+- if (tb2 && inet_bhash2_conflict(sk, tb2, uid, relax, reuseport_cb_ok,
+- reuseport_ok)) {
+- spin_unlock(&head2->lock);
+- return true;
++ if (!inet_bhash2_conflict(sk, tb2, uid, relax, reuseport_cb_ok, reuseport_ok))
++ continue;
++
++ conflict = true;
++ break;
+ }
+
+ spin_unlock(&head2->lock);
+- return false;
++
++ return conflict;
+ }
+
+ /*
+@@ -774,6 +777,20 @@ void inet_csk_clear_xmit_timers(struct sock *sk)
+ }
+ EXPORT_SYMBOL(inet_csk_clear_xmit_timers);
+
+void inet_csk_clear_xmit_timers_sync(struct sock *sk)
+{
+ struct inet_connection_sock *icsk = inet_csk(sk);
+
+ /* Ongoing timer handlers need the socket lock; tcp_close() calls this after release_sock(). */
+ sock_not_owned_by_me(sk);
+
+ icsk->icsk_pending = icsk->icsk_ack.pending = 0;
+
+ sk_stop_timer_sync(sk, &icsk->icsk_retransmit_timer);
+ sk_stop_timer_sync(sk, &icsk->icsk_delack_timer);
+ sk_stop_timer_sync(sk, &sk->sk_timer);
+}
++
+ void inet_csk_delete_keepalive_timer(struct sock *sk)
+ {
+ sk_stop_timer(sk, &sk->sk_timer);
+diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
+index 7072fc0783ef5..c88c9034d6300 100644
+--- a/net/ipv4/inet_fragment.c
++++ b/net/ipv4/inet_fragment.c
+@@ -24,6 +24,8 @@
+ #include <net/ip.h>
+ #include <net/ipv6.h>
+
++#include "../core/sock_destructor.h"
++
+ /* Use skb->cb to track consecutive/adjacent fragments coming at
+ * the end of the queue. Nodes in the rb-tree queue will
+ * contain "runs" of one or more adjacent fragments.
+@@ -39,6 +41,7 @@ struct ipfrag_skb_cb {
+ };
+ struct sk_buff *next_frag;
+ int frag_run_len;
++ int ip_defrag_offset;
+ };
+
+ #define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb))
+@@ -396,12 +399,12 @@ int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb,
+ */
+ if (!last)
+ fragrun_create(q, skb); /* First fragment. */
+- else if (last->ip_defrag_offset + last->len < end) {
++ else if (FRAG_CB(last)->ip_defrag_offset + last->len < end) {
+ /* This is the common case: skb goes to the end. */
+ /* Detect and discard overlaps. */
+- if (offset < last->ip_defrag_offset + last->len)
++ if (offset < FRAG_CB(last)->ip_defrag_offset + last->len)
+ return IPFRAG_OVERLAP;
+- if (offset == last->ip_defrag_offset + last->len)
++ if (offset == FRAG_CB(last)->ip_defrag_offset + last->len)
+ fragrun_append_to_last(q, skb);
+ else
+ fragrun_create(q, skb);
+@@ -418,13 +421,13 @@ int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb,
+
+ parent = *rbn;
+ curr = rb_to_skb(parent);
+- curr_run_end = curr->ip_defrag_offset +
++ curr_run_end = FRAG_CB(curr)->ip_defrag_offset +
+ FRAG_CB(curr)->frag_run_len;
+- if (end <= curr->ip_defrag_offset)
++ if (end <= FRAG_CB(curr)->ip_defrag_offset)
+ rbn = &parent->rb_left;
+ else if (offset >= curr_run_end)
+ rbn = &parent->rb_right;
+- else if (offset >= curr->ip_defrag_offset &&
++ else if (offset >= FRAG_CB(curr)->ip_defrag_offset &&
+ end <= curr_run_end)
+ return IPFRAG_DUP;
+ else
+@@ -438,7 +441,7 @@ int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb,
+ rb_insert_color(&skb->rbnode, &q->rb_fragments);
+ }
+
+- skb->ip_defrag_offset = offset;
++ FRAG_CB(skb)->ip_defrag_offset = offset;
+
+ return IPFRAG_OK;
+ }
+@@ -448,13 +451,28 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb,
+ struct sk_buff *parent)
+ {
+ struct sk_buff *fp, *head = skb_rb_first(&q->rb_fragments);
+- struct sk_buff **nextp;
++ void (*destructor)(struct sk_buff *);
++ unsigned int orig_truesize = 0;
++ struct sk_buff **nextp = NULL;
++ struct sock *sk = skb->sk;
+ int delta;
+
++ if (sk && is_skb_wmem(skb)) {
++ /* TX: skb->sk might have been passed as argument to
++ * dst->output and must remain valid until tx completes.
++ *
++ * Move sk to reassembled skb and fix up wmem accounting.
++ */
++ orig_truesize = skb->truesize;
++ destructor = skb->destructor;
++ }
++
+ if (head != skb) {
+ fp = skb_clone(skb, GFP_ATOMIC);
+- if (!fp)
+- return NULL;
++ if (!fp) {
++ head = skb;
++ goto out_restore_sk;
++ }
+ FRAG_CB(fp)->next_frag = FRAG_CB(skb)->next_frag;
+ if (RB_EMPTY_NODE(&skb->rbnode))
+ FRAG_CB(parent)->next_frag = fp;
+@@ -463,6 +481,12 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb,
+ &q->rb_fragments);
+ if (q->fragments_tail == skb)
+ q->fragments_tail = fp;
++
++ if (orig_truesize) {
++ /* prevent skb_morph from releasing sk */
++ skb->sk = NULL;
++ skb->destructor = NULL;
++ }
+ skb_morph(skb, head);
+ FRAG_CB(skb)->next_frag = FRAG_CB(head)->next_frag;
+ rb_replace_node(&head->rbnode, &skb->rbnode,
+@@ -470,13 +494,13 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb,
+ consume_skb(head);
+ head = skb;
+ }
+- WARN_ON(head->ip_defrag_offset != 0);
++ WARN_ON(FRAG_CB(head)->ip_defrag_offset != 0);
+
+ delta = -head->truesize;
+
+ /* Head of list must not be cloned. */
+ if (skb_unclone(head, GFP_ATOMIC))
+- return NULL;
++ goto out_restore_sk;
+
+ delta += head->truesize;
+ if (delta)
+@@ -492,7 +516,7 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb,
+
+ clone = alloc_skb(0, GFP_ATOMIC);
+ if (!clone)
+- return NULL;
++ goto out_restore_sk;
+ skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
+ skb_frag_list_init(head);
+ for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
+@@ -509,6 +533,21 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb,
+ nextp = &skb_shinfo(head)->frag_list;
+ }
+
++out_restore_sk:
++ if (orig_truesize) {
++ int ts_delta = head->truesize - orig_truesize;
++
++ /* if this reassembled skb is fragmented later,
++ * fraglist skbs will get skb->sk assigned from head->sk,
++ * and each frag skb will be released via sock_wfree.
++ *
++ * Update sk_wmem_alloc.
++ */
++ head->sk = sk;
++ head->destructor = destructor;
++ refcount_add(ts_delta, &sk->sk_wmem_alloc);
++ }
++
+ return nextp;
+ }
+ EXPORT_SYMBOL(inet_frag_reasm_prepare);
+@@ -516,6 +555,8 @@ EXPORT_SYMBOL(inet_frag_reasm_prepare);
+ void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head,
+ void *reasm_data, bool try_coalesce)
+ {
++ struct sock *sk = is_skb_wmem(head) ? head->sk : NULL;
++ const unsigned int head_truesize = head->truesize;
+ struct sk_buff **nextp = reasm_data;
+ struct rb_node *rbn;
+ struct sk_buff *fp;
+@@ -579,6 +620,9 @@ void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head,
+ head->prev = NULL;
+ head->tstamp = q->stamp;
+ head->mono_delivery_time = q->mono_delivery_time;
++
++ if (sk)
++ refcount_add(sum_truesize - head_truesize, &sk->sk_wmem_alloc);
+ }
+ EXPORT_SYMBOL(inet_frag_reasm_finish);
+
+diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
+index a4941f53b5237..fb947d1613fe2 100644
+--- a/net/ipv4/ip_fragment.c
++++ b/net/ipv4/ip_fragment.c
+@@ -384,6 +384,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
+ }
+
+ skb_dst_drop(skb);
++ skb_orphan(skb);
+ return -EINPROGRESS;
+
+ insert_error:
+@@ -487,7 +488,6 @@ int ip_defrag(struct net *net, struct sk_buff *skb, u32 user)
+ struct ipq *qp;
+
+ __IP_INC_STATS(net, IPSTATS_MIB_REASMREQDS);
+- skb_orphan(skb);
+
+ /* Lookup (or create) queue header */
+ qp = ip_find(net, ip_hdr(skb), user, vif);
+diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
+index 5169c3c72cffe..f21a1a5403723 100644
+--- a/net/ipv4/ip_gre.c
++++ b/net/ipv4/ip_gre.c
+@@ -280,8 +280,13 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
+ tpi->flags | TUNNEL_NO_KEY,
+ iph->saddr, iph->daddr, 0);
+ } else {
++ if (unlikely(!pskb_may_pull(skb,
++ gre_hdr_len + sizeof(*ershdr))))
++ return PACKET_REJECT;
++
+ ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len);
+ ver = ershdr->ver;
++ iph = ip_hdr(skb);
+ tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
+ tpi->flags | TUNNEL_KEY,
+ iph->saddr, iph->daddr, tpi->key);
+diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
+index 2407066b0fec1..b150c9929b12e 100644
+--- a/net/ipv4/netfilter/arp_tables.c
++++ b/net/ipv4/netfilter/arp_tables.c
+@@ -956,6 +956,8 @@ static int do_replace(struct net *net, sockptr_t arg, unsigned int len)
+ void *loc_cpu_entry;
+ struct arpt_entry *iter;
+
++ if (len < sizeof(tmp))
++ return -EINVAL;
+ if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
+ return -EFAULT;
+
+@@ -1254,6 +1256,8 @@ static int compat_do_replace(struct net *net, sockptr_t arg, unsigned int len)
+ void *loc_cpu_entry;
+ struct arpt_entry *iter;
+
++ if (len < sizeof(tmp))
++ return -EINVAL;
+ if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
+ return -EFAULT;
+
+diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
+index 7da1df4997d05..4876707595781 100644
+--- a/net/ipv4/netfilter/ip_tables.c
++++ b/net/ipv4/netfilter/ip_tables.c
+@@ -1108,6 +1108,8 @@ do_replace(struct net *net, sockptr_t arg, unsigned int len)
+ void *loc_cpu_entry;
+ struct ipt_entry *iter;
+
++ if (len < sizeof(tmp))
++ return -EINVAL;
+ if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
+ return -EFAULT;
+
+@@ -1492,6 +1494,8 @@ compat_do_replace(struct net *net, sockptr_t arg, unsigned int len)
+ void *loc_cpu_entry;
+ struct ipt_entry *iter;
+
++ if (len < sizeof(tmp))
++ return -EINVAL;
+ if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
+ return -EFAULT;
+
+diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
+index 68bb8d6bcc113..f8df35f7352a5 100644
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -2931,6 +2931,8 @@ void tcp_close(struct sock *sk, long timeout)
+ lock_sock(sk);
+ __tcp_close(sk, timeout);
+ release_sock(sk);
++ if (!sk->sk_net_refcnt)
++ inet_csk_clear_xmit_timers_sync(sk);
+ sock_put(sk);
+ }
+ EXPORT_SYMBOL(tcp_close);
+diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
+index 848072793fa98..70a9a4a48216e 100644
+--- a/net/ipv4/udp.c
++++ b/net/ipv4/udp.c
+@@ -584,6 +584,13 @@ static inline bool __udp_is_mcast_sock(struct net *net, const struct sock *sk,
+ }
+
+ DEFINE_STATIC_KEY_FALSE(udp_encap_needed_key);
++EXPORT_SYMBOL(udp_encap_needed_key);
++
++#if IS_ENABLED(CONFIG_IPV6)
++DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key);
++EXPORT_SYMBOL(udpv6_encap_needed_key);
++#endif
++
+ void udp_encap_enable(void)
+ {
+ static_branch_inc(&udp_encap_needed_key);
+diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
+index 6c95d28d0c4a7..c3d67423ae189 100644
+--- a/net/ipv4/udp_offload.c
++++ b/net/ipv4/udp_offload.c
+@@ -449,8 +449,9 @@ static int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb)
+ NAPI_GRO_CB(p)->count++;
+ p->data_len += skb->len;
+
+- /* sk owenrship - if any - completely transferred to the aggregated packet */
++ /* sk ownership - if any - completely transferred to the aggregated packet */
+ skb->destructor = NULL;
++ skb->sk = NULL;
+ p->truesize += skb->truesize;
+ p->len += skb->len;
+
+@@ -551,11 +552,19 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
+ unsigned int off = skb_gro_offset(skb);
+ int flush = 1;
+
+- /* we can do L4 aggregation only if the packet can't land in a tunnel
+- * otherwise we could corrupt the inner stream
++ /* We can do L4 aggregation only if the packet can't land in a tunnel
++ * otherwise we could corrupt the inner stream. Detecting such packets
++ * cannot be foolproof and the aggregation might still happen in some
++ * cases. Such packets should be caught in udp_unexpected_gso later.
+ */
+ NAPI_GRO_CB(skb)->is_flist = 0;
+ if (!sk || !udp_sk(sk)->gro_receive) {
++ /* If the packet was locally encapsulated in a UDP tunnel that
++ * wasn't detected above, do not GRO.
++ */
++ if (skb->encapsulation)
++ goto out;
++
+ if (skb->dev->features & NETIF_F_GRO_FRAGLIST)
+ NAPI_GRO_CB(skb)->is_flist = sk ? !udp_test_bit(GRO_ENABLED, sk) : 1;
+
+@@ -719,13 +728,7 @@ INDIRECT_CALLABLE_SCOPE int udp4_gro_complete(struct sk_buff *skb, int nhoff)
+ skb_shinfo(skb)->gso_type |= (SKB_GSO_FRAGLIST|SKB_GSO_UDP_L4);
+ skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
+
+- if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
+- if (skb->csum_level < SKB_MAX_CSUM_LEVEL)
+- skb->csum_level++;
+- } else {
+- skb->ip_summed = CHECKSUM_UNNECESSARY;
+- skb->csum_level = 0;
+- }
++ __skb_incr_checksum_unnecessary(skb);
+
+ return 0;
+ }
+diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
+index 4fc2cae0d116c..54294f6a8ec51 100644
+--- a/net/ipv6/ip6_fib.c
++++ b/net/ipv6/ip6_fib.c
+@@ -645,19 +645,19 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
+ if (!w) {
+ /* New dump:
+ *
+- * 1. hook callback destructor.
+- */
+- cb->args[3] = (long)cb->done;
+- cb->done = fib6_dump_done;
+-
+- /*
+- * 2. allocate and initialize walker.
++ * 1. allocate and initialize walker.
+ */
+ w = kzalloc(sizeof(*w), GFP_ATOMIC);
+ if (!w)
+ return -ENOMEM;
+ w->func = fib6_dump_node;
+ cb->args[2] = (long)w;
++
++ /* 2. hook callback destructor.
++ */
++ cb->args[3] = (long)cb->done;
++ cb->done = fib6_dump_done;
++
+ }
+
+ arg.skb = skb;
+diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
+index 070d87abf7c02..26c3287beb29c 100644
+--- a/net/ipv6/ip6_gre.c
++++ b/net/ipv6/ip6_gre.c
+@@ -528,6 +528,9 @@ static int ip6erspan_rcv(struct sk_buff *skb,
+ struct ip6_tnl *tunnel;
+ u8 ver;
+
++ if (unlikely(!pskb_may_pull(skb, sizeof(*ershdr))))
++ return PACKET_REJECT;
++
+ ipv6h = ipv6_hdr(skb);
+ ershdr = (struct erspan_base_hdr *)skb->data;
+ ver = ershdr->ver;
+diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
+index fd9f049d6d41e..636b360311c53 100644
+--- a/net/ipv6/netfilter/ip6_tables.c
++++ b/net/ipv6/netfilter/ip6_tables.c
+@@ -1125,6 +1125,8 @@ do_replace(struct net *net, sockptr_t arg, unsigned int len)
+ void *loc_cpu_entry;
+ struct ip6t_entry *iter;
+
++ if (len < sizeof(tmp))
++ return -EINVAL;
+ if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
+ return -EFAULT;
+
+@@ -1501,6 +1503,8 @@ compat_do_replace(struct net *net, sockptr_t arg, unsigned int len)
+ void *loc_cpu_entry;
+ struct ip6t_entry *iter;
+
++ if (len < sizeof(tmp))
++ return -EINVAL;
+ if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
+ return -EFAULT;
+
+diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
+index b2dd48911c8d6..efbec7ee27d0a 100644
+--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
++++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
+@@ -294,6 +294,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
+ }
+
+ skb_dst_drop(skb);
++ skb_orphan(skb);
+ return -EINPROGRESS;
+
+ insert_error:
+@@ -469,7 +470,6 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
+ hdr = ipv6_hdr(skb);
+ fhdr = (struct frag_hdr *)skb_transport_header(skb);
+
+- skb_orphan(skb);
+ fq = fq_find(net, fhdr->identification, user, hdr,
+ skb->dev ? skb->dev->ifindex : 0);
+ if (fq == NULL) {
+diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
+index 438476a31313c..d31beb65db08f 100644
+--- a/net/ipv6/udp.c
++++ b/net/ipv6/udp.c
+@@ -450,7 +450,7 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
+ goto try_again;
+ }
+
+-DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key);
++DECLARE_STATIC_KEY_FALSE(udpv6_encap_needed_key);
+ void udpv6_encap_enable(void)
+ {
+ static_branch_inc(&udpv6_encap_needed_key);
+diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
+index 6b95ba241ebe2..626d7b362dc7b 100644
+--- a/net/ipv6/udp_offload.c
++++ b/net/ipv6/udp_offload.c
+@@ -174,13 +174,7 @@ INDIRECT_CALLABLE_SCOPE int udp6_gro_complete(struct sk_buff *skb, int nhoff)
+ skb_shinfo(skb)->gso_type |= (SKB_GSO_FRAGLIST|SKB_GSO_UDP_L4);
+ skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
+
+- if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
+- if (skb->csum_level < SKB_MAX_CSUM_LEVEL)
+- skb->csum_level++;
+- } else {
+- skb->ip_summed = CHECKSUM_UNNECESSARY;
+- skb->csum_level = 0;
+- }
++ __skb_incr_checksum_unnecessary(skb);
+
+ return 0;
+ }
+diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
+index b54951ae07aa9..01ac690af7799 100644
+--- a/net/mptcp/protocol.c
++++ b/net/mptcp/protocol.c
+@@ -55,28 +55,14 @@ static u64 mptcp_wnd_end(const struct mptcp_sock *msk)
+ return READ_ONCE(msk->wnd_end);
+ }
+
+-static bool mptcp_is_tcpsk(struct sock *sk)
++static const struct proto_ops *mptcp_fallback_tcp_ops(const struct sock *sk)
+ {
+- struct socket *sock = sk->sk_socket;
+-
+- if (unlikely(sk->sk_prot == &tcp_prot)) {
+- /* we are being invoked after mptcp_accept() has
+- * accepted a non-mp-capable flow: sk is a tcp_sk,
+- * not an mptcp one.
+- *
+- * Hand the socket over to tcp so all further socket ops
+- * bypass mptcp.
+- */
+- WRITE_ONCE(sock->ops, &inet_stream_ops);
+- return true;
+ #if IS_ENABLED(CONFIG_MPTCP_IPV6)
+- } else if (unlikely(sk->sk_prot == &tcpv6_prot)) {
+- WRITE_ONCE(sock->ops, &inet6_stream_ops);
+- return true;
++ if (sk->sk_prot == &tcpv6_prot)
++ return &inet6_stream_ops;
+ #endif
+- }
+-
+- return false;
++ WARN_ON_ONCE(sk->sk_prot != &tcp_prot);
++ return &inet_stream_ops;
+ }
+
+ static int __mptcp_socket_create(struct mptcp_sock *msk)
+@@ -3328,44 +3314,6 @@ void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk)
+ msk->rcvq_space.space = TCP_INIT_CWND * TCP_MSS_DEFAULT;
+ }
+
+-static struct sock *mptcp_accept(struct sock *ssk, int flags, int *err,
+- bool kern)
+-{
+- struct sock *newsk;
+-
+- pr_debug("ssk=%p, listener=%p", ssk, mptcp_subflow_ctx(ssk));
+- newsk = inet_csk_accept(ssk, flags, err, kern);
+- if (!newsk)
+- return NULL;
+-
+- pr_debug("newsk=%p, subflow is mptcp=%d", newsk, sk_is_mptcp(newsk));
+- if (sk_is_mptcp(newsk)) {
+- struct mptcp_subflow_context *subflow;
+- struct sock *new_mptcp_sock;
+-
+- subflow = mptcp_subflow_ctx(newsk);
+- new_mptcp_sock = subflow->conn;
+-
+- /* is_mptcp should be false if subflow->conn is missing, see
+- * subflow_syn_recv_sock()
+- */
+- if (WARN_ON_ONCE(!new_mptcp_sock)) {
+- tcp_sk(newsk)->is_mptcp = 0;
+- goto out;
+- }
+-
+- newsk = new_mptcp_sock;
+- MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEPASSIVEACK);
+- } else {
+- MPTCP_INC_STATS(sock_net(ssk),
+- MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK);
+- }
+-
+-out:
+- newsk->sk_kern_sock = kern;
+- return newsk;
+-}
+-
+ void mptcp_destroy_common(struct mptcp_sock *msk, unsigned int flags)
+ {
+ struct mptcp_subflow_context *subflow, *tmp;
+@@ -3802,7 +3750,6 @@ static struct proto mptcp_prot = {
+ .connect = mptcp_connect,
+ .disconnect = mptcp_disconnect,
+ .close = mptcp_close,
+- .accept = mptcp_accept,
+ .setsockopt = mptcp_setsockopt,
+ .getsockopt = mptcp_getsockopt,
+ .shutdown = mptcp_shutdown,
+@@ -3912,18 +3859,36 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
+ if (!ssk)
+ return -EINVAL;
+
+- newsk = mptcp_accept(ssk, flags, &err, kern);
++ pr_debug("ssk=%p, listener=%p", ssk, mptcp_subflow_ctx(ssk));
++ newsk = inet_csk_accept(ssk, flags, &err, kern);
+ if (!newsk)
+ return err;
+
+- lock_sock(newsk);
+-
+- __inet_accept(sock, newsock, newsk);
+- if (!mptcp_is_tcpsk(newsock->sk)) {
+- struct mptcp_sock *msk = mptcp_sk(newsk);
++ pr_debug("newsk=%p, subflow is mptcp=%d", newsk, sk_is_mptcp(newsk));
++ if (sk_is_mptcp(newsk)) {
+ struct mptcp_subflow_context *subflow;
++ struct sock *new_mptcp_sock;
++
++ subflow = mptcp_subflow_ctx(newsk);
++ new_mptcp_sock = subflow->conn;
++
++ /* is_mptcp should be false if subflow->conn is missing, see
++ * subflow_syn_recv_sock()
++ */
++ if (WARN_ON_ONCE(!new_mptcp_sock)) {
++ tcp_sk(newsk)->is_mptcp = 0;
++ goto tcpfallback;
++ }
++
++ newsk = new_mptcp_sock;
++ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEPASSIVEACK);
++
++ newsk->sk_kern_sock = kern;
++ lock_sock(newsk);
++ __inet_accept(sock, newsock, newsk);
+
+ set_bit(SOCK_CUSTOM_SOCKOPT, &newsock->flags);
++ msk = mptcp_sk(newsk);
+ msk->in_accept_queue = 0;
+
+ /* set ssk->sk_socket of accept()ed flows to mptcp socket.
+@@ -3945,6 +3910,19 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
+ if (unlikely(list_is_singular(&msk->conn_list)))
+ mptcp_set_state(newsk, TCP_CLOSE);
+ }
++ } else {
++tcpfallback:
++ newsk->sk_kern_sock = kern;
++ lock_sock(newsk);
++ __inet_accept(sock, newsock, newsk);
++ /* we are being invoked after accepting a non-mp-capable
++ * flow: sk is a tcp_sk, not an mptcp one.
++ *
++ * Hand the socket over to tcp so all further socket ops
++ * bypass mptcp.
++ */
++ WRITE_ONCE(newsock->sk->sk_socket->ops,
++ mptcp_fallback_tcp_ops(newsock->sk));
+ }
+ release_sock(newsk);
+
+diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
+index ab41700bee688..23ee96c6abcbf 100644
+--- a/net/mptcp/subflow.c
++++ b/net/mptcp/subflow.c
+@@ -905,6 +905,8 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
+ return child;
+
+ fallback:
++ if (fallback)
++ SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK);
+ mptcp_subflow_drop_ctx(child);
+ return child;
+ }
+diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
+index f10419ba6e0bd..2a4649df8f086 100644
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -1200,6 +1200,26 @@ static void nf_tables_table_disable(struct net *net, struct nft_table *table)
+ #define __NFT_TABLE_F_UPDATE (__NFT_TABLE_F_WAS_DORMANT | \
+ __NFT_TABLE_F_WAS_AWAKEN)
+
+static bool nft_table_pending_update(const struct nft_ctx *ctx)
+{
+ struct nftables_pernet *nft_net = nft_pernet(ctx->net);
+ struct nft_trans *trans;
+
+ if (ctx->table->flags & __NFT_TABLE_F_UPDATE)
+ return true; /* WAS_DORMANT/WAS_AWAKEN already set on this table */
+
+ list_for_each_entry(trans, &nft_net->commit_list, list) {
+ if (trans->ctx.table == ctx->table &&
+ ((trans->msg_type == NFT_MSG_NEWCHAIN &&
+ nft_trans_chain_update(trans)) ||
+ (trans->msg_type == NFT_MSG_DELCHAIN &&
+ nft_is_base_chain(trans->ctx.chain))))
+ return true; /* queued chain update or base-chain delete on this table */
+ }
+
+ return false; /* nothing pending for this table in commit_list */
+}
++
+ static int nf_tables_updtable(struct nft_ctx *ctx)
+ {
+ struct nft_trans *trans;
+@@ -1223,7 +1243,7 @@ static int nf_tables_updtable(struct nft_ctx *ctx)
+ return -EOPNOTSUPP;
+
+ /* No dormant off/on/off/on games in single transaction */
+- if (ctx->table->flags & __NFT_TABLE_F_UPDATE)
++ if (nft_table_pending_update(ctx))
+ return -EINVAL;
+
+ trans = nft_trans_alloc(ctx, NFT_MSG_NEWTABLE,
+@@ -2420,6 +2440,9 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
+ struct nft_stats __percpu *stats = NULL;
+ struct nft_chain_hook hook = {};
+
++ if (table->flags & __NFT_TABLE_F_UPDATE)
++ return -EINVAL;
++
+ if (flags & NFT_CHAIN_BINDING)
+ return -EOPNOTSUPP;
+
+@@ -2621,6 +2644,13 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
+ }
+ }
+
++ if (table->flags & __NFT_TABLE_F_UPDATE &&
++ !list_empty(&hook.list)) {
++ NL_SET_BAD_ATTR(extack, attr);
++ err = -EOPNOTSUPP;
++ goto err_hooks;
++ }
++
+ if (!(table->flags & NFT_TABLE_F_DORMANT) &&
+ nft_is_base_chain(chain) &&
+ !list_empty(&hook.list)) {
+@@ -2850,6 +2880,9 @@ static int nft_delchain_hook(struct nft_ctx *ctx,
+ struct nft_trans *trans;
+ int err;
+
++ if (ctx->table->flags & __NFT_TABLE_F_UPDATE)
++ return -EOPNOTSUPP;
++
+ err = nft_chain_parse_hook(ctx->net, basechain, nla, &chain_hook,
+ ctx->family, chain->flags, extack);
+ if (err < 0)
+@@ -2934,7 +2967,8 @@ static int nf_tables_delchain(struct sk_buff *skb, const struct nfnl_info *info,
+ nft_ctx_init(&ctx, net, skb, info->nlh, family, table, chain, nla);
+
+ if (nla[NFTA_CHAIN_HOOK]) {
+- if (chain->flags & NFT_CHAIN_HW_OFFLOAD)
++ if (NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_DESTROYCHAIN ||
++ chain->flags & NFT_CHAIN_HW_OFFLOAD)
+ return -EOPNOTSUPP;
+
+ if (nft_is_base_chain(chain)) {
+@@ -8134,11 +8168,12 @@ static int nft_flowtable_parse_hook(const struct nft_ctx *ctx,
+ return err;
+ }
+
++/* call under rcu_read_lock */
+ static const struct nf_flowtable_type *__nft_flowtable_type_get(u8 family)
+ {
+ const struct nf_flowtable_type *type;
+
+- list_for_each_entry(type, &nf_tables_flowtables, list) {
++ list_for_each_entry_rcu(type, &nf_tables_flowtables, list) {
+ if (family == type->family)
+ return type;
+ }
+@@ -8150,9 +8185,13 @@ nft_flowtable_type_get(struct net *net, u8 family)
+ {
+ const struct nf_flowtable_type *type;
+
++ rcu_read_lock();
+ type = __nft_flowtable_type_get(family);
+- if (type != NULL && try_module_get(type->owner))
++ if (type != NULL && try_module_get(type->owner)) {
++ rcu_read_unlock();
+ return type;
++ }
++ rcu_read_unlock();
+
+ lockdep_nfnl_nft_mutex_not_held();
+ #ifdef CONFIG_MODULES
+@@ -10053,9 +10092,11 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
+ if (nft_trans_chain_update(trans)) {
+ nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN,
+ &nft_trans_chain_hooks(trans));
+- nft_netdev_unregister_hooks(net,
+- &nft_trans_chain_hooks(trans),
+- true);
++ if (!(trans->ctx.table->flags & NFT_TABLE_F_DORMANT)) {
++ nft_netdev_unregister_hooks(net,
++ &nft_trans_chain_hooks(trans),
++ true);
++ }
+ } else {
+ nft_chain_del(trans->ctx.chain);
+ nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN,
+@@ -10294,10 +10335,11 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
+ struct nft_trans *trans, *next;
+ LIST_HEAD(set_update_list);
+ struct nft_trans_elem *te;
++ int err = 0;
+
+ if (action == NFNL_ABORT_VALIDATE &&
+ nf_tables_validate(net) < 0)
+- return -EAGAIN;
++ err = -EAGAIN;
+
+ list_for_each_entry_safe_reverse(trans, next, &nft_net->commit_list,
+ list) {
+@@ -10327,9 +10369,11 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
+ break;
+ case NFT_MSG_NEWCHAIN:
+ if (nft_trans_chain_update(trans)) {
+- nft_netdev_unregister_hooks(net,
+- &nft_trans_chain_hooks(trans),
+- true);
++ if (!(trans->ctx.table->flags & NFT_TABLE_F_DORMANT)) {
++ nft_netdev_unregister_hooks(net,
++ &nft_trans_chain_hooks(trans),
++ true);
++ }
+ free_percpu(nft_trans_chain_stats(trans));
+ kfree(nft_trans_chain_name(trans));
+ nft_trans_destroy(trans);
+@@ -10483,12 +10527,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
+ nf_tables_abort_release(trans);
+ }
+
+- if (action == NFNL_ABORT_AUTOLOAD)
+- nf_tables_module_autoload(net);
+- else
+- nf_tables_module_autoload_cleanup(net);
+-
+- return 0;
++ return err;
+ }
+
+ static int nf_tables_abort(struct net *net, struct sk_buff *skb,
+@@ -10501,6 +10540,17 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb,
+ gc_seq = nft_gc_seq_begin(nft_net);
+ ret = __nf_tables_abort(net, action);
+ nft_gc_seq_end(nft_net, gc_seq);
++
++ WARN_ON_ONCE(!list_empty(&nft_net->commit_list));
++
++ /* module autoload needs to happen after GC sequence update because it
++ * temporarily releases and grabs mutex again.
++ */
++ if (action == NFNL_ABORT_AUTOLOAD)
++ nf_tables_module_autoload(net);
++ else
++ nf_tables_module_autoload_cleanup(net);
++
+ mutex_unlock(&nft_net->commit_mutex);
+
+ return ret;
+@@ -11301,9 +11351,10 @@ static void __net_exit nf_tables_exit_net(struct net *net)
+
+ gc_seq = nft_gc_seq_begin(nft_net);
+
+- if (!list_empty(&nft_net->commit_list) ||
+- !list_empty(&nft_net->module_list))
+- __nf_tables_abort(net, NFNL_ABORT_NONE);
++ WARN_ON_ONCE(!list_empty(&nft_net->commit_list));
++
++ if (!list_empty(&nft_net->module_list))
++ nf_tables_module_autoload_cleanup(net);
+
+ __nft_release_tables(net);
+
+@@ -11395,6 +11446,7 @@ static void __exit nf_tables_module_exit(void)
+ unregister_netdevice_notifier(&nf_tables_flowtable_notifier);
+ nft_chain_filter_fini();
+ nft_chain_route_fini();
++ nf_tables_trans_destroy_flush_work();
+ unregister_pernet_subsys(&nf_tables_net_ops);
+ cancel_work_sync(&trans_gc_work);
+ cancel_work_sync(&trans_destroy_work);
+diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c
+index 12684d835cb53..772ddb5824d9e 100644
+--- a/net/nfc/nci/core.c
++++ b/net/nfc/nci/core.c
+@@ -1516,6 +1516,11 @@ static void nci_rx_work(struct work_struct *work)
+ nfc_send_to_raw_sock(ndev->nfc_dev, skb,
+ RAW_PAYLOAD_NCI, NFC_DIRECTION_RX);
+
++ if (!nci_plen(skb->data)) {
++ kfree_skb(skb);
++ break;
++ }
++
+ /* Process frame */
+ switch (nci_mt(skb->data)) {
+ case NCI_MT_RSP_PKT:
+diff --git a/net/rds/rdma.c b/net/rds/rdma.c
+index a4e3c5de998be..00dbcd4d28e68 100644
+--- a/net/rds/rdma.c
++++ b/net/rds/rdma.c
+@@ -302,7 +302,7 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
+ }
+ ret = PTR_ERR(trans_private);
+ /* Trigger connection so that its ready for the next retry */
+- if (ret == -ENODEV)
++ if (ret == -ENODEV && cp)
+ rds_conn_connect_if_down(cp->cp_conn);
+ goto out;
+ }
+diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
+index dffa990a9629f..e34f1be151645 100644
+--- a/net/sched/act_skbmod.c
++++ b/net/sched/act_skbmod.c
+@@ -241,13 +241,13 @@ static int tcf_skbmod_dump(struct sk_buff *skb, struct tc_action *a,
+ struct tcf_skbmod *d = to_skbmod(a);
+ unsigned char *b = skb_tail_pointer(skb);
+ struct tcf_skbmod_params *p;
+- struct tc_skbmod opt = {
+- .index = d->tcf_index,
+- .refcnt = refcount_read(&d->tcf_refcnt) - ref,
+- .bindcnt = atomic_read(&d->tcf_bindcnt) - bind,
+- };
++ struct tc_skbmod opt;
+ struct tcf_t t;
+
+ memset(&opt, 0, sizeof(opt)); /* zero every byte of opt before filling fields */
+ opt.index = d->tcf_index;
+ opt.refcnt = refcount_read(&d->tcf_refcnt) - ref;
+ opt.bindcnt = atomic_read(&d->tcf_bindcnt) - bind;
+ spin_lock_bh(&d->tcf_lock);
+ opt.action = d->tcf_action;
+ p = rcu_dereference_protected(d->skbmod_p,
+diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
+index e9eaf637220e9..5f25a2595add5 100644
+--- a/net/sched/sch_api.c
++++ b/net/sched/sch_api.c
+@@ -809,7 +809,7 @@ void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len)
+ notify = !sch->q.qlen && !WARN_ON_ONCE(!n &&
+ !qdisc_is_offloaded);
+ /* TODO: perform the search on a per txq basis */
+- sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
++ sch = qdisc_lookup_rcu(qdisc_dev(sch), TC_H_MAJ(parentid));
+ if (sch == NULL) {
+ WARN_ON_ONCE(parentid != TC_H_ROOT);
+ break;
+diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
+index e0ce4276274be..933e12e3a55c7 100644
+--- a/net/sunrpc/svcsock.c
++++ b/net/sunrpc/svcsock.c
+@@ -1216,15 +1216,6 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
+ * MSG_SPLICE_PAGES is used exclusively to reduce the number of
+ * copy operations in this path. Therefore the caller must ensure
+ * that the pages backing @xdr are unchanging.
+- *
+- * Note that the send is non-blocking. The caller has incremented
+- * the reference count on each page backing the RPC message, and
+- * the network layer will "put" these pages when transmission is
+- * complete.
+- *
+- * This is safe for our RPC services because the memory backing
+- * the head and tail components is never kmalloc'd. These always
+- * come from pages in the svc_rqst::rq_pages array.
+ */
+ static int svc_tcp_sendmsg(struct svc_sock *svsk, struct svc_rqst *rqstp,
+ rpc_fraghdr marker, unsigned int *sentp)
+@@ -1254,6 +1245,7 @@ static int svc_tcp_sendmsg(struct svc_sock *svsk, struct svc_rqst *rqstp,
+ iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec,
+ 1 + count, sizeof(marker) + rqstp->rq_res.len);
+ ret = sock_sendmsg(svsk->sk_sock, &msg);
++ page_frag_free(buf);
+ if (ret < 0)
+ return ret;
+ *sentp += ret;
+diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
+index acf5bb74fd386..df166f6afad82 100644
+--- a/net/tls/tls_sw.c
++++ b/net/tls/tls_sw.c
+@@ -1976,10 +1976,10 @@ int tls_sw_recvmsg(struct sock *sk,
+ if (unlikely(flags & MSG_ERRQUEUE))
+ return sock_recv_errqueue(sk, msg, len, SOL_IP, IP_RECVERR);
+
+- psock = sk_psock_get(sk);
+ err = tls_rx_reader_lock(sk, ctx, flags & MSG_DONTWAIT);
+ if (err < 0)
+ return err;
++ psock = sk_psock_get(sk);
+ bpf_strp_enabled = sk_psock_strp_enabled(psock);
+
+ /* If crypto failed the connection is broken */
+@@ -2152,12 +2152,15 @@ int tls_sw_recvmsg(struct sock *sk,
+ }
+
+ /* Drain records from the rx_list & copy if required */
+- if (is_peek || is_kvec)
++ if (is_peek)
+ err = process_rx_list(ctx, msg, &control, copied + peeked,
+ decrypted - peeked, is_peek, NULL);
+ else
+ err = process_rx_list(ctx, msg, &control, 0,
+ async_copy_bytes, is_peek, NULL);
++
++ /* we could have copied less than we wanted, and possibly nothing */
++ decrypted += max(err, 0) - async_copy_bytes;
+ }
+
+ copied += decrypted;
+diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
+index a64bf601b480d..2925f5d27ad3f 100644
+--- a/net/vmw_vsock/virtio_transport.c
++++ b/net/vmw_vsock/virtio_transport.c
+@@ -109,7 +109,6 @@ virtio_transport_send_pkt_work(struct work_struct *work)
+ if (!skb)
+ break;
+
+- virtio_transport_deliver_tap_pkt(skb);
+ reply = virtio_vsock_skb_reply(skb);
+
+ sg_init_one(&hdr, virtio_vsock_hdr(skb), sizeof(*virtio_vsock_hdr(skb)));
+@@ -128,6 +127,8 @@ virtio_transport_send_pkt_work(struct work_struct *work)
+ break;
+ }
+
++ virtio_transport_deliver_tap_pkt(skb);
++
+ if (reply) {
+ struct virtqueue *rx_vq = vsock->vqs[VSOCK_VQ_RX];
+ int val;
+diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py
+index 0669bac5e900e..3f899cc7e99a9 100755
+--- a/scripts/bpf_doc.py
++++ b/scripts/bpf_doc.py
+@@ -414,8 +414,8 @@ class PrinterRST(Printer):
+ version = version.stdout.decode().rstrip()
+ except:
+ try:
+- version = subprocess.run(['make', 'kernelversion'], cwd=linuxRoot,
+- capture_output=True, check=True)
++ version = subprocess.run(['make', '-s', '--no-print-directory', 'kernelversion'],
++ cwd=linuxRoot, capture_output=True, check=True)
+ version = version.stdout.decode().rstrip()
+ except:
+ return 'Linux'
+diff --git a/scripts/mod/Makefile b/scripts/mod/Makefile
+index c9e38ad937fd4..3c54125eb3733 100644
+--- a/scripts/mod/Makefile
++++ b/scripts/mod/Makefile
+@@ -5,7 +5,7 @@ CFLAGS_REMOVE_empty.o += $(CC_FLAGS_LTO)
+ hostprogs-always-y += modpost mk_elfconfig
+ always-y += empty.o
+
+-modpost-objs := modpost.o file2alias.o sumversion.o
++modpost-objs := modpost.o file2alias.o sumversion.o symsearch.o
+
+ devicetable-offsets-file := devicetable-offsets.h
+
+@@ -16,7 +16,7 @@ targets += $(devicetable-offsets-file) devicetable-offsets.s
+
+ # dependencies on generated files need to be listed explicitly
+
+-$(obj)/modpost.o $(obj)/file2alias.o $(obj)/sumversion.o: $(obj)/elfconfig.h
++$(obj)/modpost.o $(obj)/file2alias.o $(obj)/sumversion.o $(obj)/symsearch.o: $(obj)/elfconfig.h
+ $(obj)/file2alias.o: $(obj)/$(devicetable-offsets-file)
+
+ quiet_cmd_elfconfig = MKELF $@
+diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
+index 5191fdbd3fa23..7d53942445d75 100644
+--- a/scripts/mod/modpost.c
++++ b/scripts/mod/modpost.c
+@@ -22,7 +22,6 @@
+ #include <errno.h>
+ #include "modpost.h"
+ #include "../../include/linux/license.h"
+-#include "../../include/linux/module_symbol.h"
+
+ static bool module_enabled;
+ /* Are we using CONFIG_MODVERSIONS? */
+@@ -577,11 +576,14 @@ static int parse_elf(struct elf_info *info, const char *filename)
+ *p = TO_NATIVE(*p);
+ }
+
++ symsearch_init(info);
++
+ return 1;
+ }
+
+ static void parse_elf_finish(struct elf_info *info)
+ {
++ symsearch_finish(info);
+ release_file(info->hdr, info->size);
+ }
+
+@@ -1042,75 +1044,16 @@ static int secref_whitelist(const char *fromsec, const char *fromsym,
+ return 1;
+ }
+
+-/*
+- * If there's no name there, ignore it; likewise, ignore it if it's
+- * one of the magic symbols emitted used by current tools.
+- *
+- * Otherwise if find_symbols_between() returns those symbols, they'll
+- * fail the whitelist tests and cause lots of false alarms ... fixable
+- * only by merging __exit and __init sections into __text, bloating
+- * the kernel (which is especially evil on embedded platforms).
+- */
+-static inline int is_valid_name(struct elf_info *elf, Elf_Sym *sym)
+-{
+- const char *name = elf->strtab + sym->st_name;
+-
+- if (!name || !strlen(name))
+- return 0;
+- return !is_mapping_symbol(name);
+-}
+-
+-/* Look up the nearest symbol based on the section and the address */
+-static Elf_Sym *find_nearest_sym(struct elf_info *elf, Elf_Addr addr,
+- unsigned int secndx, bool allow_negative,
+- Elf_Addr min_distance)
+-{
+- Elf_Sym *sym;
+- Elf_Sym *near = NULL;
+- Elf_Addr sym_addr, distance;
+- bool is_arm = (elf->hdr->e_machine == EM_ARM);
+-
+- for (sym = elf->symtab_start; sym < elf->symtab_stop; sym++) {
+- if (get_secindex(elf, sym) != secndx)
+- continue;
+- if (!is_valid_name(elf, sym))
+- continue;
+-
+- sym_addr = sym->st_value;
+-
+- /*
+- * For ARM Thumb instruction, the bit 0 of st_value is set
+- * if the symbol is STT_FUNC type. Mask it to get the address.
+- */
+- if (is_arm && ELF_ST_TYPE(sym->st_info) == STT_FUNC)
+- sym_addr &= ~1;
+-
+- if (addr >= sym_addr)
+- distance = addr - sym_addr;
+- else if (allow_negative)
+- distance = sym_addr - addr;
+- else
+- continue;
+-
+- if (distance <= min_distance) {
+- min_distance = distance;
+- near = sym;
+- }
+-
+- if (min_distance == 0)
+- break;
+- }
+- return near;
+-}
+-
+ static Elf_Sym *find_fromsym(struct elf_info *elf, Elf_Addr addr,
+ unsigned int secndx)
+ {
+- return find_nearest_sym(elf, addr, secndx, false, ~0);
++ return symsearch_find_nearest(elf, addr, secndx, false, ~0);
+ }
+
+ static Elf_Sym *find_tosym(struct elf_info *elf, Elf_Addr addr, Elf_Sym *sym)
+ {
++ Elf_Sym *new_sym;
++
+ /* If the supplied symbol has a valid name, return it */
+ if (is_valid_name(elf, sym))
+ return sym;
+@@ -1119,7 +1062,9 @@ static Elf_Sym *find_tosym(struct elf_info *elf, Elf_Addr addr, Elf_Sym *sym)
+ * Strive to find a better symbol name, but the resulting name may not
+ * match the symbol referenced in the original code.
+ */
+- return find_nearest_sym(elf, addr, get_secindex(elf, sym), true, 20);
++ new_sym = symsearch_find_nearest(elf, addr, get_secindex(elf, sym),
++ true, 20);
++ return new_sym ? new_sym : sym;
+ }
+
+ static bool is_executable_section(struct elf_info *elf, unsigned int secndx)
+diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h
+index 5f94c2c9f2d95..6413f26fcb6b4 100644
+--- a/scripts/mod/modpost.h
++++ b/scripts/mod/modpost.h
+@@ -10,6 +10,7 @@
+ #include <fcntl.h>
+ #include <unistd.h>
+ #include <elf.h>
++#include "../../include/linux/module_symbol.h"
+
+ #include "list.h"
+ #include "elfconfig.h"
+@@ -128,6 +129,8 @@ struct elf_info {
+ * take shndx from symtab_shndx_start[N] instead */
+ Elf32_Word *symtab_shndx_start;
+ Elf32_Word *symtab_shndx_stop;
++
++ struct symsearch *symsearch;
+ };
+
+ /* Accessor for sym->st_shndx, hides ugliness of "64k sections" */
+@@ -154,6 +157,28 @@ static inline unsigned int get_secindex(const struct elf_info *info,
+ return index;
+ }
+
++/*
++ * If there's no name there, ignore it; likewise, ignore it if it's
++ * one of the magic symbols emitted used by current tools.
++ *
++ * Internal symbols created by tools should be ignored by modpost.
++ */
++static inline int is_valid_name(struct elf_info *elf, Elf_Sym *sym)
++{
++ const char *name = elf->strtab + sym->st_name;
++
++ if (!name || !strlen(name))
++ return 0;
++ return !is_mapping_symbol(name);
++}
++
++/* symsearch.c */
++void symsearch_init(struct elf_info *elf);
++void symsearch_finish(struct elf_info *elf);
++Elf_Sym *symsearch_find_nearest(struct elf_info *elf, Elf_Addr addr,
++ unsigned int secndx, bool allow_negative,
++ Elf_Addr min_distance);
++
+ /* file2alias.c */
+ void handle_moddevtable(struct module *mod, struct elf_info *info,
+ Elf_Sym *sym, const char *symname);
+diff --git a/scripts/mod/symsearch.c b/scripts/mod/symsearch.c
+new file mode 100644
+index 0000000000000..aa4ed51f9960c
+--- /dev/null
++++ b/scripts/mod/symsearch.c
+@@ -0,0 +1,199 @@
++// SPDX-License-Identifier: GPL-2.0
++
++/*
++ * Helper functions for finding the symbol in an ELF which is "nearest"
++ * to a given address.
++ */
++
++#include "modpost.h"
++
++struct syminfo {
++ unsigned int symbol_index;
++ unsigned int section_index;
++ Elf_Addr addr;
++};
++
++/*
++ * Container used to hold an entire binary search table.
++ * Entries in table are ascending, sorted first by section_index,
++ * then by addr, and last by symbol_index. The sorting by
++ * symbol_index is used to ensure predictable behavior when
++ * multiple symbols are present with the same address; all
++ * symbols past the first are effectively ignored, by eliding
++ * them in symsearch_fixup().
++ */
++struct symsearch {
++ unsigned int table_size;
++ struct syminfo table[];
++};
++
++static int syminfo_compare(const void *s1, const void *s2)
++{
++ const struct syminfo *sym1 = s1;
++ const struct syminfo *sym2 = s2;
++
++ if (sym1->section_index > sym2->section_index)
++ return 1;
++ if (sym1->section_index < sym2->section_index)
++ return -1;
++ if (sym1->addr > sym2->addr)
++ return 1;
++ if (sym1->addr < sym2->addr)
++ return -1;
++ if (sym1->symbol_index > sym2->symbol_index)
++ return 1;
++ if (sym1->symbol_index < sym2->symbol_index)
++ return -1;
++ return 0;
++}
++
++static unsigned int symbol_count(struct elf_info *elf)
++{
++ unsigned int result = 0;
++
++ for (Elf_Sym *sym = elf->symtab_start; sym < elf->symtab_stop; sym++) {
++ if (is_valid_name(elf, sym))
++ result++;
++ }
++ return result;
++}
++
++/*
++ * Populate the search array that we just allocated.
++ * Be slightly paranoid here. The ELF file is mmap'd and could
++ * conceivably change between symbol_count() and symsearch_populate().
++ * If we notice any difference, bail out rather than potentially
++ * propagating errors or crashing.
++ */
++static void symsearch_populate(struct elf_info *elf,
++ struct syminfo *table,
++ unsigned int table_size)
++{
++ bool is_arm = (elf->hdr->e_machine == EM_ARM);
++
++ for (Elf_Sym *sym = elf->symtab_start; sym < elf->symtab_stop; sym++) {
++ if (is_valid_name(elf, sym)) {
++ if (table_size-- == 0)
++ fatal("%s: size mismatch\n", __func__);
++ table->symbol_index = sym - elf->symtab_start;
++ table->section_index = get_secindex(elf, sym);
++ table->addr = sym->st_value;
++
++ /*
++ * For ARM Thumb instruction, the bit 0 of st_value is
++ * set if the symbol is STT_FUNC type. Mask it to get
++ * the address.
++ */
++ if (is_arm && ELF_ST_TYPE(sym->st_info) == STT_FUNC)
++ table->addr &= ~1;
++
++ table++;
++ }
++ }
++
++ if (table_size != 0)
++ fatal("%s: size mismatch\n", __func__);
++}
++
++/*
++ * Do any fixups on the table after sorting.
++ * For now, this just finds adjacent entries which have
++ * the same section_index and addr, and it propagates
++ * the first symbol_index over the subsequent entries,
++ * so that only one symbol_index is seen for any given
++ * section_index and addr. This ensures that whether
++ * we're looking at an address from "above" or "below"
++ * that we see the same symbol_index.
++ * This does leave some duplicate entries in the table;
++ * in practice, these are a small fraction of the
++ * total number of entries, and they are harmless to
++ * the binary search algorithm other than a few occasional
++ * unnecessary comparisons.
++ */
++static void symsearch_fixup(struct syminfo *table, unsigned int table_size)
++{
++ /* Don't look at index 0, it will never change. */
++ for (unsigned int i = 1; i < table_size; i++) {
++ if (table[i].addr == table[i - 1].addr &&
++ table[i].section_index == table[i - 1].section_index) {
++ table[i].symbol_index = table[i - 1].symbol_index;
++ }
++ }
++}
++
++void symsearch_init(struct elf_info *elf)
++{
++ unsigned int table_size = symbol_count(elf);
++
++ elf->symsearch = NOFAIL(malloc(sizeof(struct symsearch) +
++ sizeof(struct syminfo) * table_size));
++ elf->symsearch->table_size = table_size;
++
++ symsearch_populate(elf, elf->symsearch->table, table_size);
++ qsort(elf->symsearch->table, table_size,
++ sizeof(struct syminfo), syminfo_compare);
++
++ symsearch_fixup(elf->symsearch->table, table_size);
++}
++
++void symsearch_finish(struct elf_info *elf)
++{
++ free(elf->symsearch);
++ elf->symsearch = NULL;
++}
++
++/*
++ * Find the syminfo which is in secndx and "nearest" to addr.
++ * allow_negative: allow returning a symbol whose address is > addr.
++ * min_distance: ignore symbols which are further away than this.
++ *
++ * Returns a pointer into the symbol table for success.
++ * Returns NULL if no legal symbol is found within the requested range.
++ */
++Elf_Sym *symsearch_find_nearest(struct elf_info *elf, Elf_Addr addr,
++ unsigned int secndx, bool allow_negative,
++ Elf_Addr min_distance)
++{
++ unsigned int hi = elf->symsearch->table_size;
++ unsigned int lo = 0;
++ struct syminfo *table = elf->symsearch->table;
++ struct syminfo target;
++
++ target.addr = addr;
++ target.section_index = secndx;
++ target.symbol_index = ~0; /* compares greater than any actual index */
++ while (hi > lo) {
++ unsigned int mid = lo + (hi - lo) / 2; /* Avoids overflow */
++
++ if (syminfo_compare(&table[mid], &target) > 0)
++ hi = mid;
++ else
++ lo = mid + 1;
++ }
++
++ /*
++ * table[hi], if it exists, is the first entry in the array which
++ * lies beyond target. table[hi - 1], if it exists, is the last
++ * entry in the array which comes before target, including the
++ * case where it perfectly matches the section and the address.
++ *
++ * Note -- if the address we're looking up falls perfectly
++ * in the middle of two symbols, this is written to always
++ * prefer the symbol with the lower address.
++ */
++ Elf_Sym *result = NULL;
++
++ if (allow_negative &&
++ hi < elf->symsearch->table_size &&
++ table[hi].section_index == secndx &&
++ table[hi].addr - addr <= min_distance) {
++ min_distance = table[hi].addr - addr;
++ result = &elf->symtab_start[table[hi].symbol_index];
++ }
++ if (hi > 0 &&
++ table[hi - 1].section_index == secndx &&
++ addr - table[hi - 1].addr <= min_distance) {
++ result = &elf->symtab_start[table[hi - 1].symbol_index];
++ }
++ return result;
++}
+diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c
+index 6fa640263216f..2c23a5a286086 100644
+--- a/security/selinux/selinuxfs.c
++++ b/security/selinux/selinuxfs.c
+@@ -2135,7 +2135,6 @@ static struct file_system_type sel_fs_type = {
+ .kill_sb = sel_kill_sb,
+ };
+
+-static struct vfsmount *selinuxfs_mount __ro_after_init;
+ struct path selinux_null __ro_after_init;
+
+ static int __init init_sel_fs(void)
+@@ -2157,18 +2156,21 @@ static int __init init_sel_fs(void)
+ return err;
+ }
+
+- selinux_null.mnt = selinuxfs_mount = kern_mount(&sel_fs_type);
+- if (IS_ERR(selinuxfs_mount)) {
++ selinux_null.mnt = kern_mount(&sel_fs_type);
++ if (IS_ERR(selinux_null.mnt)) {
+ pr_err("selinuxfs: could not mount!\n");
+- err = PTR_ERR(selinuxfs_mount);
+- selinuxfs_mount = NULL;
++ err = PTR_ERR(selinux_null.mnt);
++ selinux_null.mnt = NULL;
++ return err;
+ }
++
+ selinux_null.dentry = d_hash_and_lookup(selinux_null.mnt->mnt_root,
+ &null_name);
+ if (IS_ERR(selinux_null.dentry)) {
+ pr_err("selinuxfs: could not lookup null!\n");
+ err = PTR_ERR(selinux_null.dentry);
+ selinux_null.dentry = NULL;
++ return err;
+ }
+
+ return err;
+diff --git a/sound/pci/emu10k1/emu10k1_callback.c b/sound/pci/emu10k1/emu10k1_callback.c
+index d36234b88fb42..941bfbf812ed3 100644
+--- a/sound/pci/emu10k1/emu10k1_callback.c
++++ b/sound/pci/emu10k1/emu10k1_callback.c
+@@ -255,7 +255,7 @@ lookup_voices(struct snd_emux *emu, struct snd_emu10k1 *hw,
+ /* check if sample is finished playing (non-looping only) */
+ if (bp != best + V_OFF && bp != best + V_FREE &&
+ (vp->reg.sample_mode & SNDRV_SFNT_SAMPLE_SINGLESHOT)) {
+- val = snd_emu10k1_ptr_read(hw, CCCA_CURRADDR, vp->ch) - 64;
++ val = snd_emu10k1_ptr_read(hw, CCCA_CURRADDR, vp->ch);
+ if (val >= vp->reg.loopstart)
+ bp = best + V_OFF;
+ }
+@@ -362,7 +362,7 @@ start_voice(struct snd_emux_voice *vp)
+
+ map = (hw->silent_page.addr << hw->address_mode) | (hw->address_mode ? MAP_PTI_MASK1 : MAP_PTI_MASK0);
+
+- addr = vp->reg.start + 64;
++ addr = vp->reg.start;
+ temp = vp->reg.parm.filterQ;
+ ccca = (temp << 28) | addr;
+ if (vp->apitch < 0xe400)
+@@ -430,9 +430,6 @@ start_voice(struct snd_emux_voice *vp)
+ /* Q & current address (Q 4bit value, MSB) */
+ CCCA, ccca,
+
+- /* cache */
+- CCR, REG_VAL_PUT(CCR_CACHEINVALIDSIZE, 64),
+-
+ /* reset volume */
+ VTFT, vtarget | vp->ftarget,
+ CVCF, vtarget | CVCF_CURRENTFILTER_MASK,
+diff --git a/sound/pci/hda/cs35l56_hda.c b/sound/pci/hda/cs35l56_hda.c
+index 7adc1d373d65c..27848d6469636 100644
+--- a/sound/pci/hda/cs35l56_hda.c
++++ b/sound/pci/hda/cs35l56_hda.c
+@@ -978,14 +978,14 @@ int cs35l56_hda_common_probe(struct cs35l56_hda *cs35l56, int id)
+ pm_runtime_mark_last_busy(cs35l56->base.dev);
+ pm_runtime_enable(cs35l56->base.dev);
+
++ cs35l56->base.init_done = true;
++
+ ret = component_add(cs35l56->base.dev, &cs35l56_hda_comp_ops);
+ if (ret) {
+ dev_err(cs35l56->base.dev, "Register component failed: %d\n", ret);
+ goto pm_err;
+ }
+
+- cs35l56->base.init_done = true;
+-
+ return 0;
+
+ pm_err:
+diff --git a/sound/pci/hda/cs35l56_hda_i2c.c b/sound/pci/hda/cs35l56_hda_i2c.c
+index 757a4d193e0fb..c31f60b0421e5 100644
+--- a/sound/pci/hda/cs35l56_hda_i2c.c
++++ b/sound/pci/hda/cs35l56_hda_i2c.c
+@@ -49,10 +49,19 @@ static const struct i2c_device_id cs35l56_hda_i2c_id[] = {
+ {}
+ };
+
++static const struct acpi_device_id cs35l56_acpi_hda_match[] = {
++ { "CSC3554", 0 },
++ { "CSC3556", 0 },
++ { "CSC3557", 0 },
++ {}
++};
++MODULE_DEVICE_TABLE(acpi, cs35l56_acpi_hda_match);
++
+ static struct i2c_driver cs35l56_hda_i2c_driver = {
+ .driver = {
+- .name = "cs35l56-hda",
+- .pm = &cs35l56_hda_pm_ops,
++ .name = "cs35l56-hda",
++ .acpi_match_table = cs35l56_acpi_hda_match,
++ .pm = &cs35l56_hda_pm_ops,
+ },
+ .id_table = cs35l56_hda_i2c_id,
+ .probe = cs35l56_hda_i2c_probe,
+diff --git a/sound/pci/hda/cs35l56_hda_spi.c b/sound/pci/hda/cs35l56_hda_spi.c
+index 756aec342eab7..52c9e04b3c55f 100644
+--- a/sound/pci/hda/cs35l56_hda_spi.c
++++ b/sound/pci/hda/cs35l56_hda_spi.c
+@@ -49,10 +49,19 @@ static const struct spi_device_id cs35l56_hda_spi_id[] = {
+ {}
+ };
+
++static const struct acpi_device_id cs35l56_acpi_hda_match[] = {
++ { "CSC3554", 0 },
++ { "CSC3556", 0 },
++ { "CSC3557", 0 },
++ {}
++};
++MODULE_DEVICE_TABLE(acpi, cs35l56_acpi_hda_match);
++
+ static struct spi_driver cs35l56_hda_spi_driver = {
+ .driver = {
+- .name = "cs35l56-hda",
+- .pm = &cs35l56_hda_pm_ops,
++ .name = "cs35l56-hda",
++ .acpi_match_table = cs35l56_acpi_hda_match,
++ .pm = &cs35l56_hda_pm_ops,
+ },
+ .id_table = cs35l56_hda_spi_id,
+ .probe = cs35l56_hda_spi_probe,
+diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
+index 0db9326b6f844..b1c2fb43cab69 100644
+--- a/sound/pci/hda/patch_realtek.c
++++ b/sound/pci/hda/patch_realtek.c
+@@ -10072,7 +10072,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
+ SND_PCI_QUIRK(0x10ec, 0x1252, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK),
+ SND_PCI_QUIRK(0x10ec, 0x1254, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK),
+ SND_PCI_QUIRK(0x10ec, 0x12cc, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK),
+- SND_PCI_QUIRK(0x10f7, 0x8338, "Panasonic CF-SZ6", ALC269_FIXUP_HEADSET_MODE),
++ SND_PCI_QUIRK(0x10f7, 0x8338, "Panasonic CF-SZ6", ALC269_FIXUP_ASPIRE_HEADSET_MIC),
+ SND_PCI_QUIRK(0x144d, 0xc109, "Samsung Ativ book 9 (NP900X3G)", ALC269_FIXUP_INV_DMIC),
+ SND_PCI_QUIRK(0x144d, 0xc169, "Samsung Notebook 9 Pen (NP930SBE-K01US)", ALC298_FIXUP_SAMSUNG_AMP),
+ SND_PCI_QUIRK(0x144d, 0xc176, "Samsung Notebook 9 Pro (NP930MBE-K04US)", ALC298_FIXUP_SAMSUNG_AMP),
+@@ -10302,6 +10302,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
+ SND_PCI_QUIRK(0x1d05, 0x1147, "TongFang GMxTGxx", ALC269_FIXUP_NO_SHUTUP),
+ SND_PCI_QUIRK(0x1d05, 0x115c, "TongFang GMxTGxx", ALC269_FIXUP_NO_SHUTUP),
+ SND_PCI_QUIRK(0x1d05, 0x121b, "TongFang GMxAGxx", ALC269_FIXUP_NO_SHUTUP),
++ SND_PCI_QUIRK(0x1d05, 0x1387, "TongFang GMxIXxx", ALC2XX_FIXUP_HEADSET_MIC),
+ SND_PCI_QUIRK(0x1d72, 0x1602, "RedmiBook", ALC255_FIXUP_XIAOMI_HEADSET_MIC),
+ SND_PCI_QUIRK(0x1d72, 0x1701, "XiaomiNotebook Pro", ALC298_FIXUP_DELL1_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x1d72, 0x1901, "RedmiBook 14", ALC256_FIXUP_ASUS_HEADSET_MIC),
+diff --git a/sound/soc/amd/acp/acp-pci.c b/sound/soc/amd/acp/acp-pci.c
+index a32c14a109b77..223238f662f83 100644
+--- a/sound/soc/amd/acp/acp-pci.c
++++ b/sound/soc/amd/acp/acp-pci.c
+@@ -107,7 +107,10 @@ static int acp_pci_probe(struct pci_dev *pci, const struct pci_device_id *pci_id
+ goto unregister_dmic_dev;
+ }
+
+- acp_init(chip);
++ ret = acp_init(chip);
++ if (ret)
++ goto unregister_dmic_dev;
++
+ res = devm_kcalloc(&pci->dev, num_res, sizeof(struct resource), GFP_KERNEL);
+ if (!res) {
+ ret = -ENOMEM;
+diff --git a/sound/soc/codecs/rt5682-sdw.c b/sound/soc/codecs/rt5682-sdw.c
+index e67c2e19cb1a7..1fdbef5fd6cba 100644
+--- a/sound/soc/codecs/rt5682-sdw.c
++++ b/sound/soc/codecs/rt5682-sdw.c
+@@ -763,12 +763,12 @@ static int __maybe_unused rt5682_dev_resume(struct device *dev)
+ return 0;
+
+ if (!slave->unattach_request) {
++ mutex_lock(&rt5682->disable_irq_lock);
+ if (rt5682->disable_irq == true) {
+- mutex_lock(&rt5682->disable_irq_lock);
+ sdw_write_no_pm(slave, SDW_SCP_INTMASK1, SDW_SCP_INT1_IMPL_DEF);
+ rt5682->disable_irq = false;
+- mutex_unlock(&rt5682->disable_irq_lock);
+ }
++ mutex_unlock(&rt5682->disable_irq_lock);
+ goto regmap_sync;
+ }
+
+diff --git a/sound/soc/codecs/rt711-sdca-sdw.c b/sound/soc/codecs/rt711-sdca-sdw.c
+index 935e597022d32..b8471b2d8f4f1 100644
+--- a/sound/soc/codecs/rt711-sdca-sdw.c
++++ b/sound/soc/codecs/rt711-sdca-sdw.c
+@@ -438,13 +438,13 @@ static int __maybe_unused rt711_sdca_dev_resume(struct device *dev)
+ return 0;
+
+ if (!slave->unattach_request) {
++ mutex_lock(&rt711->disable_irq_lock);
+ if (rt711->disable_irq == true) {
+- mutex_lock(&rt711->disable_irq_lock);
+ sdw_write_no_pm(slave, SDW_SCP_SDCA_INTMASK1, SDW_SCP_SDCA_INTMASK_SDCA_0);
+ sdw_write_no_pm(slave, SDW_SCP_SDCA_INTMASK2, SDW_SCP_SDCA_INTMASK_SDCA_8);
+ rt711->disable_irq = false;
+- mutex_unlock(&rt711->disable_irq_lock);
+ }
++ mutex_unlock(&rt711->disable_irq_lock);
+ goto regmap_sync;
+ }
+
+diff --git a/sound/soc/codecs/rt711-sdw.c b/sound/soc/codecs/rt711-sdw.c
+index 3f5773310ae8c..988451f24a756 100644
+--- a/sound/soc/codecs/rt711-sdw.c
++++ b/sound/soc/codecs/rt711-sdw.c
+@@ -536,12 +536,12 @@ static int __maybe_unused rt711_dev_resume(struct device *dev)
+ return 0;
+
+ if (!slave->unattach_request) {
++ mutex_lock(&rt711->disable_irq_lock);
+ if (rt711->disable_irq == true) {
+- mutex_lock(&rt711->disable_irq_lock);
+ sdw_write_no_pm(slave, SDW_SCP_INTMASK1, SDW_SCP_INT1_IMPL_DEF);
+ rt711->disable_irq = false;
+- mutex_unlock(&rt711->disable_irq_lock);
+ }
++ mutex_unlock(&rt711->disable_irq_lock);
+ goto regmap_sync;
+ }
+
+diff --git a/sound/soc/codecs/rt712-sdca-sdw.c b/sound/soc/codecs/rt712-sdca-sdw.c
+index 6b644a89c5890..ba877432cea61 100644
+--- a/sound/soc/codecs/rt712-sdca-sdw.c
++++ b/sound/soc/codecs/rt712-sdca-sdw.c
+@@ -438,13 +438,14 @@ static int __maybe_unused rt712_sdca_dev_resume(struct device *dev)
+ return 0;
+
+ if (!slave->unattach_request) {
++ mutex_lock(&rt712->disable_irq_lock);
+ if (rt712->disable_irq == true) {
+- mutex_lock(&rt712->disable_irq_lock);
++
+ sdw_write_no_pm(slave, SDW_SCP_SDCA_INTMASK1, SDW_SCP_SDCA_INTMASK_SDCA_0);
+ sdw_write_no_pm(slave, SDW_SCP_SDCA_INTMASK2, SDW_SCP_SDCA_INTMASK_SDCA_8);
+ rt712->disable_irq = false;
+- mutex_unlock(&rt712->disable_irq_lock);
+ }
++ mutex_unlock(&rt712->disable_irq_lock);
+ goto regmap_sync;
+ }
+
+diff --git a/sound/soc/codecs/rt722-sdca-sdw.c b/sound/soc/codecs/rt722-sdca-sdw.c
+index a38ec58622145..43a4e79e56966 100644
+--- a/sound/soc/codecs/rt722-sdca-sdw.c
++++ b/sound/soc/codecs/rt722-sdca-sdw.c
+@@ -464,13 +464,13 @@ static int __maybe_unused rt722_sdca_dev_resume(struct device *dev)
+ return 0;
+
+ if (!slave->unattach_request) {
++ mutex_lock(&rt722->disable_irq_lock);
+ if (rt722->disable_irq == true) {
+- mutex_lock(&rt722->disable_irq_lock);
+ sdw_write_no_pm(slave, SDW_SCP_SDCA_INTMASK1, SDW_SCP_SDCA_INTMASK_SDCA_6);
+ sdw_write_no_pm(slave, SDW_SCP_SDCA_INTMASK2, SDW_SCP_SDCA_INTMASK_SDCA_8);
+ rt722->disable_irq = false;
+- mutex_unlock(&rt722->disable_irq_lock);
+ }
++ mutex_unlock(&rt722->disable_irq_lock);
+ goto regmap_sync;
+ }
+
+diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c
+index 72b90a7ee4b68..b9c20e29fe63e 100644
+--- a/sound/soc/codecs/wm_adsp.c
++++ b/sound/soc/codecs/wm_adsp.c
+@@ -683,11 +683,12 @@ static void wm_adsp_control_remove(struct cs_dsp_coeff_ctl *cs_ctl)
+ int wm_adsp_write_ctl(struct wm_adsp *dsp, const char *name, int type,
+ unsigned int alg, void *buf, size_t len)
+ {
+- struct cs_dsp_coeff_ctl *cs_ctl = cs_dsp_get_ctl(&dsp->cs_dsp, name, type, alg);
++ struct cs_dsp_coeff_ctl *cs_ctl;
+ struct wm_coeff_ctl *ctl;
+ int ret;
+
+ mutex_lock(&dsp->cs_dsp.pwr_lock);
++ cs_ctl = cs_dsp_get_ctl(&dsp->cs_dsp, name, type, alg);
+ ret = cs_dsp_coeff_write_ctrl(cs_ctl, 0, buf, len);
+ mutex_unlock(&dsp->cs_dsp.pwr_lock);
+
+diff --git a/sound/soc/soc-ops.c b/sound/soc/soc-ops.c
+index 2d25748ca7066..b27e89ff6a167 100644
+--- a/sound/soc/soc-ops.c
++++ b/sound/soc/soc-ops.c
+@@ -263,7 +263,7 @@ int snd_soc_get_volsw(struct snd_kcontrol *kcontrol,
+ int max = mc->max;
+ int min = mc->min;
+ int sign_bit = mc->sign_bit;
+- unsigned int mask = (1 << fls(max)) - 1;
++ unsigned int mask = (1ULL << fls(max)) - 1;
+ unsigned int invert = mc->invert;
+ int val;
+ int ret;
+diff --git a/sound/soc/sof/amd/acp.c b/sound/soc/sof/amd/acp.c
+index 4c54ce212de6a..cc006d7038d97 100644
+--- a/sound/soc/sof/amd/acp.c
++++ b/sound/soc/sof/amd/acp.c
+@@ -522,6 +522,10 @@ int amd_sof_acp_probe(struct snd_sof_dev *sdev)
+ goto unregister_dev;
+ }
+
++ ret = acp_init(sdev);
++ if (ret < 0)
++ goto free_smn_dev;
++
+ sdev->ipc_irq = pci->irq;
+ ret = request_threaded_irq(sdev->ipc_irq, acp_irq_handler, acp_irq_thread,
+ IRQF_SHARED, "AudioDSP", sdev);
+@@ -531,10 +535,6 @@ int amd_sof_acp_probe(struct snd_sof_dev *sdev)
+ goto free_smn_dev;
+ }
+
+- ret = acp_init(sdev);
+- if (ret < 0)
+- goto free_ipc_irq;
+-
+ sdev->dsp_box.offset = 0;
+ sdev->dsp_box.size = BOX_SIZE_512;
+
+diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
+index 798e60b5454b7..845a4023ba44e 100644
+--- a/tools/arch/x86/include/asm/cpufeatures.h
++++ b/tools/arch/x86/include/asm/cpufeatures.h
+@@ -219,7 +219,7 @@
+ #define X86_FEATURE_IBRS ( 7*32+25) /* Indirect Branch Restricted Speculation */
+ #define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */
+ #define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */
+-#define X86_FEATURE_ZEN (7*32+28) /* "" CPU based on Zen microarchitecture */
++#define X86_FEATURE_ZEN ( 7*32+28) /* "" Generic flag for all Zen and newer */
+ #define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* "" L1TF workaround PTE inversion */
+ #define X86_FEATURE_IBRS_ENHANCED ( 7*32+30) /* Enhanced IBRS */
+ #define X86_FEATURE_MSR_IA32_FEAT_CTL ( 7*32+31) /* "" MSR IA32_FEAT_CTL configured */
+diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py
+index 897af958cee85..575b7e248e521 100755
+--- a/tools/net/ynl/ynl-gen-c.py
++++ b/tools/net/ynl/ynl-gen-c.py
+@@ -198,8 +198,11 @@ class Type(SpecAttr):
+ presence = ''
+ for i in range(0, len(ref)):
+ presence = f"{var}->{'.'.join(ref[:i] + [''])}_present.{ref[i]}"
+- if self.presence_type() == 'bit':
+- code.append(presence + ' = 1;')
++ # Every layer below last is a nest, so we know it uses bit presence
++ # last layer is "self" and may be a complex type
++ if i == len(ref) - 1 and self.presence_type() != 'bit':
++ continue
++ code.append(presence + ' = 1;')
+ code += self._setter_lines(ri, member, presence)
+
+ func_name = f"{op_prefix(ri, direction, deref=deref)}_set_{'_'.join(ref)}"
+diff --git a/tools/testing/selftests/mm/vm_util.h b/tools/testing/selftests/mm/vm_util.h
+index c7fa61f0dff8d..0c603bec5e209 100644
+--- a/tools/testing/selftests/mm/vm_util.h
++++ b/tools/testing/selftests/mm/vm_util.h
+@@ -3,7 +3,7 @@
+ #include <stdbool.h>
+ #include <sys/mman.h>
+ #include <err.h>
+-#include <string.h> /* ffsl() */
++#include <strings.h> /* ffsl() */
+ #include <unistd.h> /* _SC_PAGESIZE */
+
+ #define BIT_ULL(nr) (1ULL << (nr))
+diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+index 3b971d1617d81..7647c74adb26c 100755
+--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
++++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+@@ -1,6 +1,11 @@
+ #!/bin/bash
+ # SPDX-License-Identifier: GPL-2.0
+
++# Double quotes to prevent globbing and word splitting is recommended in new
++# code but we accept it, especially because there were too many before having
++# address all other issues detected by shellcheck.
++#shellcheck disable=SC2086
++
+ . "$(dirname "${0}")/mptcp_lib.sh"
+
+ time_start=$(date +%s)
+@@ -13,7 +18,6 @@ sout=""
+ cin_disconnect=""
+ cin=""
+ cout=""
+-ksft_skip=4
+ capture=false
+ timeout_poll=30
+ timeout_test=$((timeout_poll * 2 + 1))
+@@ -131,6 +135,8 @@ ns4="ns4-$rndh"
+ TEST_COUNT=0
+ TEST_GROUP=""
+
++# This function is used in the cleanup trap
++#shellcheck disable=SC2317
+ cleanup()
+ {
+ rm -f "$cin_disconnect" "$cout_disconnect"
+@@ -225,8 +231,9 @@ set_ethtool_flags() {
+ local dev="$2"
+ local flags="$3"
+
+- ip netns exec $ns ethtool -K $dev $flags 2>/dev/null
+- [ $? -eq 0 ] && echo "INFO: set $ns dev $dev: ethtool -K $flags"
++ if ip netns exec $ns ethtool -K $dev $flags 2>/dev/null; then
++ echo "INFO: set $ns dev $dev: ethtool -K $flags"
++ fi
+ }
+
+ set_random_ethtool_flags() {
+@@ -363,7 +370,7 @@ do_transfer()
+ local extra_args="$7"
+
+ local port
+- port=$((10000+$TEST_COUNT))
++ port=$((10000+TEST_COUNT))
+ TEST_COUNT=$((TEST_COUNT+1))
+
+ if [ "$rcvbuf" -gt 0 ]; then
+@@ -420,12 +427,20 @@ do_transfer()
+ nstat -n
+ fi
+
+- local stat_synrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX")
+- local stat_ackrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX")
+- local stat_cookietx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent")
+- local stat_cookierx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv")
+- local stat_csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr")
+- local stat_csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr")
++ local stat_synrx_last_l
++ local stat_ackrx_last_l
++ local stat_cookietx_last
++ local stat_cookierx_last
++ local stat_csum_err_s
++ local stat_csum_err_c
++ local stat_tcpfb_last_l
++ stat_synrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX")
++ stat_ackrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX")
++ stat_cookietx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent")
++ stat_cookierx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv")
++ stat_csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr")
++ stat_csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr")
++ stat_tcpfb_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableFallbackACK")
+
+ timeout ${timeout_test} \
+ ip netns exec ${listener_ns} \
+@@ -488,11 +503,18 @@ do_transfer()
+ check_transfer $cin $sout "file received by server"
+ rets=$?
+
+- local stat_synrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX")
+- local stat_ackrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX")
+- local stat_cookietx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent")
+- local stat_cookierx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv")
+- local stat_ooo_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtTCPOFOQueue")
++ local stat_synrx_now_l
++ local stat_ackrx_now_l
++ local stat_cookietx_now
++ local stat_cookierx_now
++ local stat_ooo_now
++ local stat_tcpfb_now_l
++ stat_synrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX")
++ stat_ackrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX")
++ stat_cookietx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent")
++ stat_cookierx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv")
++ stat_ooo_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtTCPOFOQueue")
++ stat_tcpfb_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableFallbackACK")
+
+ expect_synrx=$((stat_synrx_last_l))
+ expect_ackrx=$((stat_ackrx_last_l))
+@@ -501,8 +523,8 @@ do_transfer()
+ cookies=${cookies##*=}
+
+ if [ ${cl_proto} = "MPTCP" ] && [ ${srv_proto} = "MPTCP" ]; then
+- expect_synrx=$((stat_synrx_last_l+$connect_per_transfer))
+- expect_ackrx=$((stat_ackrx_last_l+$connect_per_transfer))
++ expect_synrx=$((stat_synrx_last_l+connect_per_transfer))
++ expect_ackrx=$((stat_ackrx_last_l+connect_per_transfer))
+ fi
+
+ if [ ${stat_synrx_now_l} -lt ${expect_synrx} ]; then
+@@ -510,7 +532,7 @@ do_transfer()
+ "${stat_synrx_now_l}" "${expect_synrx}" 1>&2
+ retc=1
+ fi
+- if [ ${stat_ackrx_now_l} -lt ${expect_ackrx} -a ${stat_ooo_now} -eq 0 ]; then
++ if [ ${stat_ackrx_now_l} -lt ${expect_ackrx} ] && [ ${stat_ooo_now} -eq 0 ]; then
+ if [ ${stat_ooo_now} -eq 0 ]; then
+ printf "[ FAIL ] lower MPC ACK rx (%d) than expected (%d)\n" \
+ "${stat_ackrx_now_l}" "${expect_ackrx}" 1>&2
+@@ -521,18 +543,20 @@ do_transfer()
+ fi
+
+ if $checksum; then
+- local csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr")
+- local csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr")
++ local csum_err_s
++ local csum_err_c
++ csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr")
++ csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr")
+
+ local csum_err_s_nr=$((csum_err_s - stat_csum_err_s))
+ if [ $csum_err_s_nr -gt 0 ]; then
+- printf "[ FAIL ]\nserver got $csum_err_s_nr data checksum error[s]"
++ printf "[ FAIL ]\nserver got %d data checksum error[s]" ${csum_err_s_nr}
+ rets=1
+ fi
+
+ local csum_err_c_nr=$((csum_err_c - stat_csum_err_c))
+ if [ $csum_err_c_nr -gt 0 ]; then
+- printf "[ FAIL ]\nclient got $csum_err_c_nr data checksum error[s]"
++ printf "[ FAIL ]\nclient got %d data checksum error[s]" ${csum_err_c_nr}
+ retc=1
+ fi
+ fi
+@@ -544,6 +568,11 @@ do_transfer()
+ mptcp_lib_result_fail "${TEST_GROUP}: ${result_msg}"
+ fi
+
++ if [ ${stat_ooo_now} -eq 0 ] && [ ${stat_tcpfb_last_l} -ne ${stat_tcpfb_now_l} ]; then
++ mptcp_lib_pr_fail "unexpected fallback to TCP"
++ rets=1
++ fi
++
+ if [ $cookies -eq 2 ];then
+ if [ $stat_cookietx_last -ge $stat_cookietx_now ] ;then
+ printf " WARN: CookieSent: did not advance"
+@@ -701,7 +730,7 @@ run_test_transparent()
+ return
+ fi
+
+-ip netns exec "$listener_ns" nft -f /dev/stdin <<"EOF"
++ if ! ip netns exec "$listener_ns" nft -f /dev/stdin <<"EOF"
+ flush ruleset
+ table inet mangle {
+ chain divert {
+@@ -712,7 +741,7 @@ table inet mangle {
+ }
+ }
+ EOF
+- if [ $? -ne 0 ]; then
++ then
+ echo "SKIP: $msg, could not load nft ruleset"
+ mptcp_lib_fail_if_expected_feature "nft rules"
+ mptcp_lib_result_skip "${TEST_GROUP}"
+@@ -727,8 +756,7 @@ EOF
+ local_addr="0.0.0.0"
+ fi
+
+- ip -net "$listener_ns" $r6flag rule add fwmark 1 lookup 100
+- if [ $? -ne 0 ]; then
++ if ! ip -net "$listener_ns" $r6flag rule add fwmark 1 lookup 100; then
+ ip netns exec "$listener_ns" nft flush ruleset
+ echo "SKIP: $msg, ip $r6flag rule failed"
+ mptcp_lib_fail_if_expected_feature "ip rule"
+@@ -736,8 +764,7 @@ EOF
+ return
+ fi
+
+- ip -net "$listener_ns" route add local $local_addr/0 dev lo table 100
+- if [ $? -ne 0 ]; then
++ if ! ip -net "$listener_ns" route add local $local_addr/0 dev lo table 100; then
+ ip netns exec "$listener_ns" nft flush ruleset
+ ip -net "$listener_ns" $r6flag rule del fwmark 1 lookup 100
+ echo "SKIP: $msg, ip route add local $local_addr failed"
+@@ -900,7 +927,7 @@ stop_if_error "Could not even run ping tests"
+ echo -n "INFO: Using loss of $tc_loss "
+ test "$tc_delay" -gt 0 && echo -n "delay $tc_delay ms "
+
+-reorder_delay=$(($tc_delay / 4))
++reorder_delay=$((tc_delay / 4))
+
+ if [ -z "${tc_reorder}" ]; then
+ reorder1=$((RANDOM%10))
+diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
+index 34c3423469679..00cf4efac4c21 100755
+--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
++++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
+@@ -796,7 +796,7 @@ pm_nl_check_endpoint()
+ [ -n "$_flags" ]; flags="flags $_flags"
+ shift
+ elif [ $1 = "dev" ]; then
+- [ -n "$2" ]; dev="dev $1"
++ [ -n "$2" ]; dev="dev $2"
+ shift
+ elif [ $1 = "id" ]; then
+ _id=$2
+@@ -3507,6 +3507,8 @@ endpoint_tests()
+ local tests_pid=$!
+
+ wait_mpj $ns2
++ pm_nl_check_endpoint "creation" \
++ $ns2 10.0.2.2 id 2 flags subflow dev ns2eth2
+ chk_subflow_nr "before delete" 2
+ chk_mptcp_info subflows 1 subflows 1
+
+diff --git a/tools/testing/selftests/net/reuseaddr_conflict.c b/tools/testing/selftests/net/reuseaddr_conflict.c
+index 7c5b12664b03b..bfb07dc495186 100644
+--- a/tools/testing/selftests/net/reuseaddr_conflict.c
++++ b/tools/testing/selftests/net/reuseaddr_conflict.c
+@@ -109,6 +109,6 @@ int main(void)
+ fd1 = open_port(0, 1);
+ if (fd1 >= 0)
+ error(1, 0, "Was allowed to create an ipv4 reuseport on an already bound non-reuseport socket with no ipv6");
+- fprintf(stderr, "Success");
++ fprintf(stderr, "Success\n");
+ return 0;
+ }
+diff --git a/tools/testing/selftests/net/test_vxlan_mdb.sh b/tools/testing/selftests/net/test_vxlan_mdb.sh
+index 31e5f0f8859d1..be8e66abc74e1 100755
+--- a/tools/testing/selftests/net/test_vxlan_mdb.sh
++++ b/tools/testing/selftests/net/test_vxlan_mdb.sh
+@@ -984,6 +984,7 @@ encap_params_common()
+ local plen=$1; shift
+ local enc_ethtype=$1; shift
+ local grp=$1; shift
++ local grp_dmac=$1; shift
+ local src=$1; shift
+ local mz=$1; shift
+
+@@ -1002,11 +1003,11 @@ encap_params_common()
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep2_ip src_vni 10020"
+
+ run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_dst_ip $vtep1_ip action pass"
+- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
++ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 1
+ log_test $? 0 "Destination IP - match"
+
+- run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
++ run_cmd "ip netns exec $ns1 $mz br0.20 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 1
+ log_test $? 0 "Destination IP - no match"
+
+@@ -1019,20 +1020,20 @@ encap_params_common()
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip dst_port 1111 src_vni 10020"
+
+ run_cmd "tc -n $ns2 filter replace dev veth0 ingress pref 1 handle 101 proto $enc_ethtype flower ip_proto udp dst_port 4789 action pass"
+- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
++ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev veth0 ingress" 101 1
+ log_test $? 0 "Default destination port - match"
+
+- run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
++ run_cmd "ip netns exec $ns1 $mz br0.20 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev veth0 ingress" 101 1
+ log_test $? 0 "Default destination port - no match"
+
+ run_cmd "tc -n $ns2 filter replace dev veth0 ingress pref 1 handle 101 proto $enc_ethtype flower ip_proto udp dst_port 1111 action pass"
+- run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
++ run_cmd "ip netns exec $ns1 $mz br0.20 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev veth0 ingress" 101 1
+ log_test $? 0 "Non-default destination port - match"
+
+- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
++ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev veth0 ingress" 101 1
+ log_test $? 0 "Non-default destination port - no match"
+
+@@ -1045,11 +1046,11 @@ encap_params_common()
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip src_vni 10020"
+
+ run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_key_id 10010 action pass"
+- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
++ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 1
+ log_test $? 0 "Default destination VNI - match"
+
+- run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
++ run_cmd "ip netns exec $ns1 $mz br0.20 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 1
+ log_test $? 0 "Default destination VNI - no match"
+
+@@ -1057,11 +1058,11 @@ encap_params_common()
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip vni 10010 src_vni 10020"
+
+ run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_key_id 10020 action pass"
+- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
++ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 1
+ log_test $? 0 "Non-default destination VNI - match"
+
+- run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
++ run_cmd "ip netns exec $ns1 $mz br0.20 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 1
+ log_test $? 0 "Non-default destination VNI - no match"
+
+@@ -1079,6 +1080,7 @@ encap_params_ipv4_ipv4()
+ local plen=32
+ local enc_ethtype="ip"
+ local grp=239.1.1.1
++ local grp_dmac=01:00:5e:01:01:01
+ local src=192.0.2.129
+
+ echo
+@@ -1086,7 +1088,7 @@ encap_params_ipv4_ipv4()
+ echo "------------------------------------------------------------------"
+
+ encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \
+- $grp $src "mausezahn"
++ $grp $grp_dmac $src "mausezahn"
+ }
+
+ encap_params_ipv6_ipv4()
+@@ -1098,6 +1100,7 @@ encap_params_ipv6_ipv4()
+ local plen=32
+ local enc_ethtype="ip"
+ local grp=ff0e::1
++ local grp_dmac=33:33:00:00:00:01
+ local src=2001:db8:100::1
+
+ echo
+@@ -1105,7 +1108,7 @@ encap_params_ipv6_ipv4()
+ echo "------------------------------------------------------------------"
+
+ encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \
+- $grp $src "mausezahn -6"
++ $grp $grp_dmac $src "mausezahn -6"
+ }
+
+ encap_params_ipv4_ipv6()
+@@ -1117,6 +1120,7 @@ encap_params_ipv4_ipv6()
+ local plen=128
+ local enc_ethtype="ipv6"
+ local grp=239.1.1.1
++ local grp_dmac=01:00:5e:01:01:01
+ local src=192.0.2.129
+
+ echo
+@@ -1124,7 +1128,7 @@ encap_params_ipv4_ipv6()
+ echo "------------------------------------------------------------------"
+
+ encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \
+- $grp $src "mausezahn"
++ $grp $grp_dmac $src "mausezahn"
+ }
+
+ encap_params_ipv6_ipv6()
+@@ -1136,6 +1140,7 @@ encap_params_ipv6_ipv6()
+ local plen=128
+ local enc_ethtype="ipv6"
+ local grp=ff0e::1
++ local grp_dmac=33:33:00:00:00:01
+ local src=2001:db8:100::1
+
+ echo
+@@ -1143,7 +1148,7 @@ encap_params_ipv6_ipv6()
+ echo "------------------------------------------------------------------"
+
+ encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \
+- $grp $src "mausezahn -6"
++ $grp $grp_dmac $src "mausezahn -6"
+ }
+
+ starg_exclude_ir_common()
+@@ -1154,6 +1159,7 @@ starg_exclude_ir_common()
+ local vtep2_ip=$1; shift
+ local plen=$1; shift
+ local grp=$1; shift
++ local grp_dmac=$1; shift
+ local valid_src=$1; shift
+ local invalid_src=$1; shift
+ local mz=$1; shift
+@@ -1175,14 +1181,14 @@ starg_exclude_ir_common()
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $invalid_src dst $vtep2_ip src_vni 10010"
+
+ # Check that invalid source is not forwarded to any VTEP.
+- run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
++ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 0
+ log_test $? 0 "Block excluded source - first VTEP"
+ tc_check_packets "$ns2" "dev vx0 ingress" 102 0
+ log_test $? 0 "Block excluded source - second VTEP"
+
+ # Check that valid source is forwarded to both VTEPs.
+- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
++ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 1
+ log_test $? 0 "Forward valid source - first VTEP"
+ tc_check_packets "$ns2" "dev vx0 ingress" 102 1
+@@ -1192,14 +1198,14 @@ starg_exclude_ir_common()
+ run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep2_ip src_vni 10010"
+
+ # Check that invalid source is not forwarded to any VTEP.
+- run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
++ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 1
+ log_test $? 0 "Block excluded source after removal - first VTEP"
+ tc_check_packets "$ns2" "dev vx0 ingress" 102 1
+ log_test $? 0 "Block excluded source after removal - second VTEP"
+
+ # Check that valid source is forwarded to the remaining VTEP.
+- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
++ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 2
+ log_test $? 0 "Forward valid source after removal - first VTEP"
+ tc_check_packets "$ns2" "dev vx0 ingress" 102 1
+@@ -1214,6 +1220,7 @@ starg_exclude_ir_ipv4_ipv4()
+ local vtep2_ip=198.51.100.200
+ local plen=32
+ local grp=239.1.1.1
++ local grp_dmac=01:00:5e:01:01:01
+ local valid_src=192.0.2.129
+ local invalid_src=192.0.2.145
+
+@@ -1222,7 +1229,7 @@ starg_exclude_ir_ipv4_ipv4()
+ echo "-------------------------------------------------------------"
+
+ starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
+- $valid_src $invalid_src "mausezahn"
++ $grp_dmac $valid_src $invalid_src "mausezahn"
+ }
+
+ starg_exclude_ir_ipv6_ipv4()
+@@ -1233,6 +1240,7 @@ starg_exclude_ir_ipv6_ipv4()
+ local vtep2_ip=198.51.100.200
+ local plen=32
+ local grp=ff0e::1
++ local grp_dmac=33:33:00:00:00:01
+ local valid_src=2001:db8:100::1
+ local invalid_src=2001:db8:200::1
+
+@@ -1241,7 +1249,7 @@ starg_exclude_ir_ipv6_ipv4()
+ echo "-------------------------------------------------------------"
+
+ starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
+- $valid_src $invalid_src "mausezahn -6"
++ $grp_dmac $valid_src $invalid_src "mausezahn -6"
+ }
+
+ starg_exclude_ir_ipv4_ipv6()
+@@ -1252,6 +1260,7 @@ starg_exclude_ir_ipv4_ipv6()
+ local vtep2_ip=2001:db8:2000::1
+ local plen=128
+ local grp=239.1.1.1
++ local grp_dmac=01:00:5e:01:01:01
+ local valid_src=192.0.2.129
+ local invalid_src=192.0.2.145
+
+@@ -1260,7 +1269,7 @@ starg_exclude_ir_ipv4_ipv6()
+ echo "-------------------------------------------------------------"
+
+ starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
+- $valid_src $invalid_src "mausezahn"
++ $grp_dmac $valid_src $invalid_src "mausezahn"
+ }
+
+ starg_exclude_ir_ipv6_ipv6()
+@@ -1271,6 +1280,7 @@ starg_exclude_ir_ipv6_ipv6()
+ local vtep2_ip=2001:db8:2000::1
+ local plen=128
+ local grp=ff0e::1
++ local grp_dmac=33:33:00:00:00:01
+ local valid_src=2001:db8:100::1
+ local invalid_src=2001:db8:200::1
+
+@@ -1279,7 +1289,7 @@ starg_exclude_ir_ipv6_ipv6()
+ echo "-------------------------------------------------------------"
+
+ starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
+- $valid_src $invalid_src "mausezahn -6"
++ $grp_dmac $valid_src $invalid_src "mausezahn -6"
+ }
+
+ starg_include_ir_common()
+@@ -1290,6 +1300,7 @@ starg_include_ir_common()
+ local vtep2_ip=$1; shift
+ local plen=$1; shift
+ local grp=$1; shift
++ local grp_dmac=$1; shift
+ local valid_src=$1; shift
+ local invalid_src=$1; shift
+ local mz=$1; shift
+@@ -1311,14 +1322,14 @@ starg_include_ir_common()
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode include source_list $valid_src dst $vtep2_ip src_vni 10010"
+
+ # Check that invalid source is not forwarded to any VTEP.
+- run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
++ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 0
+ log_test $? 0 "Block excluded source - first VTEP"
+ tc_check_packets "$ns2" "dev vx0 ingress" 102 0
+ log_test $? 0 "Block excluded source - second VTEP"
+
+ # Check that valid source is forwarded to both VTEPs.
+- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
++ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 1
+ log_test $? 0 "Forward valid source - first VTEP"
+ tc_check_packets "$ns2" "dev vx0 ingress" 102 1
+@@ -1328,14 +1339,14 @@ starg_include_ir_common()
+ run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep2_ip src_vni 10010"
+
+ # Check that invalid source is not forwarded to any VTEP.
+- run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
++ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 1
+ log_test $? 0 "Block excluded source after removal - first VTEP"
+ tc_check_packets "$ns2" "dev vx0 ingress" 102 1
+ log_test $? 0 "Block excluded source after removal - second VTEP"
+
+ # Check that valid source is forwarded to the remaining VTEP.
+- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
++ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 2
+ log_test $? 0 "Forward valid source after removal - first VTEP"
+ tc_check_packets "$ns2" "dev vx0 ingress" 102 1
+@@ -1350,6 +1361,7 @@ starg_include_ir_ipv4_ipv4()
+ local vtep2_ip=198.51.100.200
+ local plen=32
+ local grp=239.1.1.1
++ local grp_dmac=01:00:5e:01:01:01
+ local valid_src=192.0.2.129
+ local invalid_src=192.0.2.145
+
+@@ -1358,7 +1370,7 @@ starg_include_ir_ipv4_ipv4()
+ echo "-------------------------------------------------------------"
+
+ starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
+- $valid_src $invalid_src "mausezahn"
++ $grp_dmac $valid_src $invalid_src "mausezahn"
+ }
+
+ starg_include_ir_ipv6_ipv4()
+@@ -1369,6 +1381,7 @@ starg_include_ir_ipv6_ipv4()
+ local vtep2_ip=198.51.100.200
+ local plen=32
+ local grp=ff0e::1
++ local grp_dmac=33:33:00:00:00:01
+ local valid_src=2001:db8:100::1
+ local invalid_src=2001:db8:200::1
+
+@@ -1377,7 +1390,7 @@ starg_include_ir_ipv6_ipv4()
+ echo "-------------------------------------------------------------"
+
+ starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
+- $valid_src $invalid_src "mausezahn -6"
++ $grp_dmac $valid_src $invalid_src "mausezahn -6"
+ }
+
+ starg_include_ir_ipv4_ipv6()
+@@ -1388,6 +1401,7 @@ starg_include_ir_ipv4_ipv6()
+ local vtep2_ip=2001:db8:2000::1
+ local plen=128
+ local grp=239.1.1.1
++ local grp_dmac=01:00:5e:01:01:01
+ local valid_src=192.0.2.129
+ local invalid_src=192.0.2.145
+
+@@ -1396,7 +1410,7 @@ starg_include_ir_ipv4_ipv6()
+ echo "-------------------------------------------------------------"
+
+ starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
+- $valid_src $invalid_src "mausezahn"
++ $grp_dmac $valid_src $invalid_src "mausezahn"
+ }
+
+ starg_include_ir_ipv6_ipv6()
+@@ -1407,6 +1421,7 @@ starg_include_ir_ipv6_ipv6()
+ local vtep2_ip=2001:db8:2000::1
+ local plen=128
+ local grp=ff0e::1
++ local grp_dmac=33:33:00:00:00:01
+ local valid_src=2001:db8:100::1
+ local invalid_src=2001:db8:200::1
+
+@@ -1415,7 +1430,7 @@ starg_include_ir_ipv6_ipv6()
+ echo "-------------------------------------------------------------"
+
+ starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
+- $valid_src $invalid_src "mausezahn -6"
++ $grp_dmac $valid_src $invalid_src "mausezahn -6"
+ }
+
+ starg_exclude_p2mp_common()
+@@ -1425,6 +1440,7 @@ starg_exclude_p2mp_common()
+ local mcast_grp=$1; shift
+ local plen=$1; shift
+ local grp=$1; shift
++ local grp_dmac=$1; shift
+ local valid_src=$1; shift
+ local invalid_src=$1; shift
+ local mz=$1; shift
+@@ -1442,12 +1458,12 @@ starg_exclude_p2mp_common()
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $invalid_src dst $mcast_grp src_vni 10010 via veth0"
+
+ # Check that invalid source is not forwarded.
+- run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
++ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 0
+ log_test $? 0 "Block excluded source"
+
+ # Check that valid source is forwarded.
+- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
++ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 1
+ log_test $? 0 "Forward valid source"
+
+@@ -1455,7 +1471,7 @@ starg_exclude_p2mp_common()
+ run_cmd "ip -n $ns2 address del $mcast_grp/$plen dev veth0"
+
+ # Check that valid source is not received anymore.
+- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
++ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 1
+ log_test $? 0 "Receive of valid source after removal from group"
+ }
+@@ -1467,6 +1483,7 @@ starg_exclude_p2mp_ipv4_ipv4()
+ local mcast_grp=238.1.1.1
+ local plen=32
+ local grp=239.1.1.1
++ local grp_dmac=01:00:5e:01:01:01
+ local valid_src=192.0.2.129
+ local invalid_src=192.0.2.145
+
+@@ -1474,7 +1491,7 @@ starg_exclude_p2mp_ipv4_ipv4()
+ echo "Data path: (*, G) EXCLUDE - P2MP - IPv4 overlay / IPv4 underlay"
+ echo "---------------------------------------------------------------"
+
+- starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
++ starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \
+ $valid_src $invalid_src "mausezahn"
+ }
+
+@@ -1485,6 +1502,7 @@ starg_exclude_p2mp_ipv6_ipv4()
+ local mcast_grp=238.1.1.1
+ local plen=32
+ local grp=ff0e::1
++ local grp_dmac=33:33:00:00:00:01
+ local valid_src=2001:db8:100::1
+ local invalid_src=2001:db8:200::1
+
+@@ -1492,7 +1510,7 @@ starg_exclude_p2mp_ipv6_ipv4()
+ echo "Data path: (*, G) EXCLUDE - P2MP - IPv6 overlay / IPv4 underlay"
+ echo "---------------------------------------------------------------"
+
+- starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
++ starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \
+ $valid_src $invalid_src "mausezahn -6"
+ }
+
+@@ -1503,6 +1521,7 @@ starg_exclude_p2mp_ipv4_ipv6()
+ local mcast_grp=ff0e::2
+ local plen=128
+ local grp=239.1.1.1
++ local grp_dmac=01:00:5e:01:01:01
+ local valid_src=192.0.2.129
+ local invalid_src=192.0.2.145
+
+@@ -1510,7 +1529,7 @@ starg_exclude_p2mp_ipv4_ipv6()
+ echo "Data path: (*, G) EXCLUDE - P2MP - IPv4 overlay / IPv6 underlay"
+ echo "---------------------------------------------------------------"
+
+- starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
++ starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \
+ $valid_src $invalid_src "mausezahn"
+ }
+
+@@ -1521,6 +1540,7 @@ starg_exclude_p2mp_ipv6_ipv6()
+ local mcast_grp=ff0e::2
+ local plen=128
+ local grp=ff0e::1
++ local grp_dmac=33:33:00:00:00:01
+ local valid_src=2001:db8:100::1
+ local invalid_src=2001:db8:200::1
+
+@@ -1528,7 +1548,7 @@ starg_exclude_p2mp_ipv6_ipv6()
+ echo "Data path: (*, G) EXCLUDE - P2MP - IPv6 overlay / IPv6 underlay"
+ echo "---------------------------------------------------------------"
+
+- starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
++ starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \
+ $valid_src $invalid_src "mausezahn -6"
+ }
+
+@@ -1539,6 +1559,7 @@ starg_include_p2mp_common()
+ local mcast_grp=$1; shift
+ local plen=$1; shift
+ local grp=$1; shift
++ local grp_dmac=$1; shift
+ local valid_src=$1; shift
+ local invalid_src=$1; shift
+ local mz=$1; shift
+@@ -1556,12 +1577,12 @@ starg_include_p2mp_common()
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode include source_list $valid_src dst $mcast_grp src_vni 10010 via veth0"
+
+ # Check that invalid source is not forwarded.
+- run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
++ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 0
+ log_test $? 0 "Block excluded source"
+
+ # Check that valid source is forwarded.
+- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
++ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 1
+ log_test $? 0 "Forward valid source"
+
+@@ -1569,7 +1590,7 @@ starg_include_p2mp_common()
+ run_cmd "ip -n $ns2 address del $mcast_grp/$plen dev veth0"
+
+ # Check that valid source is not received anymore.
+- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
++ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 1
+ log_test $? 0 "Receive of valid source after removal from group"
+ }
+@@ -1581,6 +1602,7 @@ starg_include_p2mp_ipv4_ipv4()
+ local mcast_grp=238.1.1.1
+ local plen=32
+ local grp=239.1.1.1
++ local grp_dmac=01:00:5e:01:01:01
+ local valid_src=192.0.2.129
+ local invalid_src=192.0.2.145
+
+@@ -1588,7 +1610,7 @@ starg_include_p2mp_ipv4_ipv4()
+ echo "Data path: (*, G) INCLUDE - P2MP - IPv4 overlay / IPv4 underlay"
+ echo "---------------------------------------------------------------"
+
+- starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
++ starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \
+ $valid_src $invalid_src "mausezahn"
+ }
+
+@@ -1599,6 +1621,7 @@ starg_include_p2mp_ipv6_ipv4()
+ local mcast_grp=238.1.1.1
+ local plen=32
+ local grp=ff0e::1
++ local grp_dmac=33:33:00:00:00:01
+ local valid_src=2001:db8:100::1
+ local invalid_src=2001:db8:200::1
+
+@@ -1606,7 +1629,7 @@ starg_include_p2mp_ipv6_ipv4()
+ echo "Data path: (*, G) INCLUDE - P2MP - IPv6 overlay / IPv4 underlay"
+ echo "---------------------------------------------------------------"
+
+- starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
++ starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \
+ $valid_src $invalid_src "mausezahn -6"
+ }
+
+@@ -1617,6 +1640,7 @@ starg_include_p2mp_ipv4_ipv6()
+ local mcast_grp=ff0e::2
+ local plen=128
+ local grp=239.1.1.1
++ local grp_dmac=01:00:5e:01:01:01
+ local valid_src=192.0.2.129
+ local invalid_src=192.0.2.145
+
+@@ -1624,7 +1648,7 @@ starg_include_p2mp_ipv4_ipv6()
+ echo "Data path: (*, G) INCLUDE - P2MP - IPv4 overlay / IPv6 underlay"
+ echo "---------------------------------------------------------------"
+
+- starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
++ starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \
+ $valid_src $invalid_src "mausezahn"
+ }
+
+@@ -1635,6 +1659,7 @@ starg_include_p2mp_ipv6_ipv6()
+ local mcast_grp=ff0e::2
+ local plen=128
+ local grp=ff0e::1
++ local grp_dmac=33:33:00:00:00:01
+ local valid_src=2001:db8:100::1
+ local invalid_src=2001:db8:200::1
+
+@@ -1642,7 +1667,7 @@ starg_include_p2mp_ipv6_ipv6()
+ echo "Data path: (*, G) INCLUDE - P2MP - IPv6 overlay / IPv6 underlay"
+ echo "---------------------------------------------------------------"
+
+- starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
++ starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \
+ $valid_src $invalid_src "mausezahn -6"
+ }
+
+@@ -1654,6 +1679,7 @@ egress_vni_translation_common()
+ local plen=$1; shift
+ local proto=$1; shift
+ local grp=$1; shift
++ local grp_dmac=$1; shift
+ local src=$1; shift
+ local mz=$1; shift
+
+@@ -1689,20 +1715,20 @@ egress_vni_translation_common()
+ # Make sure that packets sent from the first VTEP over VLAN 10 are
+ # received by the SVI corresponding to the L3VNI (14000 / VLAN 4000) on
+ # the second VTEP, since it is configured as PVID.
+- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
++ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev br0.4000 ingress" 101 1
+ log_test $? 0 "Egress VNI translation - PVID configured"
+
+ # Remove PVID flag from VLAN 4000 on the second VTEP and make sure
+ # packets are no longer received by the SVI interface.
+ run_cmd "bridge -n $ns2 vlan add vid 4000 dev vx0"
+- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
++ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev br0.4000 ingress" 101 1
+ log_test $? 0 "Egress VNI translation - no PVID configured"
+
+ # Reconfigure the PVID and make sure packets are received again.
+ run_cmd "bridge -n $ns2 vlan add vid 4000 dev vx0 pvid"
+- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
++ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev br0.4000 ingress" 101 2
+ log_test $? 0 "Egress VNI translation - PVID reconfigured"
+ }
+@@ -1715,6 +1741,7 @@ egress_vni_translation_ipv4_ipv4()
+ local plen=32
+ local proto="ipv4"
+ local grp=239.1.1.1
++ local grp_dmac=01:00:5e:01:01:01
+ local src=192.0.2.129
+
+ echo
+@@ -1722,7 +1749,7 @@ egress_vni_translation_ipv4_ipv4()
+ echo "----------------------------------------------------------------"
+
+ egress_vni_translation_common $ns1 $ns2 $mcast_grp $plen $proto $grp \
+- $src "mausezahn"
++ $grp_dmac $src "mausezahn"
+ }
+
+ egress_vni_translation_ipv6_ipv4()
+@@ -1733,6 +1760,7 @@ egress_vni_translation_ipv6_ipv4()
+ local plen=32
+ local proto="ipv6"
+ local grp=ff0e::1
++ local grp_dmac=33:33:00:00:00:01
+ local src=2001:db8:100::1
+
+ echo
+@@ -1740,7 +1768,7 @@ egress_vni_translation_ipv6_ipv4()
+ echo "----------------------------------------------------------------"
+
+ egress_vni_translation_common $ns1 $ns2 $mcast_grp $plen $proto $grp \
+- $src "mausezahn -6"
++ $grp_dmac $src "mausezahn -6"
+ }
+
+ egress_vni_translation_ipv4_ipv6()
+@@ -1751,6 +1779,7 @@ egress_vni_translation_ipv4_ipv6()
+ local plen=128
+ local proto="ipv4"
+ local grp=239.1.1.1
++ local grp_dmac=01:00:5e:01:01:01
+ local src=192.0.2.129
+
+ echo
+@@ -1758,7 +1787,7 @@ egress_vni_translation_ipv4_ipv6()
+ echo "----------------------------------------------------------------"
+
+ egress_vni_translation_common $ns1 $ns2 $mcast_grp $plen $proto $grp \
+- $src "mausezahn"
++ $grp_dmac $src "mausezahn"
+ }
+
+ egress_vni_translation_ipv6_ipv6()
+@@ -1769,6 +1798,7 @@ egress_vni_translation_ipv6_ipv6()
+ local plen=128
+ local proto="ipv6"
+ local grp=ff0e::1
++ local grp_dmac=33:33:00:00:00:01
+ local src=2001:db8:100::1
+
+ echo
+@@ -1776,7 +1806,7 @@ egress_vni_translation_ipv6_ipv6()
+ echo "----------------------------------------------------------------"
+
+ egress_vni_translation_common $ns1 $ns2 $mcast_grp $plen $proto $grp \
+- $src "mausezahn -6"
++ $grp_dmac $src "mausezahn -6"
+ }
+
+ all_zeros_mdb_common()
+@@ -1789,12 +1819,18 @@ all_zeros_mdb_common()
+ local vtep4_ip=$1; shift
+ local plen=$1; shift
+ local ipv4_grp=239.1.1.1
++ local ipv4_grp_dmac=01:00:5e:01:01:01
+ local ipv4_unreg_grp=239.2.2.2
++ local ipv4_unreg_grp_dmac=01:00:5e:02:02:02
+ local ipv4_ll_grp=224.0.0.100
++ local ipv4_ll_grp_dmac=01:00:5e:00:00:64
+ local ipv4_src=192.0.2.129
+ local ipv6_grp=ff0e::1
++ local ipv6_grp_dmac=33:33:00:00:00:01
+ local ipv6_unreg_grp=ff0e::2
++ local ipv6_unreg_grp_dmac=33:33:00:00:00:02
+ local ipv6_ll_grp=ff02::1
++ local ipv6_ll_grp_dmac=33:33:00:00:00:01
+ local ipv6_src=2001:db8:100::1
+
+ # Install all-zeros (catchall) MDB entries for IPv4 and IPv6 traffic
+@@ -1830,7 +1866,7 @@ all_zeros_mdb_common()
+
+ # Send registered IPv4 multicast and make sure it only arrives to the
+ # first VTEP.
+- run_cmd "ip netns exec $ns1 mausezahn br0.10 -A $ipv4_src -B $ipv4_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
++ run_cmd "ip netns exec $ns1 mausezahn br0.10 -a own -b $ipv4_grp_dmac -A $ipv4_src -B $ipv4_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 1
+ log_test $? 0 "Registered IPv4 multicast - first VTEP"
+ tc_check_packets "$ns2" "dev vx0 ingress" 102 0
+@@ -1838,7 +1874,7 @@ all_zeros_mdb_common()
+
+ # Send unregistered IPv4 multicast that is not link-local and make sure
+ # it arrives to the first and second VTEPs.
+- run_cmd "ip netns exec $ns1 mausezahn br0.10 -A $ipv4_src -B $ipv4_unreg_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
++ run_cmd "ip netns exec $ns1 mausezahn br0.10 -a own -b $ipv4_unreg_grp_dmac -A $ipv4_src -B $ipv4_unreg_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 2
+ log_test $? 0 "Unregistered IPv4 multicast - first VTEP"
+ tc_check_packets "$ns2" "dev vx0 ingress" 102 1
+@@ -1846,7 +1882,7 @@ all_zeros_mdb_common()
+
+ # Send IPv4 link-local multicast traffic and make sure it does not
+ # arrive to any VTEP.
+- run_cmd "ip netns exec $ns1 mausezahn br0.10 -A $ipv4_src -B $ipv4_ll_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
++ run_cmd "ip netns exec $ns1 mausezahn br0.10 -a own -b $ipv4_ll_grp_dmac -A $ipv4_src -B $ipv4_ll_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 2
+ log_test $? 0 "Link-local IPv4 multicast - first VTEP"
+ tc_check_packets "$ns2" "dev vx0 ingress" 102 1
+@@ -1881,7 +1917,7 @@ all_zeros_mdb_common()
+
+ # Send registered IPv6 multicast and make sure it only arrives to the
+ # third VTEP.
+- run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -A $ipv6_src -B $ipv6_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
++ run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -a own -b $ipv6_grp_dmac -A $ipv6_src -B $ipv6_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 103 1
+ log_test $? 0 "Registered IPv6 multicast - third VTEP"
+ tc_check_packets "$ns2" "dev vx0 ingress" 104 0
+@@ -1889,7 +1925,7 @@ all_zeros_mdb_common()
+
+ # Send unregistered IPv6 multicast that is not link-local and make sure
+ # it arrives to the third and fourth VTEPs.
+- run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -A $ipv6_src -B $ipv6_unreg_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
++ run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -a own -b $ipv6_unreg_grp_dmac -A $ipv6_src -B $ipv6_unreg_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 103 2
+ log_test $? 0 "Unregistered IPv6 multicast - third VTEP"
+ tc_check_packets "$ns2" "dev vx0 ingress" 104 1
+@@ -1897,7 +1933,7 @@ all_zeros_mdb_common()
+
+ # Send IPv6 link-local multicast traffic and make sure it does not
+ # arrive to any VTEP.
+- run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -A $ipv6_src -B $ipv6_ll_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
++ run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -a own -b $ipv6_ll_grp_dmac -A $ipv6_src -B $ipv6_ll_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 103 2
+ log_test $? 0 "Link-local IPv6 multicast - third VTEP"
+ tc_check_packets "$ns2" "dev vx0 ingress" 104 1
+@@ -1972,6 +2008,7 @@ mdb_fdb_common()
+ local plen=$1; shift
+ local proto=$1; shift
+ local grp=$1; shift
++ local grp_dmac=$1; shift
+ local src=$1; shift
+ local mz=$1; shift
+
+@@ -1995,7 +2032,7 @@ mdb_fdb_common()
+
+ # Send IP multicast traffic and make sure it is forwarded by the MDB
+ # and only arrives to the first VTEP.
+- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
++ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 1
+ log_test $? 0 "IP multicast - first VTEP"
+ tc_check_packets "$ns2" "dev vx0 ingress" 102 0
+@@ -2012,7 +2049,7 @@ mdb_fdb_common()
+ # Remove the MDB entry and make sure that IP multicast is now forwarded
+ # by the FDB to the second VTEP.
+ run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep1_ip src_vni 10010"
+- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
++ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 1
+ log_test $? 0 "IP multicast after removal - first VTEP"
+ tc_check_packets "$ns2" "dev vx0 ingress" 102 2
+@@ -2028,14 +2065,15 @@ mdb_fdb_ipv4_ipv4()
+ local plen=32
+ local proto="ipv4"
+ local grp=239.1.1.1
++ local grp_dmac=01:00:5e:01:01:01
+ local src=192.0.2.129
+
+ echo
+ echo "Data path: MDB with FDB - IPv4 overlay / IPv4 underlay"
+ echo "------------------------------------------------------"
+
+- mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \
+- "mausezahn"
++ mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp \
++ $grp_dmac $src "mausezahn"
+ }
+
+ mdb_fdb_ipv6_ipv4()
+@@ -2047,14 +2085,15 @@ mdb_fdb_ipv6_ipv4()
+ local plen=32
+ local proto="ipv6"
+ local grp=ff0e::1
++ local grp_dmac=33:33:00:00:00:01
+ local src=2001:db8:100::1
+
+ echo
+ echo "Data path: MDB with FDB - IPv6 overlay / IPv4 underlay"
+ echo "------------------------------------------------------"
+
+- mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \
+- "mausezahn -6"
++ mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp \
++ $grp_dmac $src "mausezahn -6"
+ }
+
+ mdb_fdb_ipv4_ipv6()
+@@ -2066,14 +2105,15 @@ mdb_fdb_ipv4_ipv6()
+ local plen=128
+ local proto="ipv4"
+ local grp=239.1.1.1
++ local grp_dmac=01:00:5e:01:01:01
+ local src=192.0.2.129
+
+ echo
+ echo "Data path: MDB with FDB - IPv4 overlay / IPv6 underlay"
+ echo "------------------------------------------------------"
+
+- mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \
+- "mausezahn"
++ mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp \
++ $grp_dmac $src "mausezahn"
+ }
+
+ mdb_fdb_ipv6_ipv6()
+@@ -2085,14 +2125,15 @@ mdb_fdb_ipv6_ipv6()
+ local plen=128
+ local proto="ipv6"
+ local grp=ff0e::1
++ local grp_dmac=33:33:00:00:00:01
+ local src=2001:db8:100::1
+
+ echo
+ echo "Data path: MDB with FDB - IPv6 overlay / IPv6 underlay"
+ echo "------------------------------------------------------"
+
+- mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \
+- "mausezahn -6"
++ mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp \
++ $grp_dmac $src "mausezahn -6"
+ }
+
+ mdb_grp1_loop()
+@@ -2127,7 +2168,9 @@ mdb_torture_common()
+ local vtep1_ip=$1; shift
+ local vtep2_ip=$1; shift
+ local grp1=$1; shift
++ local grp1_dmac=$1; shift
+ local grp2=$1; shift
++ local grp2_dmac=$1; shift
+ local src=$1; shift
+ local mz=$1; shift
+ local pid1
+@@ -2152,9 +2195,9 @@ mdb_torture_common()
+ pid1=$!
+ mdb_grp2_loop $ns1 $vtep1_ip $vtep2_ip $grp2 &
+ pid2=$!
+- ip netns exec $ns1 $mz br0.10 -A $src -B $grp1 -t udp sp=12345,dp=54321 -p 100 -c 0 -q &
++ ip netns exec $ns1 $mz br0.10 -a own -b $grp1_dmac -A $src -B $grp1 -t udp sp=12345,dp=54321 -p 100 -c 0 -q &
+ pid3=$!
+- ip netns exec $ns1 $mz br0.10 -A $src -B $grp2 -t udp sp=12345,dp=54321 -p 100 -c 0 -q &
++ ip netns exec $ns1 $mz br0.10 -a own -b $grp2_dmac -A $src -B $grp2 -t udp sp=12345,dp=54321 -p 100 -c 0 -q &
+ pid4=$!
+
+ sleep 30
+@@ -2170,15 +2213,17 @@ mdb_torture_ipv4_ipv4()
+ local vtep1_ip=198.51.100.100
+ local vtep2_ip=198.51.100.200
+ local grp1=239.1.1.1
++ local grp1_dmac=01:00:5e:01:01:01
+ local grp2=239.2.2.2
++ local grp2_dmac=01:00:5e:02:02:02
+ local src=192.0.2.129
+
+ echo
+ echo "Data path: MDB torture test - IPv4 overlay / IPv4 underlay"
+ echo "----------------------------------------------------------"
+
+- mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \
+- "mausezahn"
++ mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp1_dmac $grp2 \
++ $grp2_dmac $src "mausezahn"
+ }
+
+ mdb_torture_ipv6_ipv4()
+@@ -2187,15 +2232,17 @@ mdb_torture_ipv6_ipv4()
+ local vtep1_ip=198.51.100.100
+ local vtep2_ip=198.51.100.200
+ local grp1=ff0e::1
++ local grp1_dmac=33:33:00:00:00:01
+ local grp2=ff0e::2
++ local grp2_dmac=33:33:00:00:00:02
+ local src=2001:db8:100::1
+
+ echo
+ echo "Data path: MDB torture test - IPv6 overlay / IPv4 underlay"
+ echo "----------------------------------------------------------"
+
+- mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \
+- "mausezahn -6"
++ mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp1_dmac $grp2 \
++ $grp2_dmac $src "mausezahn -6"
+ }
+
+ mdb_torture_ipv4_ipv6()
+@@ -2204,15 +2251,17 @@ mdb_torture_ipv4_ipv6()
+ local vtep1_ip=2001:db8:1000::1
+ local vtep2_ip=2001:db8:2000::1
+ local grp1=239.1.1.1
++ local grp1_dmac=01:00:5e:01:01:01
+ local grp2=239.2.2.2
++ local grp2_dmac=01:00:5e:02:02:02
+ local src=192.0.2.129
+
+ echo
+ echo "Data path: MDB torture test - IPv4 overlay / IPv6 underlay"
+ echo "----------------------------------------------------------"
+
+- mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \
+- "mausezahn"
++ mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp1_dmac $grp2 \
++ $grp2_dmac $src "mausezahn"
+ }
+
+ mdb_torture_ipv6_ipv6()
+@@ -2221,15 +2270,17 @@ mdb_torture_ipv6_ipv6()
+ local vtep1_ip=2001:db8:1000::1
+ local vtep2_ip=2001:db8:2000::1
+ local grp1=ff0e::1
++ local grp1_dmac=33:33:00:00:00:01
+ local grp2=ff0e::2
++ local grp2_dmac=33:33:00:00:00:02
+ local src=2001:db8:100::1
+
+ echo
+ echo "Data path: MDB torture test - IPv6 overlay / IPv6 underlay"
+ echo "----------------------------------------------------------"
+
+- mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \
+- "mausezahn -6"
++ mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp1_dmac $grp2 \
++ $grp2_dmac $src "mausezahn -6"
+ }
+
+ ################################################################################
+diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh
+index 9cd5e885e91f7..f4549e6894dd9 100755
+--- a/tools/testing/selftests/net/udpgro_fwd.sh
++++ b/tools/testing/selftests/net/udpgro_fwd.sh
+@@ -241,7 +241,7 @@ for family in 4 6; do
+
+ create_vxlan_pair
+ ip netns exec $NS_DST ethtool -K veth$DST rx-gro-list on
+- run_test "GRO frag list over UDP tunnel" $OL_NET$DST 1 1
++ run_test "GRO frag list over UDP tunnel" $OL_NET$DST 10 10
+ cleanup
+
+ # use NAT to circumvent GRO FWD check
+@@ -254,13 +254,7 @@ for family in 4 6; do
+ # load arp cache before running the test to reduce the amount of
+ # stray traffic on top of the UDP tunnel
+ ip netns exec $NS_SRC $PING -q -c 1 $OL_NET$DST_NAT >/dev/null
+- run_test "GRO fwd over UDP tunnel" $OL_NET$DST_NAT 1 1 $OL_NET$DST
+- cleanup
+-
+- create_vxlan_pair
+- run_bench "UDP tunnel fwd perf" $OL_NET$DST
+- ip netns exec $NS_DST ethtool -K veth$DST rx-udp-gro-forwarding on
+- run_bench "UDP tunnel GRO fwd perf" $OL_NET$DST
++ run_test "GRO fwd over UDP tunnel" $OL_NET$DST_NAT 10 10 $OL_NET$DST
+ cleanup
+ done
+