author | 2024-04-10 11:09:06 -0400
committer | 2024-04-10 11:09:06 -0400
commit | 55f07c32363e30cdd7d8619be719408a3999d536 (patch)
tree | 5a6ae7b94f222c31930b983aa3cc399ef645b16f
parent | Linux patch 6.6.25 (diff)
download | linux-patches-55f07c32363e30cdd7d8619be719408a3999d536.tar.gz, linux-patches-55f07c32363e30cdd7d8619be719408a3999d536.tar.bz2, linux-patches-55f07c32363e30cdd7d8619be719408a3999d536.zip
Linux patch 6.6.26 (tag: 6.6-32)
Signed-off-by: Mike Pagano <mpagano@gentoo.org>
-rw-r--r-- | 0000_README | 4 |
-rw-r--r-- | 1025_linux-6.6.26.patch | 15825 |
2 files changed, 15829 insertions(+), 0 deletions(-)
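For reference, the diffstat above describes an incremental patch that takes a vanilla 6.6.25 tree to 6.6.26 (the Makefile hunk below bumps SUBLEVEL to 26). A minimal sketch of applying it by hand follows; the directory layout and the use of GNU patch are illustrative assumptions and not part of the commit, since the gentoo-sources ebuild normally applies these patches automatically.

    # Illustrative only: apply this commit's incremental patch to a vanilla tree.
    # Paths are assumptions; adjust to wherever the 6.6.25 sources are unpacked.
    cd linux-6.6.25
    patch -p1 < ../1025_linux-6.6.26.patch   # tree now corresponds to 6.6.26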
diff --git a/0000_README b/0000_README index cfd8c138..7661b44e 100644 --- a/0000_README +++ b/0000_README @@ -143,6 +143,10 @@ Patch: 1024_linux-6.6.25.patch From: https://www.kernel.org Desc: Linux 6.6.25 +Patch: 1025_linux-6.6.26.patch +From: https://www.kernel.org +Desc: Linux 6.6.26 + Patch: 1510_fs-enable-link-security-restrictions-by-default.patch From: http://sources.debian.net/src/linux/3.16.7-ckt4-3/debian/patches/debian/fs-enable-link-security-restrictions-by-default.patch/ Desc: Enable link security restrictions by default. diff --git a/1025_linux-6.6.26.patch b/1025_linux-6.6.26.patch new file mode 100644 index 00000000..20a79d8e --- /dev/null +++ b/1025_linux-6.6.26.patch @@ -0,0 +1,15825 @@ +diff --git a/Documentation/admin-guide/hw-vuln/spectre.rst b/Documentation/admin-guide/hw-vuln/spectre.rst +index 32a8893e56177..9edb2860a3e19 100644 +--- a/Documentation/admin-guide/hw-vuln/spectre.rst ++++ b/Documentation/admin-guide/hw-vuln/spectre.rst +@@ -138,11 +138,10 @@ associated with the source address of the indirect branch. Specifically, + the BHB might be shared across privilege levels even in the presence of + Enhanced IBRS. + +-Currently the only known real-world BHB attack vector is via +-unprivileged eBPF. Therefore, it's highly recommended to not enable +-unprivileged eBPF, especially when eIBRS is used (without retpolines). +-For a full mitigation against BHB attacks, it's recommended to use +-retpolines (or eIBRS combined with retpolines). ++Previously the only known real-world BHB attack vector was via unprivileged ++eBPF. Further research has found attacks that don't require unprivileged eBPF. ++For a full mitigation against BHB attacks it is recommended to set BHI_DIS_S or ++use the BHB clearing sequence. + + Attack scenarios + ---------------- +@@ -430,6 +429,23 @@ The possible values in this file are: + 'PBRSB-eIBRS: Not affected' CPU is not affected by PBRSB + =========================== ======================================================= + ++ - Branch History Injection (BHI) protection status: ++ ++.. list-table:: ++ ++ * - BHI: Not affected ++ - System is not affected ++ * - BHI: Retpoline ++ - System is protected by retpoline ++ * - BHI: BHI_DIS_S ++ - System is protected by BHI_DIS_S ++ * - BHI: SW loop; KVM SW loop ++ - System is protected by software clearing sequence ++ * - BHI: Syscall hardening ++ - Syscalls are hardened against BHI ++ * - BHI: Syscall hardening; KVM: SW loop ++ - System is protected from userspace attacks by syscall hardening; KVM is protected by software clearing sequence ++ + Full mitigation might require a microcode update from the CPU + vendor. When the necessary microcode is not available, the kernel will + report vulnerability. +@@ -484,7 +500,11 @@ Spectre variant 2 + + Systems which support enhanced IBRS (eIBRS) enable IBRS protection once at + boot, by setting the IBRS bit, and they're automatically protected against +- Spectre v2 variant attacks. ++ some Spectre v2 variant attacks. The BHB can still influence the choice of ++ indirect branch predictor entry, and although branch predictor entries are ++ isolated between modes when eIBRS is enabled, the BHB itself is not isolated ++ between modes. Systems which support BHI_DIS_S will set it to protect against ++ BHI attacks. + + On Intel's enhanced IBRS systems, this includes cross-thread branch target + injections on SMT systems (STIBP). In other words, Intel eIBRS enables +@@ -638,6 +658,22 @@ kernel command line. + spectre_v2=off. 
Spectre variant 1 mitigations + cannot be disabled. + ++ spectre_bhi= ++ ++ [X86] Control mitigation of Branch History Injection ++ (BHI) vulnerability. Syscalls are hardened against BHI ++ regardless of this setting. This setting affects the deployment ++ of the HW BHI control and the SW BHB clearing sequence. ++ ++ on ++ unconditionally enable. ++ off ++ unconditionally disable. ++ auto ++ enable if hardware mitigation ++ control(BHI_DIS_S) is available, otherwise ++ enable alternate mitigation in KVM. ++ + For spectre_v2_user see Documentation/admin-guide/kernel-parameters.txt + + Mitigation selection guide +diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt +index 7a36124dde5e5..61199466c0437 100644 +--- a/Documentation/admin-guide/kernel-parameters.txt ++++ b/Documentation/admin-guide/kernel-parameters.txt +@@ -5920,6 +5920,18 @@ + sonypi.*= [HW] Sony Programmable I/O Control Device driver + See Documentation/admin-guide/laptops/sonypi.rst + ++ spectre_bhi= [X86] Control mitigation of Branch History Injection ++ (BHI) vulnerability. Syscalls are hardened against BHI ++ reglardless of this setting. This setting affects the ++ deployment of the HW BHI control and the SW BHB ++ clearing sequence. ++ ++ on - unconditionally enable. ++ off - unconditionally disable. ++ auto - (default) enable hardware mitigation ++ (BHI_DIS_S) if available, otherwise enable ++ alternate mitigation in KVM. ++ + spectre_v2= [X86] Control mitigation of Spectre variant 2 + (indirect branch speculation) vulnerability. + The default operation protects the kernel from +diff --git a/Makefile b/Makefile +index 022af2a9a6d9b..77ad41bd298e0 100644 +--- a/Makefile ++++ b/Makefile +@@ -1,7 +1,7 @@ + # SPDX-License-Identifier: GPL-2.0 + VERSION = 6 + PATCHLEVEL = 6 +-SUBLEVEL = 25 ++SUBLEVEL = 26 + EXTRAVERSION = + NAME = Hurr durr I'ma ninja sloth + +diff --git a/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi b/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi +index 5a33e16a8b677..c2f5e9f6679d6 100644 +--- a/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi ++++ b/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi +@@ -970,6 +970,8 @@ bluetooth: bluetooth { + vddrf-supply = <&pp1300_l2c>; + vddch0-supply = <&pp3300_l10c>; + max-speed = <3200000>; ++ ++ qcom,local-bd-address-broken; + }; + }; + +diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c +index c94c0f8c9a737..d95416b93a9dd 100644 +--- a/arch/arm64/kernel/ptrace.c ++++ b/arch/arm64/kernel/ptrace.c +@@ -728,7 +728,6 @@ static void sve_init_header_from_task(struct user_sve_header *header, + { + unsigned int vq; + bool active; +- bool fpsimd_only; + enum vec_type task_type; + + memset(header, 0, sizeof(*header)); +@@ -744,12 +743,10 @@ static void sve_init_header_from_task(struct user_sve_header *header, + case ARM64_VEC_SVE: + if (test_tsk_thread_flag(target, TIF_SVE_VL_INHERIT)) + header->flags |= SVE_PT_VL_INHERIT; +- fpsimd_only = !test_tsk_thread_flag(target, TIF_SVE); + break; + case ARM64_VEC_SME: + if (test_tsk_thread_flag(target, TIF_SME_VL_INHERIT)) + header->flags |= SVE_PT_VL_INHERIT; +- fpsimd_only = false; + break; + default: + WARN_ON_ONCE(1); +@@ -757,7 +754,7 @@ static void sve_init_header_from_task(struct user_sve_header *header, + } + + if (active) { +- if (fpsimd_only) { ++ if (target->thread.fp_type == FP_STATE_FPSIMD) { + header->flags |= SVE_PT_REGS_FPSIMD; + } else { + header->flags |= SVE_PT_REGS_SVE; +diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c +index 
f155b8c9e98c7..15aa9bad1c280 100644 +--- a/arch/arm64/kvm/hyp/pgtable.c ++++ b/arch/arm64/kvm/hyp/pgtable.c +@@ -805,12 +805,15 @@ static bool stage2_try_break_pte(const struct kvm_pgtable_visit_ctx *ctx, + * Perform the appropriate TLB invalidation based on the + * evicted pte value (if any). + */ +- if (kvm_pte_table(ctx->old, ctx->level)) +- kvm_tlb_flush_vmid_range(mmu, ctx->addr, +- kvm_granule_size(ctx->level)); +- else if (kvm_pte_valid(ctx->old)) ++ if (kvm_pte_table(ctx->old, ctx->level)) { ++ u64 size = kvm_granule_size(ctx->level); ++ u64 addr = ALIGN_DOWN(ctx->addr, size); ++ ++ kvm_tlb_flush_vmid_range(mmu, addr, size); ++ } else if (kvm_pte_valid(ctx->old)) { + kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, + ctx->addr, ctx->level); ++ } + } + + if (stage2_pte_is_counted(ctx->old)) +diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c +index 150d1c6543f7f..29196dce9b91d 100644 +--- a/arch/arm64/net/bpf_jit_comp.c ++++ b/arch/arm64/net/bpf_jit_comp.c +@@ -876,7 +876,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, + emit(A64_UXTH(is64, dst, dst), ctx); + break; + case 32: +- emit(A64_REV32(is64, dst, dst), ctx); ++ emit(A64_REV32(0, dst, dst), ctx); + /* upper 32 bits already cleared */ + break; + case 64: +@@ -1189,7 +1189,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, + } else { + emit_a64_mov_i(1, tmp, off, ctx); + if (sign_extend) +- emit(A64_LDRSW(dst, src_adj, off_adj), ctx); ++ emit(A64_LDRSW(dst, src, tmp), ctx); + else + emit(A64_LDR32(dst, src, tmp), ctx); + } +diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c +index 926bec775f41c..9822366dc186e 100644 +--- a/arch/powerpc/mm/book3s64/pgtable.c ++++ b/arch/powerpc/mm/book3s64/pgtable.c +@@ -130,7 +130,7 @@ void set_pud_at(struct mm_struct *mm, unsigned long addr, + + WARN_ON(pte_hw_valid(pud_pte(*pudp))); + assert_spin_locked(pud_lockptr(mm, pudp)); +- WARN_ON(!(pud_large(pud))); ++ WARN_ON(!(pud_leaf(pud))); + #endif + trace_hugepage_set_pud(addr, pud_val(pud)); + return set_pte_at(mm, addr, pudp_ptep(pudp), pud_pte(pud)); +diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h +index ec0cab9fbddd0..72ec1d9bd3f31 100644 +--- a/arch/riscv/include/asm/uaccess.h ++++ b/arch/riscv/include/asm/uaccess.h +@@ -319,7 +319,7 @@ unsigned long __must_check clear_user(void __user *to, unsigned long n) + + #define __get_kernel_nofault(dst, src, type, err_label) \ + do { \ +- long __kr_err; \ ++ long __kr_err = 0; \ + \ + __get_user_nocheck(*((type *)(dst)), (type *)(src), __kr_err); \ + if (unlikely(__kr_err)) \ +@@ -328,7 +328,7 @@ do { \ + + #define __put_kernel_nofault(dst, src, type, err_label) \ + do { \ +- long __kr_err; \ ++ long __kr_err = 0; \ + \ + __put_user_nocheck(*((type *)(src)), (type *)(dst), __kr_err); \ + if (unlikely(__kr_err)) \ +diff --git a/arch/riscv/include/uapi/asm/auxvec.h b/arch/riscv/include/uapi/asm/auxvec.h +index 10aaa83db89ef..95050ebe9ad00 100644 +--- a/arch/riscv/include/uapi/asm/auxvec.h ++++ b/arch/riscv/include/uapi/asm/auxvec.h +@@ -34,7 +34,7 @@ + #define AT_L3_CACHEGEOMETRY 47 + + /* entries in ARCH_DLINFO */ +-#define AT_VECTOR_SIZE_ARCH 9 ++#define AT_VECTOR_SIZE_ARCH 10 + #define AT_MINSIGSTKSZ 51 + + #endif /* _UAPI_ASM_RISCV_AUXVEC_H */ +diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c +index 37e87fdcf6a00..30e12b310cab7 100644 +--- a/arch/riscv/kernel/patch.c ++++ b/arch/riscv/kernel/patch.c +@@ -80,6 +80,8 @@ static int 
__patch_insn_set(void *addr, u8 c, size_t len) + */ + lockdep_assert_held(&text_mutex); + ++ preempt_disable(); ++ + if (across_pages) + patch_map(addr + PAGE_SIZE, FIX_TEXT_POKE1); + +@@ -92,6 +94,8 @@ static int __patch_insn_set(void *addr, u8 c, size_t len) + if (across_pages) + patch_unmap(FIX_TEXT_POKE1); + ++ preempt_enable(); ++ + return 0; + } + NOKPROBE_SYMBOL(__patch_insn_set); +@@ -122,6 +126,8 @@ static int __patch_insn_write(void *addr, const void *insn, size_t len) + if (!riscv_patch_in_stop_machine) + lockdep_assert_held(&text_mutex); + ++ preempt_disable(); ++ + if (across_pages) + patch_map(addr + PAGE_SIZE, FIX_TEXT_POKE1); + +@@ -134,6 +140,8 @@ static int __patch_insn_write(void *addr, const void *insn, size_t len) + if (across_pages) + patch_unmap(FIX_TEXT_POKE1); + ++ preempt_enable(); ++ + return ret; + } + NOKPROBE_SYMBOL(__patch_insn_write); +diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c +index e32d737e039fd..83e223318822a 100644 +--- a/arch/riscv/kernel/process.c ++++ b/arch/riscv/kernel/process.c +@@ -26,8 +26,6 @@ + #include <asm/cpuidle.h> + #include <asm/vector.h> + +-register unsigned long gp_in_global __asm__("gp"); +- + #if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_STACKPROTECTOR_PER_TASK) + #include <linux/stackprotector.h> + unsigned long __stack_chk_guard __read_mostly; +@@ -186,7 +184,6 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) + if (unlikely(args->fn)) { + /* Kernel thread */ + memset(childregs, 0, sizeof(struct pt_regs)); +- childregs->gp = gp_in_global; + /* Supervisor/Machine, irqs on: */ + childregs->status = SR_PP | SR_PIE; + +diff --git a/arch/riscv/kvm/aia_aplic.c b/arch/riscv/kvm/aia_aplic.c +index 39e72aa016a4c..b467ba5ed9100 100644 +--- a/arch/riscv/kvm/aia_aplic.c ++++ b/arch/riscv/kvm/aia_aplic.c +@@ -137,11 +137,21 @@ static void aplic_write_pending(struct aplic *aplic, u32 irq, bool pending) + raw_spin_lock_irqsave(&irqd->lock, flags); + + sm = irqd->sourcecfg & APLIC_SOURCECFG_SM_MASK; +- if (!pending && +- ((sm == APLIC_SOURCECFG_SM_LEVEL_HIGH) || +- (sm == APLIC_SOURCECFG_SM_LEVEL_LOW))) ++ if (sm == APLIC_SOURCECFG_SM_INACTIVE) + goto skip_write_pending; + ++ if (sm == APLIC_SOURCECFG_SM_LEVEL_HIGH || ++ sm == APLIC_SOURCECFG_SM_LEVEL_LOW) { ++ if (!pending) ++ goto skip_write_pending; ++ if ((irqd->state & APLIC_IRQ_STATE_INPUT) && ++ sm == APLIC_SOURCECFG_SM_LEVEL_LOW) ++ goto skip_write_pending; ++ if (!(irqd->state & APLIC_IRQ_STATE_INPUT) && ++ sm == APLIC_SOURCECFG_SM_LEVEL_HIGH) ++ goto skip_write_pending; ++ } ++ + if (pending) + irqd->state |= APLIC_IRQ_STATE_PENDING; + else +@@ -187,16 +197,31 @@ static void aplic_write_enabled(struct aplic *aplic, u32 irq, bool enabled) + + static bool aplic_read_input(struct aplic *aplic, u32 irq) + { +- bool ret; +- unsigned long flags; ++ u32 sourcecfg, sm, raw_input, irq_inverted; + struct aplic_irq *irqd; ++ unsigned long flags; ++ bool ret = false; + + if (!irq || aplic->nr_irqs <= irq) + return false; + irqd = &aplic->irqs[irq]; + + raw_spin_lock_irqsave(&irqd->lock, flags); +- ret = (irqd->state & APLIC_IRQ_STATE_INPUT) ? true : false; ++ ++ sourcecfg = irqd->sourcecfg; ++ if (sourcecfg & APLIC_SOURCECFG_D) ++ goto skip; ++ ++ sm = sourcecfg & APLIC_SOURCECFG_SM_MASK; ++ if (sm == APLIC_SOURCECFG_SM_INACTIVE) ++ goto skip; ++ ++ raw_input = (irqd->state & APLIC_IRQ_STATE_INPUT) ? 1 : 0; ++ irq_inverted = (sm == APLIC_SOURCECFG_SM_LEVEL_LOW || ++ sm == APLIC_SOURCECFG_SM_EDGE_FALL) ? 
1 : 0; ++ ret = !!(raw_input ^ irq_inverted); ++ ++skip: + raw_spin_unlock_irqrestore(&irqd->lock, flags); + + return ret; +diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c +index 442a74f113cbf..14e1a73ffcfe6 100644 +--- a/arch/s390/boot/vmem.c ++++ b/arch/s390/boot/vmem.c +@@ -360,7 +360,7 @@ static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long e + } + pmd = boot_crst_alloc(_SEGMENT_ENTRY_EMPTY); + pud_populate(&init_mm, pud, pmd); +- } else if (pud_large(*pud)) { ++ } else if (pud_leaf(*pud)) { + continue; + } + pgtable_pmd_populate(pud, addr, next, mode); +diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h +index fb3ee7758b765..38290b0078c56 100644 +--- a/arch/s390/include/asm/pgtable.h ++++ b/arch/s390/include/asm/pgtable.h +@@ -729,7 +729,7 @@ static inline int pud_bad(pud_t pud) + { + unsigned long type = pud_val(pud) & _REGION_ENTRY_TYPE_MASK; + +- if (type > _REGION_ENTRY_TYPE_R3 || pud_large(pud)) ++ if (type > _REGION_ENTRY_TYPE_R3 || pud_leaf(pud)) + return 1; + if (type < _REGION_ENTRY_TYPE_R3) + return 0; +@@ -1396,7 +1396,7 @@ static inline unsigned long pud_deref(pud_t pud) + unsigned long origin_mask; + + origin_mask = _REGION_ENTRY_ORIGIN; +- if (pud_large(pud)) ++ if (pud_leaf(pud)) + origin_mask = _REGION3_ENTRY_ORIGIN_LARGE; + return (unsigned long)__va(pud_val(pud) & origin_mask); + } +diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S +index 49a11f6dd7ae9..26c08ee877407 100644 +--- a/arch/s390/kernel/entry.S ++++ b/arch/s390/kernel/entry.S +@@ -653,6 +653,7 @@ SYM_DATA_START_LOCAL(daton_psw) + SYM_DATA_END(daton_psw) + + .section .rodata, "a" ++ .balign 8 + #define SYSCALL(esame,emu) .quad __s390x_ ## esame + SYM_DATA_START(sys_call_table) + #include "asm/syscall_table.h" +diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c +index 157e0a8d5157d..d17bb1ef63f41 100644 +--- a/arch/s390/mm/gmap.c ++++ b/arch/s390/mm/gmap.c +@@ -596,7 +596,7 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) + pud = pud_offset(p4d, vmaddr); + VM_BUG_ON(pud_none(*pud)); + /* large puds cannot yet be handled */ +- if (pud_large(*pud)) ++ if (pud_leaf(*pud)) + return -EFAULT; + pmd = pmd_offset(pud, vmaddr); + VM_BUG_ON(pmd_none(*pmd)); +diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c +index 297a6d897d5a0..5f64f3d0fafbb 100644 +--- a/arch/s390/mm/hugetlbpage.c ++++ b/arch/s390/mm/hugetlbpage.c +@@ -224,7 +224,7 @@ pte_t *huge_pte_offset(struct mm_struct *mm, + if (p4d_present(*p4dp)) { + pudp = pud_offset(p4dp, addr); + if (pud_present(*pudp)) { +- if (pud_large(*pudp)) ++ if (pud_leaf(*pudp)) + return (pte_t *) pudp; + pmdp = pmd_offset(pudp, addr); + } +@@ -240,7 +240,7 @@ int pmd_huge(pmd_t pmd) + + int pud_huge(pud_t pud) + { +- return pud_large(pud); ++ return pud_leaf(pud); + } + + bool __init arch_hugetlb_valid_size(unsigned long size) +diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c +index b87e96c64b61d..441f654d048d2 100644 +--- a/arch/s390/mm/pageattr.c ++++ b/arch/s390/mm/pageattr.c +@@ -274,7 +274,7 @@ static int walk_pud_level(p4d_t *p4d, unsigned long addr, unsigned long end, + if (pud_none(*pudp)) + return -EINVAL; + next = pud_addr_end(addr, end); +- if (pud_large(*pudp)) { ++ if (pud_leaf(*pudp)) { + need_split = !!(flags & SET_MEMORY_4K); + need_split |= !!(addr & ~PUD_MASK); + need_split |= !!(addr + PUD_SIZE > next); +diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c +index 5cb92941540b3..5e349869590a8 100644 +--- 
a/arch/s390/mm/pgtable.c ++++ b/arch/s390/mm/pgtable.c +@@ -479,7 +479,7 @@ static int pmd_lookup(struct mm_struct *mm, unsigned long addr, pmd_t **pmdp) + return -ENOENT; + + /* Large PUDs are not supported yet. */ +- if (pud_large(*pud)) ++ if (pud_leaf(*pud)) + return -EFAULT; + + *pmdp = pmd_offset(pud, addr); +diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c +index 6d276103c6d58..2d3f65da56eea 100644 +--- a/arch/s390/mm/vmem.c ++++ b/arch/s390/mm/vmem.c +@@ -322,7 +322,7 @@ static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end, + if (!add) { + if (pud_none(*pud)) + continue; +- if (pud_large(*pud)) { ++ if (pud_leaf(*pud)) { + if (IS_ALIGNED(addr, PUD_SIZE) && + IS_ALIGNED(next, PUD_SIZE)) { + pud_clear(pud); +@@ -343,7 +343,7 @@ static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end, + if (!pmd) + goto out; + pud_populate(&init_mm, pud, pmd); +- } else if (pud_large(*pud)) { ++ } else if (pud_leaf(*pud)) { + continue; + } + ret = modify_pmd_table(pud, addr, next, add, direct); +@@ -586,7 +586,7 @@ pte_t *vmem_get_alloc_pte(unsigned long addr, bool alloc) + if (!pmd) + goto out; + pud_populate(&init_mm, pud, pmd); +- } else if (WARN_ON_ONCE(pud_large(*pud))) { ++ } else if (WARN_ON_ONCE(pud_leaf(*pud))) { + goto out; + } + pmd = pmd_offset(pud, addr); +diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c +index e507692e51e71..8af02176f68bf 100644 +--- a/arch/s390/net/bpf_jit_comp.c ++++ b/arch/s390/net/bpf_jit_comp.c +@@ -516,11 +516,12 @@ static void bpf_skip(struct bpf_jit *jit, int size) + * PLT for hotpatchable calls. The calling convention is the same as for the + * ftrace hotpatch trampolines: %r0 is return address, %r1 is clobbered. + */ +-extern const char bpf_plt[]; +-extern const char bpf_plt_ret[]; +-extern const char bpf_plt_target[]; +-extern const char bpf_plt_end[]; +-#define BPF_PLT_SIZE 32 ++struct bpf_plt { ++ char code[16]; ++ void *ret; ++ void *target; ++} __packed; ++extern const struct bpf_plt bpf_plt; + asm( + ".pushsection .rodata\n" + " .balign 8\n" +@@ -531,15 +532,14 @@ asm( + " .balign 8\n" + "bpf_plt_ret: .quad 0\n" + "bpf_plt_target: .quad 0\n" +- "bpf_plt_end:\n" + " .popsection\n" + ); + +-static void bpf_jit_plt(void *plt, void *ret, void *target) ++static void bpf_jit_plt(struct bpf_plt *plt, void *ret, void *target) + { +- memcpy(plt, bpf_plt, BPF_PLT_SIZE); +- *(void **)((char *)plt + (bpf_plt_ret - bpf_plt)) = ret; +- *(void **)((char *)plt + (bpf_plt_target - bpf_plt)) = target ?: ret; ++ memcpy(plt, &bpf_plt, sizeof(*plt)); ++ plt->ret = ret; ++ plt->target = target; + } + + /* +@@ -662,9 +662,9 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth) + jit->prg = ALIGN(jit->prg, 8); + jit->prologue_plt = jit->prg; + if (jit->prg_buf) +- bpf_jit_plt(jit->prg_buf + jit->prg, ++ bpf_jit_plt((struct bpf_plt *)(jit->prg_buf + jit->prg), + jit->prg_buf + jit->prologue_plt_ret, NULL); +- jit->prg += BPF_PLT_SIZE; ++ jit->prg += sizeof(struct bpf_plt); + } + + static int get_probe_mem_regno(const u8 *insn) +@@ -1901,9 +1901,6 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) + struct bpf_jit jit; + int pass; + +- if (WARN_ON_ONCE(bpf_plt_end - bpf_plt != BPF_PLT_SIZE)) +- return orig_fp; +- + if (!fp->jit_requested) + return orig_fp; + +@@ -2009,14 +2006,11 @@ bool bpf_jit_supports_far_kfunc_call(void) + int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, + void *old_addr, void *new_addr) + { ++ struct bpf_plt expected_plt, current_plt, new_plt, 
*plt; + struct { + u16 opc; + s32 disp; + } __packed insn; +- char expected_plt[BPF_PLT_SIZE]; +- char current_plt[BPF_PLT_SIZE]; +- char new_plt[BPF_PLT_SIZE]; +- char *plt; + char *ret; + int err; + +@@ -2035,18 +2029,18 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, + */ + } else { + /* Verify the PLT. */ +- plt = (char *)ip + (insn.disp << 1); +- err = copy_from_kernel_nofault(current_plt, plt, BPF_PLT_SIZE); ++ plt = ip + (insn.disp << 1); ++ err = copy_from_kernel_nofault(¤t_plt, plt, ++ sizeof(current_plt)); + if (err < 0) + return err; + ret = (char *)ip + 6; +- bpf_jit_plt(expected_plt, ret, old_addr); +- if (memcmp(current_plt, expected_plt, BPF_PLT_SIZE)) ++ bpf_jit_plt(&expected_plt, ret, old_addr); ++ if (memcmp(¤t_plt, &expected_plt, sizeof(current_plt))) + return -EINVAL; + /* Adjust the call address. */ +- bpf_jit_plt(new_plt, ret, new_addr); +- s390_kernel_write(plt + (bpf_plt_target - bpf_plt), +- new_plt + (bpf_plt_target - bpf_plt), ++ bpf_jit_plt(&new_plt, ret, new_addr); ++ s390_kernel_write(&plt->target, &new_plt.target, + sizeof(void *)); + } + +diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c +index f83017992eaae..d7db4e737218c 100644 +--- a/arch/sparc/mm/init_64.c ++++ b/arch/sparc/mm/init_64.c +@@ -1665,7 +1665,7 @@ bool kern_addr_valid(unsigned long addr) + if (pud_none(*pud)) + return false; + +- if (pud_large(*pud)) ++ if (pud_leaf(*pud)) + return pfn_valid(pud_pfn(*pud)); + + pmd = pmd_offset(pud, addr); +diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig +index 4b81e884a6147..b4e6859542a39 100644 +--- a/arch/x86/Kconfig ++++ b/arch/x86/Kconfig +@@ -2566,6 +2566,31 @@ config MITIGATION_RFDS + stored in floating point, vector and integer registers. + See also <file:Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst> + ++choice ++ prompt "Clear branch history" ++ depends on CPU_SUP_INTEL ++ default SPECTRE_BHI_ON ++ help ++ Enable BHI mitigations. BHI attacks are a form of Spectre V2 attacks ++ where the branch history buffer is poisoned to speculatively steer ++ indirect branches. ++ See <file:Documentation/admin-guide/hw-vuln/spectre.rst> ++ ++config SPECTRE_BHI_ON ++ bool "on" ++ help ++ Equivalent to setting spectre_bhi=on command line parameter. ++config SPECTRE_BHI_OFF ++ bool "off" ++ help ++ Equivalent to setting spectre_bhi=off command line parameter. ++config SPECTRE_BHI_AUTO ++ bool "auto" ++ help ++ Equivalent to setting spectre_bhi=auto command line parameter. 
++ ++endchoice ++ + endif + + config ARCH_HAS_ADD_PAGES +diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile +index 71fc531b95b4e..583c11664c63b 100644 +--- a/arch/x86/boot/compressed/Makefile ++++ b/arch/x86/boot/compressed/Makefile +@@ -84,7 +84,7 @@ LDFLAGS_vmlinux += -T + hostprogs := mkpiggy + HOST_EXTRACFLAGS += -I$(srctree)/tools/include + +-sed-voffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(_text\|__bss_start\|_end\)$$/\#define VO_\2 _AC(0x\1,UL)/p' ++sed-voffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(_text\|__start_rodata\|__bss_start\|_end\)$$/\#define VO_\2 _AC(0x\1,UL)/p' + + quiet_cmd_voffset = VOFFSET $@ + cmd_voffset = $(NM) $< | sed -n $(sed-voffset) > $@ +diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c +index f711f2a85862e..b5ecbd32a46fa 100644 +--- a/arch/x86/boot/compressed/misc.c ++++ b/arch/x86/boot/compressed/misc.c +@@ -330,6 +330,7 @@ static size_t parse_elf(void *output) + return ehdr.e_entry - LOAD_PHYSICAL_ADDR; + } + ++const unsigned long kernel_text_size = VO___start_rodata - VO__text; + const unsigned long kernel_total_size = VO__end - VO__text; + + static u8 boot_heap[BOOT_HEAP_SIZE] __aligned(4); +@@ -357,6 +358,19 @@ unsigned long decompress_kernel(unsigned char *outbuf, unsigned long virt_addr, + return entry; + } + ++/* ++ * Set the memory encryption xloadflag based on the mem_encrypt= command line ++ * parameter, if provided. ++ */ ++static void parse_mem_encrypt(struct setup_header *hdr) ++{ ++ int on = cmdline_find_option_bool("mem_encrypt=on"); ++ int off = cmdline_find_option_bool("mem_encrypt=off"); ++ ++ if (on > off) ++ hdr->xloadflags |= XLF_MEM_ENCRYPTION; ++} ++ + /* + * The compressed kernel image (ZO), has been moved so that its position + * is against the end of the buffer used to hold the uncompressed kernel +@@ -387,6 +401,8 @@ asmlinkage __visible void *extract_kernel(void *rmode, unsigned char *output) + /* Clear flags intended for solely in-kernel use. */ + boot_params->hdr.loadflags &= ~KASLR_FLAG; + ++ parse_mem_encrypt(&boot_params->hdr); ++ + sanitize_boot_params(boot_params); + + if (boot_params->screen_info.orig_video_mode == 7) { +diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c +index 80d76aea1f7bf..0a49218a516a2 100644 +--- a/arch/x86/boot/compressed/sev.c ++++ b/arch/x86/boot/compressed/sev.c +@@ -116,6 +116,9 @@ static bool fault_in_kernel_space(unsigned long address) + #undef __init + #define __init + ++#undef __head ++#define __head ++ + #define __BOOT_COMPRESSED + + /* Basic instruction decoding support needed */ +diff --git a/arch/x86/coco/core.c b/arch/x86/coco/core.c +index d07be9d05cd03..ddd4efdc79d66 100644 +--- a/arch/x86/coco/core.c ++++ b/arch/x86/coco/core.c +@@ -3,13 +3,17 @@ + * Confidential Computing Platform Capability checks + * + * Copyright (C) 2021 Advanced Micro Devices, Inc. ++ * Copyright (C) 2024 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + * + * Author: Tom Lendacky <thomas.lendacky@amd.com> + */ + + #include <linux/export.h> + #include <linux/cc_platform.h> ++#include <linux/string.h> ++#include <linux/random.h> + ++#include <asm/archrandom.h> + #include <asm/coco.h> + #include <asm/processor.h> + +@@ -148,3 +152,40 @@ u64 cc_mkdec(u64 val) + } + } + EXPORT_SYMBOL_GPL(cc_mkdec); ++ ++__init void cc_random_init(void) ++{ ++ /* ++ * The seed is 32 bytes (in units of longs), which is 256 bits, which ++ * is the security level that the RNG is targeting. 
++ */ ++ unsigned long rng_seed[32 / sizeof(long)]; ++ size_t i, longs; ++ ++ if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) ++ return; ++ ++ /* ++ * Since the CoCo threat model includes the host, the only reliable ++ * source of entropy that can be neither observed nor manipulated is ++ * RDRAND. Usually, RDRAND failure is considered tolerable, but since ++ * CoCo guests have no other unobservable source of entropy, it's ++ * important to at least ensure the RNG gets some initial random seeds. ++ */ ++ for (i = 0; i < ARRAY_SIZE(rng_seed); i += longs) { ++ longs = arch_get_random_longs(&rng_seed[i], ARRAY_SIZE(rng_seed) - i); ++ ++ /* ++ * A zero return value means that the guest doesn't have RDRAND ++ * or the CPU is physically broken, and in both cases that ++ * means most crypto inside of the CoCo instance will be ++ * broken, defeating the purpose of CoCo in the first place. So ++ * just panic here because it's absolutely unsafe to continue ++ * executing. ++ */ ++ if (longs == 0) ++ panic("RDRAND is defective."); ++ } ++ add_device_randomness(rng_seed, sizeof(rng_seed)); ++ memzero_explicit(rng_seed, sizeof(rng_seed)); ++} +diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c +index 9c0b26ae51069..e72dac092245a 100644 +--- a/arch/x86/entry/common.c ++++ b/arch/x86/entry/common.c +@@ -48,7 +48,7 @@ static __always_inline bool do_syscall_x64(struct pt_regs *regs, int nr) + + if (likely(unr < NR_syscalls)) { + unr = array_index_nospec(unr, NR_syscalls); +- regs->ax = sys_call_table[unr](regs); ++ regs->ax = x64_sys_call(regs, unr); + return true; + } + return false; +@@ -65,7 +65,7 @@ static __always_inline bool do_syscall_x32(struct pt_regs *regs, int nr) + + if (IS_ENABLED(CONFIG_X86_X32_ABI) && likely(xnr < X32_NR_syscalls)) { + xnr = array_index_nospec(xnr, X32_NR_syscalls); +- regs->ax = x32_sys_call_table[xnr](regs); ++ regs->ax = x32_sys_call(regs, xnr); + return true; + } + return false; +@@ -114,7 +114,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs, int nr) + + if (likely(unr < IA32_NR_syscalls)) { + unr = array_index_nospec(unr, IA32_NR_syscalls); +- regs->ax = ia32_sys_call_table[unr](regs); ++ regs->ax = ia32_sys_call(regs, unr); + } else if (nr != -1) { + regs->ax = __ia32_sys_ni_syscall(regs); + } +@@ -141,7 +141,7 @@ static __always_inline bool int80_is_external(void) + } + + /** +- * int80_emulation - 32-bit legacy syscall entry ++ * do_int80_emulation - 32-bit legacy syscall C entry from asm + * + * This entry point can be used by 32-bit and 64-bit programs to perform + * 32-bit system calls. 
Instances of INT $0x80 can be found inline in +@@ -159,7 +159,7 @@ static __always_inline bool int80_is_external(void) + * eax: system call number + * ebx, ecx, edx, esi, edi, ebp: arg1 - arg 6 + */ +-DEFINE_IDTENTRY_RAW(int80_emulation) ++__visible noinstr void do_int80_emulation(struct pt_regs *regs) + { + int nr; + +diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S +index 9f97a8bd11e81..5d96561c0d6ad 100644 +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -116,6 +116,7 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL) + /* clobbers %rax, make sure it is after saving the syscall nr */ + IBRS_ENTER + UNTRAIN_RET ++ CLEAR_BRANCH_HISTORY + + call do_syscall_64 /* returns with IRQs disabled */ + +@@ -1549,3 +1550,63 @@ SYM_CODE_START_NOALIGN(rewind_stack_and_make_dead) + call make_task_dead + SYM_CODE_END(rewind_stack_and_make_dead) + .popsection ++ ++/* ++ * This sequence executes branches in order to remove user branch information ++ * from the branch history tracker in the Branch Predictor, therefore removing ++ * user influence on subsequent BTB lookups. ++ * ++ * It should be used on parts prior to Alder Lake. Newer parts should use the ++ * BHI_DIS_S hardware control instead. If a pre-Alder Lake part is being ++ * virtualized on newer hardware the VMM should protect against BHI attacks by ++ * setting BHI_DIS_S for the guests. ++ * ++ * CALLs/RETs are necessary to prevent Loop Stream Detector(LSD) from engaging ++ * and not clearing the branch history. The call tree looks like: ++ * ++ * call 1 ++ * call 2 ++ * call 2 ++ * call 2 ++ * call 2 ++ * call 2 ++ * ret ++ * ret ++ * ret ++ * ret ++ * ret ++ * ret ++ * ++ * This means that the stack is non-constant and ORC can't unwind it with %rsp ++ * alone. Therefore we unconditionally set up the frame pointer, which allows ++ * ORC to unwind properly. ++ * ++ * The alignment is for performance and not for safety, and may be safely ++ * refactored in the future if needed. ++ */ ++SYM_FUNC_START(clear_bhb_loop) ++ push %rbp ++ mov %rsp, %rbp ++ movl $5, %ecx ++ ANNOTATE_INTRA_FUNCTION_CALL ++ call 1f ++ jmp 5f ++ .align 64, 0xcc ++ ANNOTATE_INTRA_FUNCTION_CALL ++1: call 2f ++ RET ++ .align 64, 0xcc ++2: movl $5, %eax ++3: jmp 4f ++ nop ++4: sub $1, %eax ++ jnz 3b ++ sub $1, %ecx ++ jnz 1b ++ RET ++5: lfence ++ pop %rbp ++ RET ++SYM_FUNC_END(clear_bhb_loop) ++EXPORT_SYMBOL_GPL(clear_bhb_loop) ++STACK_FRAME_NON_STANDARD(clear_bhb_loop) +diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S +index 306181e4fcb90..4c1dfc51c56e4 100644 +--- a/arch/x86/entry/entry_64_compat.S ++++ b/arch/x86/entry/entry_64_compat.S +@@ -92,6 +92,7 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL) + + IBRS_ENTER + UNTRAIN_RET ++ CLEAR_BRANCH_HISTORY + + /* + * SYSENTER doesn't filter flags, so we need to clear NT and AC +@@ -209,6 +210,7 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_after_hwframe, SYM_L_GLOBAL) + + IBRS_ENTER + UNTRAIN_RET ++ CLEAR_BRANCH_HISTORY + + movq %rsp, %rdi + call do_fast_syscall_32 +@@ -277,3 +279,17 @@ SYM_INNER_LABEL(entry_SYSRETL_compat_end, SYM_L_GLOBAL) + ANNOTATE_NOENDBR + int3 + SYM_CODE_END(entry_SYSCALL_compat) ++ ++/* ++ * int 0x80 is used by 32 bit mode as a system call entry. 
Normally idt entries ++ * point to C routines, however since this is a system call interface the branch ++ * history needs to be scrubbed to protect against BHI attacks, and that ++ * scrubbing needs to take place in assembly code prior to entering any C ++ * routines. ++ */ ++SYM_CODE_START(int80_emulation) ++ ANNOTATE_NOENDBR ++ UNWIND_HINT_FUNC ++ CLEAR_BRANCH_HISTORY ++ jmp do_int80_emulation ++SYM_CODE_END(int80_emulation) +diff --git a/arch/x86/entry/syscall_32.c b/arch/x86/entry/syscall_32.c +index 8cfc9bc73e7f8..c2235bae17ef6 100644 +--- a/arch/x86/entry/syscall_32.c ++++ b/arch/x86/entry/syscall_32.c +@@ -18,8 +18,25 @@ + #include <asm/syscalls_32.h> + #undef __SYSCALL + ++/* ++ * The sys_call_table[] is no longer used for system calls, but ++ * kernel/trace/trace_syscalls.c still wants to know the system ++ * call address. ++ */ ++#ifdef CONFIG_X86_32 + #define __SYSCALL(nr, sym) __ia32_##sym, +- +-__visible const sys_call_ptr_t ia32_sys_call_table[] = { ++const sys_call_ptr_t sys_call_table[] = { + #include <asm/syscalls_32.h> + }; ++#undef __SYSCALL ++#endif ++ ++#define __SYSCALL(nr, sym) case nr: return __ia32_##sym(regs); ++ ++long ia32_sys_call(const struct pt_regs *regs, unsigned int nr) ++{ ++ switch (nr) { ++ #include <asm/syscalls_32.h> ++ default: return __ia32_sys_ni_syscall(regs); ++ } ++}; +diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c +index be120eec1fc9f..33b3f09e6f151 100644 +--- a/arch/x86/entry/syscall_64.c ++++ b/arch/x86/entry/syscall_64.c +@@ -11,8 +11,23 @@ + #include <asm/syscalls_64.h> + #undef __SYSCALL + ++/* ++ * The sys_call_table[] is no longer used for system calls, but ++ * kernel/trace/trace_syscalls.c still wants to know the system ++ * call address. ++ */ + #define __SYSCALL(nr, sym) __x64_##sym, +- +-asmlinkage const sys_call_ptr_t sys_call_table[] = { ++const sys_call_ptr_t sys_call_table[] = { + #include <asm/syscalls_64.h> + }; ++#undef __SYSCALL ++ ++#define __SYSCALL(nr, sym) case nr: return __x64_##sym(regs); ++ ++long x64_sys_call(const struct pt_regs *regs, unsigned int nr) ++{ ++ switch (nr) { ++ #include <asm/syscalls_64.h> ++ default: return __x64_sys_ni_syscall(regs); ++ } ++}; +diff --git a/arch/x86/entry/syscall_x32.c b/arch/x86/entry/syscall_x32.c +index bdd0e03a1265d..03de4a9321318 100644 +--- a/arch/x86/entry/syscall_x32.c ++++ b/arch/x86/entry/syscall_x32.c +@@ -11,8 +11,12 @@ + #include <asm/syscalls_x32.h> + #undef __SYSCALL + +-#define __SYSCALL(nr, sym) __x64_##sym, ++#define __SYSCALL(nr, sym) case nr: return __x64_##sym(regs); + +-asmlinkage const sys_call_ptr_t x32_sys_call_table[] = { +-#include <asm/syscalls_x32.h> ++long x32_sys_call(const struct pt_regs *regs, unsigned int nr) ++{ ++ switch (nr) { ++ #include <asm/syscalls_x32.h> ++ default: return __x64_sys_ni_syscall(regs); ++ } + }; +diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c +index 5365d6acbf090..8ed10366c4a27 100644 +--- a/arch/x86/events/amd/core.c ++++ b/arch/x86/events/amd/core.c +@@ -250,7 +250,7 @@ static const u64 amd_perfmon_event_map[PERF_COUNT_HW_MAX] = + /* + * AMD Performance Monitor Family 17h and later: + */ +-static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] = ++static const u64 amd_zen1_perfmon_event_map[PERF_COUNT_HW_MAX] = + { + [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, + [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, +@@ -262,10 +262,24 @@ static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] = + [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x0187, + }; + ++static const u64 
amd_zen2_perfmon_event_map[PERF_COUNT_HW_MAX] = ++{ ++ [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, ++ [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, ++ [PERF_COUNT_HW_CACHE_REFERENCES] = 0xff60, ++ [PERF_COUNT_HW_CACHE_MISSES] = 0x0964, ++ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2, ++ [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3, ++ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00a9, ++}; ++ + static u64 amd_pmu_event_map(int hw_event) + { +- if (boot_cpu_data.x86 >= 0x17) +- return amd_f17h_perfmon_event_map[hw_event]; ++ if (cpu_feature_enabled(X86_FEATURE_ZEN2) || boot_cpu_data.x86 >= 0x19) ++ return amd_zen2_perfmon_event_map[hw_event]; ++ ++ if (cpu_feature_enabled(X86_FEATURE_ZEN1)) ++ return amd_zen1_perfmon_event_map[hw_event]; + + return amd_perfmon_event_map[hw_event]; + } +@@ -904,8 +918,8 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs) + if (!status) + goto done; + +- /* Read branch records before unfreezing */ +- if (status & GLOBAL_STATUS_LBRS_FROZEN) { ++ /* Read branch records */ ++ if (x86_pmu.lbr_nr) { + amd_pmu_lbr_read(); + status &= ~GLOBAL_STATUS_LBRS_FROZEN; + } +diff --git a/arch/x86/events/amd/lbr.c b/arch/x86/events/amd/lbr.c +index eb31f850841a8..110e34c59643a 100644 +--- a/arch/x86/events/amd/lbr.c ++++ b/arch/x86/events/amd/lbr.c +@@ -400,10 +400,12 @@ void amd_pmu_lbr_enable_all(void) + wrmsrl(MSR_AMD64_LBR_SELECT, lbr_select); + } + +- rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl); +- rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg); ++ if (cpu_feature_enabled(X86_FEATURE_AMD_LBR_PMC_FREEZE)) { ++ rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl); ++ wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); ++ } + +- wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); ++ rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg); + wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg | DBG_EXTN_CFG_LBRV2EN); + } + +@@ -416,10 +418,12 @@ void amd_pmu_lbr_disable_all(void) + return; + + rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg); +- rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl); +- + wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg & ~DBG_EXTN_CFG_LBRV2EN); +- wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); ++ ++ if (cpu_feature_enabled(X86_FEATURE_AMD_LBR_PMC_FREEZE)) { ++ rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl); ++ wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); ++ } + } + + __init int amd_pmu_lbr_init(void) +diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c +index eb8dd8b8a1e86..2b53f696c3c96 100644 +--- a/arch/x86/events/intel/ds.c ++++ b/arch/x86/events/intel/ds.c +@@ -1236,11 +1236,11 @@ pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc, + struct pmu *pmu = event->pmu; + + /* +- * Make sure we get updated with the first PEBS +- * event. It will trigger also during removal, but +- * that does not hurt: ++ * Make sure we get updated with the first PEBS event. ++ * During removal, ->pebs_data_cfg is still valid for ++ * the last PEBS event. Don't clear it. 
+ */ +- if (cpuc->n_pebs == 1) ++ if ((cpuc->n_pebs == 1) && add) + cpuc->pebs_data_cfg = PEBS_UPDATE_DS_SW; + + if (needed_cb != pebs_needs_sched_cb(cpuc)) { +diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h +index b1a98fa38828e..0e82074517f6b 100644 +--- a/arch/x86/include/asm/asm-prototypes.h ++++ b/arch/x86/include/asm/asm-prototypes.h +@@ -13,6 +13,7 @@ + #include <asm/preempt.h> + #include <asm/asm.h> + #include <asm/gsseg.h> ++#include <asm/nospec-branch.h> + + #ifndef CONFIG_X86_CMPXCHG64 + extern void cmpxchg8b_emu(void); +diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h +index b3a7cfb0d99e0..c945c893c52e0 100644 +--- a/arch/x86/include/asm/boot.h ++++ b/arch/x86/include/asm/boot.h +@@ -81,6 +81,7 @@ + + #ifndef __ASSEMBLY__ + extern unsigned int output_len; ++extern const unsigned long kernel_text_size; + extern const unsigned long kernel_total_size; + + unsigned long decompress_kernel(unsigned char *outbuf, unsigned long virt_addr, +diff --git a/arch/x86/include/asm/coco.h b/arch/x86/include/asm/coco.h +index 21940ef8d2904..de03537a01823 100644 +--- a/arch/x86/include/asm/coco.h ++++ b/arch/x86/include/asm/coco.h +@@ -22,6 +22,7 @@ static inline void cc_set_mask(u64 mask) + + u64 cc_mkenc(u64 val); + u64 cc_mkdec(u64 val); ++void cc_random_init(void); + #else + static inline u64 cc_mkenc(u64 val) + { +@@ -32,6 +33,7 @@ static inline u64 cc_mkdec(u64 val) + { + return val; + } ++static inline void cc_random_init(void) { } + #endif + + #endif /* _ASM_X86_COCO_H */ +diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h +index a1273698fc430..686e92d2663ee 100644 +--- a/arch/x86/include/asm/cpufeature.h ++++ b/arch/x86/include/asm/cpufeature.h +@@ -33,6 +33,8 @@ enum cpuid_leafs + CPUID_7_EDX, + CPUID_8000_001F_EAX, + CPUID_8000_0021_EAX, ++ CPUID_LNX_5, ++ NR_CPUID_WORDS, + }; + + #define X86_CAP_FMT_NUM "%d:%d" +@@ -91,8 +93,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; + CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 18, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 19, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 20, feature_bit) || \ ++ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 21, feature_bit) || \ + REQUIRED_MASK_CHECK || \ +- BUILD_BUG_ON_ZERO(NCAPINTS != 21)) ++ BUILD_BUG_ON_ZERO(NCAPINTS != 22)) + + #define DISABLED_MASK_BIT_SET(feature_bit) \ + ( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 0, feature_bit) || \ +@@ -116,8 +119,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; + CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 18, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 19, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 20, feature_bit) || \ ++ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 21, feature_bit) || \ + DISABLED_MASK_CHECK || \ +- BUILD_BUG_ON_ZERO(NCAPINTS != 21)) ++ BUILD_BUG_ON_ZERO(NCAPINTS != 22)) + + #define cpu_has(c, bit) \ + (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 
1 : \ +diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h +index bd33f6366c80d..8c1593dd2c317 100644 +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -13,7 +13,7 @@ + /* + * Defines x86 CPU feature bits + */ +-#define NCAPINTS 21 /* N 32-bit words worth of info */ ++#define NCAPINTS 22 /* N 32-bit words worth of info */ + #define NBUGINTS 2 /* N 32-bit bug flags */ + + /* +@@ -218,7 +218,7 @@ + #define X86_FEATURE_IBRS ( 7*32+25) /* Indirect Branch Restricted Speculation */ + #define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */ + #define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */ +-#define X86_FEATURE_ZEN (7*32+28) /* "" CPU based on Zen microarchitecture */ ++#define X86_FEATURE_ZEN ( 7*32+28) /* "" Generic flag for all Zen and newer */ + #define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* "" L1TF workaround PTE inversion */ + #define X86_FEATURE_IBRS_ENHANCED ( 7*32+30) /* Enhanced IBRS */ + #define X86_FEATURE_MSR_IA32_FEAT_CTL ( 7*32+31) /* "" MSR IA32_FEAT_CTL configured */ +@@ -312,6 +312,10 @@ + #define X86_FEATURE_SRSO_ALIAS (11*32+25) /* "" AMD BTB untrain RETs through aliasing */ + #define X86_FEATURE_IBPB_ON_VMEXIT (11*32+26) /* "" Issue an IBPB only on VMEXIT */ + #define X86_FEATURE_APIC_MSRS_FENCE (11*32+27) /* "" IA32_TSC_DEADLINE and X2APIC MSRs need fencing */ ++#define X86_FEATURE_ZEN2 (11*32+28) /* "" CPU based on Zen2 microarchitecture */ ++#define X86_FEATURE_ZEN3 (11*32+29) /* "" CPU based on Zen3 microarchitecture */ ++#define X86_FEATURE_ZEN4 (11*32+30) /* "" CPU based on Zen4 microarchitecture */ ++#define X86_FEATURE_ZEN1 (11*32+31) /* "" CPU based on Zen1 microarchitecture */ + + /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ + #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ +@@ -452,6 +456,18 @@ + #define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* "" MSR_PRED_CMD[IBPB] flushes all branch type predictions */ + #define X86_FEATURE_SRSO_NO (20*32+29) /* "" CPU is not affected by SRSO */ + ++/* ++ * Extended auxiliary flags: Linux defined - for features scattered in various ++ * CPUID levels like 0x80000022, etc and Linux defined features. ++ * ++ * Reuse free bits when adding new feature flags! 
++ */ ++#define X86_FEATURE_AMD_LBR_PMC_FREEZE (21*32+ 0) /* AMD LBR and PMC Freeze */ ++#define X86_FEATURE_CLEAR_BHB_LOOP (21*32+ 1) /* "" Clear branch history at syscall entry using SW loop */ ++#define X86_FEATURE_BHI_CTRL (21*32+ 2) /* "" BHI_DIS_S HW control available */ ++#define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* "" BHI_DIS_S HW control enabled */ ++#define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* "" Clear branch history at vmexit using SW loop */ ++ + /* + * BUG word(s) + */ +@@ -499,4 +515,5 @@ + #define X86_BUG_SRSO X86_BUG(1*32 + 0) /* AMD SRSO bug */ + #define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* AMD DIV0 speculation bug */ + #define X86_BUG_RFDS X86_BUG(1*32 + 2) /* CPU is vulnerable to Register File Data Sampling */ ++#define X86_BUG_BHI X86_BUG(1*32 + 3) /* CPU is affected by Branch History Injection */ + #endif /* _ASM_X86_CPUFEATURES_H */ +diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h +index 702d93fdd10e8..88fcf08458d9c 100644 +--- a/arch/x86/include/asm/disabled-features.h ++++ b/arch/x86/include/asm/disabled-features.h +@@ -143,6 +143,7 @@ + #define DISABLED_MASK18 (DISABLE_IBT) + #define DISABLED_MASK19 0 + #define DISABLED_MASK20 0 +-#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21) ++#define DISABLED_MASK21 0 ++#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 22) + + #endif /* _ASM_X86_DISABLED_FEATURES_H */ +diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h +index 5f1d3c421f686..cc9ccf61b6bd1 100644 +--- a/arch/x86/include/asm/init.h ++++ b/arch/x86/include/asm/init.h +@@ -2,6 +2,8 @@ + #ifndef _ASM_X86_INIT_H + #define _ASM_X86_INIT_H + ++#define __head __section(".head.text") ++ + struct x86_mapping_info { + void *(*alloc_pgt_page)(void *); /* allocate buf for page table */ + void *context; /* context for alloc_pgt_page */ +diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h +index f4f5269846295..76081a34fc231 100644 +--- a/arch/x86/include/asm/mem_encrypt.h ++++ b/arch/x86/include/asm/mem_encrypt.h +@@ -46,8 +46,8 @@ void __init sme_unmap_bootdata(char *real_mode_data); + void __init sme_early_init(void); + void __init sev_setup_arch(void); + +-void __init sme_encrypt_kernel(struct boot_params *bp); +-void __init sme_enable(struct boot_params *bp); ++void sme_encrypt_kernel(struct boot_params *bp); ++void sme_enable(struct boot_params *bp); + + int __init early_set_memory_decrypted(unsigned long vaddr, unsigned long size); + int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size); +@@ -81,8 +81,8 @@ static inline void __init sme_unmap_bootdata(char *real_mode_data) { } + static inline void __init sme_early_init(void) { } + static inline void __init sev_setup_arch(void) { } + +-static inline void __init sme_encrypt_kernel(struct boot_params *bp) { } +-static inline void __init sme_enable(struct boot_params *bp) { } ++static inline void sme_encrypt_kernel(struct boot_params *bp) { } ++static inline void sme_enable(struct boot_params *bp) { } + + static inline void sev_es_init_vc_handling(void) { } + +diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h +index c75cc5610be30..621bac6b74011 100644 +--- a/arch/x86/include/asm/msr-index.h ++++ b/arch/x86/include/asm/msr-index.h +@@ -50,10 +50,13 @@ + #define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ + #define SPEC_CTRL_RRSBA_DIS_S_SHIFT 6 /* Disable RRSBA behavior */ + #define SPEC_CTRL_RRSBA_DIS_S 
BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT) ++#define SPEC_CTRL_BHI_DIS_S_SHIFT 10 /* Disable Branch History Injection behavior */ ++#define SPEC_CTRL_BHI_DIS_S BIT(SPEC_CTRL_BHI_DIS_S_SHIFT) + + /* A mask for bits which the kernel toggles when controlling mitigations */ + #define SPEC_CTRL_MITIGATIONS_MASK (SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD \ +- | SPEC_CTRL_RRSBA_DIS_S) ++ | SPEC_CTRL_RRSBA_DIS_S \ ++ | SPEC_CTRL_BHI_DIS_S) + + #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ + #define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */ +@@ -152,6 +155,10 @@ + * are restricted to targets in + * kernel. + */ ++#define ARCH_CAP_BHI_NO BIT(20) /* ++ * CPU is not affected by Branch ++ * History Injection. ++ */ + #define ARCH_CAP_PBRSB_NO BIT(24) /* + * Not susceptible to Post-Barrier + * Return Stack Buffer Predictions. +diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h +index 8ae2cb30ade3d..a8781c8763b44 100644 +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -271,11 +271,20 @@ + .Lskip_rsb_\@: + .endm + ++/* ++ * The CALL to srso_alias_untrain_ret() must be patched in directly at ++ * the spot where untraining must be done, ie., srso_alias_untrain_ret() ++ * must be the target of a CALL instruction instead of indirectly ++ * jumping to a wrapper which then calls it. Therefore, this macro is ++ * called outside of __UNTRAIN_RET below, for the time being, before the ++ * kernel can support nested alternatives with arbitrary nesting. ++ */ ++.macro CALL_UNTRAIN_RET + #if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_SRSO) +-#define CALL_UNTRAIN_RET "call entry_untrain_ret" +-#else +-#define CALL_UNTRAIN_RET "" ++ ALTERNATIVE_2 "", "call entry_untrain_ret", X86_FEATURE_UNRET, \ ++ "call srso_alias_untrain_ret", X86_FEATURE_SRSO_ALIAS + #endif ++.endm + + /* + * Mitigate RETBleed for AMD/Hygon Zen uarch. Requires KERNEL CR3 because the +@@ -288,38 +297,24 @@ + * As such, this must be placed after every *SWITCH_TO_KERNEL_CR3 at a point + * where we have a stack but before any RET instruction. 
+ */ +-.macro UNTRAIN_RET +-#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) || \ +- defined(CONFIG_CALL_DEPTH_TRACKING) || defined(CONFIG_CPU_SRSO) ++.macro __UNTRAIN_RET ibpb_feature, call_depth_insns ++#if defined(CONFIG_RETHUNK) || defined(CONFIG_CPU_IBPB_ENTRY) + VALIDATE_UNRET_END +- ALTERNATIVE_3 "", \ +- CALL_UNTRAIN_RET, X86_FEATURE_UNRET, \ +- "call entry_ibpb", X86_FEATURE_ENTRY_IBPB, \ +- __stringify(RESET_CALL_DEPTH), X86_FEATURE_CALL_DEPTH ++ CALL_UNTRAIN_RET ++ ALTERNATIVE_2 "", \ ++ "call entry_ibpb", \ibpb_feature, \ ++ __stringify(\call_depth_insns), X86_FEATURE_CALL_DEPTH + #endif + .endm + +-.macro UNTRAIN_RET_VM +-#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) || \ +- defined(CONFIG_CALL_DEPTH_TRACKING) || defined(CONFIG_CPU_SRSO) +- VALIDATE_UNRET_END +- ALTERNATIVE_3 "", \ +- CALL_UNTRAIN_RET, X86_FEATURE_UNRET, \ +- "call entry_ibpb", X86_FEATURE_IBPB_ON_VMEXIT, \ +- __stringify(RESET_CALL_DEPTH), X86_FEATURE_CALL_DEPTH +-#endif +-.endm ++#define UNTRAIN_RET \ ++ __UNTRAIN_RET X86_FEATURE_ENTRY_IBPB, __stringify(RESET_CALL_DEPTH) + +-.macro UNTRAIN_RET_FROM_CALL +-#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) || \ +- defined(CONFIG_CALL_DEPTH_TRACKING) || defined(CONFIG_CPU_SRSO) +- VALIDATE_UNRET_END +- ALTERNATIVE_3 "", \ +- CALL_UNTRAIN_RET, X86_FEATURE_UNRET, \ +- "call entry_ibpb", X86_FEATURE_ENTRY_IBPB, \ +- __stringify(RESET_CALL_DEPTH_FROM_CALL), X86_FEATURE_CALL_DEPTH +-#endif +-.endm ++#define UNTRAIN_RET_VM \ ++ __UNTRAIN_RET X86_FEATURE_IBPB_ON_VMEXIT, __stringify(RESET_CALL_DEPTH) ++ ++#define UNTRAIN_RET_FROM_CALL \ ++ __UNTRAIN_RET X86_FEATURE_ENTRY_IBPB, __stringify(RESET_CALL_DEPTH_FROM_CALL) + + + .macro CALL_DEPTH_ACCOUNT +@@ -340,6 +335,19 @@ + ALTERNATIVE "", __stringify(verw _ASM_RIP(mds_verw_sel)), X86_FEATURE_CLEAR_CPU_BUF + .endm + ++#ifdef CONFIG_X86_64 ++.macro CLEAR_BRANCH_HISTORY ++ ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP ++.endm ++ ++.macro CLEAR_BRANCH_HISTORY_VMEXIT ++ ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT ++.endm ++#else ++#define CLEAR_BRANCH_HISTORY ++#define CLEAR_BRANCH_HISTORY_VMEXIT ++#endif ++ + #else /* __ASSEMBLY__ */ + + #define ANNOTATE_RETPOLINE_SAFE \ +@@ -359,6 +367,22 @@ extern void __x86_return_thunk(void); + static inline void __x86_return_thunk(void) {} + #endif + ++#ifdef CONFIG_CPU_UNRET_ENTRY ++extern void retbleed_return_thunk(void); ++#else ++static inline void retbleed_return_thunk(void) {} ++#endif ++ ++extern void srso_alias_untrain_ret(void); ++ ++#ifdef CONFIG_CPU_SRSO ++extern void srso_return_thunk(void); ++extern void srso_alias_return_thunk(void); ++#else ++static inline void srso_return_thunk(void) {} ++static inline void srso_alias_return_thunk(void) {} ++#endif ++ + extern void retbleed_return_thunk(void); + extern void srso_return_thunk(void); + extern void srso_alias_return_thunk(void); +@@ -370,6 +394,10 @@ extern void srso_alias_untrain_ret(void); + extern void entry_untrain_ret(void); + extern void entry_ibpb(void); + ++#ifdef CONFIG_X86_64 ++extern void clear_bhb_loop(void); ++#endif ++ + extern void (*x86_return_thunk)(void); + + #ifdef CONFIG_CALL_DEPTH_TRACKING +diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h +index 7ba1726b71c7b..e9187ddd3d1fd 100644 +--- a/arch/x86/include/asm/required-features.h ++++ b/arch/x86/include/asm/required-features.h +@@ -99,6 +99,7 @@ + #define REQUIRED_MASK18 0 + #define 
REQUIRED_MASK19 0 + #define REQUIRED_MASK20 0 +-#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21) ++#define REQUIRED_MASK21 0 ++#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 22) + + #endif /* _ASM_X86_REQUIRED_FEATURES_H */ +diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h +index 36f905797075e..75a5388d40681 100644 +--- a/arch/x86/include/asm/sev.h ++++ b/arch/x86/include/asm/sev.h +@@ -199,15 +199,15 @@ static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate) + struct snp_guest_request_ioctl; + + void setup_ghcb(void); +-void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, +- unsigned long npages); +-void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, +- unsigned long npages); ++void early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, ++ unsigned long npages); ++void early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, ++ unsigned long npages); + void snp_set_memory_shared(unsigned long vaddr, unsigned long npages); + void snp_set_memory_private(unsigned long vaddr, unsigned long npages); + void snp_set_wakeup_secondary_cpu(void); + bool snp_init(struct boot_params *bp); +-void __init __noreturn snp_abort(void); ++void __noreturn snp_abort(void); + void snp_dmi_setup(void); + int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio); + void snp_accept_memory(phys_addr_t start, phys_addr_t end); +diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h +index 4fb36fba4b5a1..03bb950eba690 100644 +--- a/arch/x86/include/asm/syscall.h ++++ b/arch/x86/include/asm/syscall.h +@@ -16,19 +16,17 @@ + #include <asm/thread_info.h> /* for TS_COMPAT */ + #include <asm/unistd.h> + ++/* This is used purely for kernel/trace/trace_syscalls.c */ + typedef long (*sys_call_ptr_t)(const struct pt_regs *); + extern const sys_call_ptr_t sys_call_table[]; + +-#if defined(CONFIG_X86_32) +-#define ia32_sys_call_table sys_call_table +-#else + /* + * These may not exist, but still put the prototypes in so we + * can use IS_ENABLED(). + */ +-extern const sys_call_ptr_t ia32_sys_call_table[]; +-extern const sys_call_ptr_t x32_sys_call_table[]; +-#endif ++extern long ia32_sys_call(const struct pt_regs *, unsigned int nr); ++extern long x32_sys_call(const struct pt_regs *, unsigned int nr); ++extern long x64_sys_call(const struct pt_regs *, unsigned int nr); + + /* + * Only the low 32 bits of orig_ax are meaningful, so we return int. 
+@@ -127,6 +125,7 @@ static inline int syscall_get_arch(struct task_struct *task) + } + + void do_syscall_64(struct pt_regs *regs, int nr); ++void do_int80_emulation(struct pt_regs *regs); + + #endif /* CONFIG_X86_32 */ + +diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h +index 01d19fc223463..eeea058cf6028 100644 +--- a/arch/x86/include/uapi/asm/bootparam.h ++++ b/arch/x86/include/uapi/asm/bootparam.h +@@ -38,6 +38,7 @@ + #define XLF_EFI_KEXEC (1<<4) + #define XLF_5LEVEL (1<<5) + #define XLF_5LEVEL_ENABLED (1<<6) ++#define XLF_MEM_ENCRYPTION (1<<7) + + #ifndef __ASSEMBLY__ + +diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c +index 031bca974fbf3..9fd91022d92d0 100644 +--- a/arch/x86/kernel/cpu/amd.c ++++ b/arch/x86/kernel/cpu/amd.c +@@ -66,20 +66,6 @@ static const int amd_erratum_400[] = + static const int amd_erratum_383[] = + AMD_OSVW_ERRATUM(3, AMD_MODEL_RANGE(0x10, 0, 0, 0xff, 0xf)); + +-/* #1054: Instructions Retired Performance Counter May Be Inaccurate */ +-static const int amd_erratum_1054[] = +- AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0, 0, 0x2f, 0xf)); +- +-static const int amd_zenbleed[] = +- AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0x30, 0x0, 0x4f, 0xf), +- AMD_MODEL_RANGE(0x17, 0x60, 0x0, 0x7f, 0xf), +- AMD_MODEL_RANGE(0x17, 0x90, 0x0, 0x91, 0xf), +- AMD_MODEL_RANGE(0x17, 0xa0, 0x0, 0xaf, 0xf)); +- +-static const int amd_div0[] = +- AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0x00, 0x0, 0x2f, 0xf), +- AMD_MODEL_RANGE(0x17, 0x50, 0x0, 0x5f, 0xf)); +- + static const int amd_erratum_1485[] = + AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x19, 0x10, 0x0, 0x1f, 0xf), + AMD_MODEL_RANGE(0x19, 0x60, 0x0, 0xaf, 0xf)); +@@ -620,6 +606,49 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) + } + + resctrl_cpu_detect(c); ++ ++ /* Figure out Zen generations: */ ++ switch (c->x86) { ++ case 0x17: { ++ switch (c->x86_model) { ++ case 0x00 ... 0x2f: ++ case 0x50 ... 0x5f: ++ setup_force_cpu_cap(X86_FEATURE_ZEN1); ++ break; ++ case 0x30 ... 0x4f: ++ case 0x60 ... 0x7f: ++ case 0x90 ... 0x91: ++ case 0xa0 ... 0xaf: ++ setup_force_cpu_cap(X86_FEATURE_ZEN2); ++ break; ++ default: ++ goto warn; ++ } ++ break; ++ } ++ case 0x19: { ++ switch (c->x86_model) { ++ case 0x00 ... 0x0f: ++ case 0x20 ... 0x5f: ++ setup_force_cpu_cap(X86_FEATURE_ZEN3); ++ break; ++ case 0x10 ... 0x1f: ++ case 0x60 ... 0xaf: ++ setup_force_cpu_cap(X86_FEATURE_ZEN4); ++ break; ++ default: ++ goto warn; ++ } ++ break; ++ } ++ default: ++ break; ++ } ++ ++ return; ++ ++warn: ++ WARN_ONCE(1, "Family 0x%x, model: 0x%x??\n", c->x86, c->x86_model); + } + + static void early_detect_mem_encrypt(struct cpuinfo_x86 *c) +@@ -945,6 +974,19 @@ static void init_amd_bd(struct cpuinfo_x86 *c) + clear_rdrand_cpuid_bit(c); + } + ++static void fix_erratum_1386(struct cpuinfo_x86 *c) ++{ ++ /* ++ * Work around Erratum 1386. The XSAVES instruction malfunctions in ++ * certain circumstances on Zen1/2 uarch, and not all parts have had ++ * updated microcode at the time of writing (March 2023). ++ * ++ * Affected parts all have no supervisor XSAVE states, meaning that ++ * the XSAVEC instruction (which works fine) is equivalent. ++ */ ++ clear_cpu_cap(c, X86_FEATURE_XSAVES); ++} ++ + void init_spectral_chicken(struct cpuinfo_x86 *c) + { + #ifdef CONFIG_CPU_UNRET_ENTRY +@@ -965,24 +1007,19 @@ void init_spectral_chicken(struct cpuinfo_x86 *c) + } + } + #endif +- /* +- * Work around Erratum 1386. 
The XSAVES instruction malfunctions in +- * certain circumstances on Zen1/2 uarch, and not all parts have had +- * updated microcode at the time of writing (March 2023). +- * +- * Affected parts all have no supervisor XSAVE states, meaning that +- * the XSAVEC instruction (which works fine) is equivalent. +- */ +- clear_cpu_cap(c, X86_FEATURE_XSAVES); + } + + static void init_amd_zn(struct cpuinfo_x86 *c) + { +- set_cpu_cap(c, X86_FEATURE_ZEN); +- ++ setup_force_cpu_cap(X86_FEATURE_ZEN); + #ifdef CONFIG_NUMA + node_reclaim_distance = 32; + #endif ++} ++ ++static void init_amd_zen1(struct cpuinfo_x86 *c) ++{ ++ fix_erratum_1386(c); + + /* Fix up CPUID bits, but only if not virtualised. */ + if (!cpu_has(c, X86_FEATURE_HYPERVISOR)) { +@@ -999,6 +1036,9 @@ static void init_amd_zn(struct cpuinfo_x86 *c) + if (c->x86 == 0x19 && !cpu_has(c, X86_FEATURE_BTC_NO)) + set_cpu_cap(c, X86_FEATURE_BTC_NO); + } ++ ++ pr_notice_once("AMD Zen1 DIV0 bug detected. Disable SMT for full protection.\n"); ++ setup_force_cpu_bug(X86_BUG_DIV0); + } + + static bool cpu_has_zenbleed_microcode(void) +@@ -1023,11 +1063,8 @@ static bool cpu_has_zenbleed_microcode(void) + return true; + } + +-static void zenbleed_check(struct cpuinfo_x86 *c) ++static void zen2_zenbleed_check(struct cpuinfo_x86 *c) + { +- if (!cpu_has_amd_erratum(c, amd_zenbleed)) +- return; +- + if (cpu_has(c, X86_FEATURE_HYPERVISOR)) + return; + +@@ -1042,6 +1079,20 @@ static void zenbleed_check(struct cpuinfo_x86 *c) + } + } + ++static void init_amd_zen2(struct cpuinfo_x86 *c) ++{ ++ fix_erratum_1386(c); ++ zen2_zenbleed_check(c); ++} ++ ++static void init_amd_zen3(struct cpuinfo_x86 *c) ++{ ++} ++ ++static void init_amd_zen4(struct cpuinfo_x86 *c) ++{ ++} ++ + static void init_amd(struct cpuinfo_x86 *c) + { + early_init_amd(c); +@@ -1080,6 +1131,15 @@ static void init_amd(struct cpuinfo_x86 *c) + case 0x19: init_amd_zn(c); break; + } + ++ if (boot_cpu_has(X86_FEATURE_ZEN1)) ++ init_amd_zen1(c); ++ else if (boot_cpu_has(X86_FEATURE_ZEN2)) ++ init_amd_zen2(c); ++ else if (boot_cpu_has(X86_FEATURE_ZEN3)) ++ init_amd_zen3(c); ++ else if (boot_cpu_has(X86_FEATURE_ZEN4)) ++ init_amd_zen4(c); ++ + /* + * Enable workaround for FXSAVE leak on CPUs + * without a XSaveErPtr feature +@@ -1131,7 +1191,7 @@ static void init_amd(struct cpuinfo_x86 *c) + * Counter May Be Inaccurate". + */ + if (cpu_has(c, X86_FEATURE_IRPERF) && +- !cpu_has_amd_erratum(c, amd_erratum_1054)) ++ (boot_cpu_has(X86_FEATURE_ZEN1) && c->x86_model > 0x2f)) + msr_set_bit(MSR_K7_HWCR, MSR_K7_HWCR_IRPERF_EN_BIT); + + check_null_seg_clears_base(c); +@@ -1147,13 +1207,6 @@ static void init_amd(struct cpuinfo_x86 *c) + cpu_has(c, X86_FEATURE_AUTOIBRS)) + WARN_ON_ONCE(msr_set_bit(MSR_EFER, _EFER_AUTOIBRS)); + +- zenbleed_check(c); +- +- if (cpu_has_amd_erratum(c, amd_div0)) { +- pr_notice_once("AMD Zen1 DIV0 bug detected. 
Disable SMT for full protection.\n"); +- setup_force_cpu_bug(X86_BUG_DIV0); +- } +- + if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && + cpu_has_amd_erratum(c, amd_erratum_1485)) + msr_set_bit(MSR_ZEN4_BP_CFG, MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT); +@@ -1313,7 +1366,7 @@ static void zenbleed_check_cpu(void *unused) + { + struct cpuinfo_x86 *c = &cpu_data(smp_processor_id()); + +- zenbleed_check(c); ++ zen2_zenbleed_check(c); + } + + void amd_check_microcode(void) +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index 3452f7271d074..3fc2301556271 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -63,7 +63,7 @@ EXPORT_SYMBOL_GPL(x86_pred_cmd); + + static DEFINE_MUTEX(spec_ctrl_mutex); + +-void (*x86_return_thunk)(void) __ro_after_init = &__x86_return_thunk; ++void (*x86_return_thunk)(void) __ro_after_init = __x86_return_thunk; + + /* Update SPEC_CTRL MSR and its cached copy unconditionally */ + static void update_spec_ctrl(u64 val) +@@ -1108,8 +1108,7 @@ static void __init retbleed_select_mitigation(void) + setup_force_cpu_cap(X86_FEATURE_RETHUNK); + setup_force_cpu_cap(X86_FEATURE_UNRET); + +- if (IS_ENABLED(CONFIG_RETHUNK)) +- x86_return_thunk = retbleed_return_thunk; ++ x86_return_thunk = retbleed_return_thunk; + + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && + boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) +@@ -1607,6 +1606,79 @@ static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_ + dump_stack(); + } + ++/* ++ * Set BHI_DIS_S to prevent indirect branches in kernel to be influenced by ++ * branch history in userspace. Not needed if BHI_NO is set. ++ */ ++static bool __init spec_ctrl_bhi_dis(void) ++{ ++ if (!boot_cpu_has(X86_FEATURE_BHI_CTRL)) ++ return false; ++ ++ x86_spec_ctrl_base |= SPEC_CTRL_BHI_DIS_S; ++ update_spec_ctrl(x86_spec_ctrl_base); ++ setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_HW); ++ ++ return true; ++} ++ ++enum bhi_mitigations { ++ BHI_MITIGATION_OFF, ++ BHI_MITIGATION_ON, ++ BHI_MITIGATION_AUTO, ++}; ++ ++static enum bhi_mitigations bhi_mitigation __ro_after_init = ++ IS_ENABLED(CONFIG_SPECTRE_BHI_ON) ? BHI_MITIGATION_ON : ++ IS_ENABLED(CONFIG_SPECTRE_BHI_OFF) ? 
BHI_MITIGATION_OFF : ++ BHI_MITIGATION_AUTO; ++ ++static int __init spectre_bhi_parse_cmdline(char *str) ++{ ++ if (!str) ++ return -EINVAL; ++ ++ if (!strcmp(str, "off")) ++ bhi_mitigation = BHI_MITIGATION_OFF; ++ else if (!strcmp(str, "on")) ++ bhi_mitigation = BHI_MITIGATION_ON; ++ else if (!strcmp(str, "auto")) ++ bhi_mitigation = BHI_MITIGATION_AUTO; ++ else ++ pr_err("Ignoring unknown spectre_bhi option (%s)", str); ++ ++ return 0; ++} ++early_param("spectre_bhi", spectre_bhi_parse_cmdline); ++ ++static void __init bhi_select_mitigation(void) ++{ ++ if (bhi_mitigation == BHI_MITIGATION_OFF) ++ return; ++ ++ /* Retpoline mitigates against BHI unless the CPU has RRSBA behavior */ ++ if (cpu_feature_enabled(X86_FEATURE_RETPOLINE) && ++ !(x86_read_arch_cap_msr() & ARCH_CAP_RRSBA)) ++ return; ++ ++ if (spec_ctrl_bhi_dis()) ++ return; ++ ++ if (!IS_ENABLED(CONFIG_X86_64)) ++ return; ++ ++ /* Mitigate KVM by default */ ++ setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT); ++ pr_info("Spectre BHI mitigation: SW BHB clearing on vm exit\n"); ++ ++ if (bhi_mitigation == BHI_MITIGATION_AUTO) ++ return; ++ ++ /* Mitigate syscalls when the mitigation is forced =on */ ++ setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP); ++ pr_info("Spectre BHI mitigation: SW BHB clearing on syscall\n"); ++} ++ + static void __init spectre_v2_select_mitigation(void) + { + enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline(); +@@ -1718,6 +1790,9 @@ static void __init spectre_v2_select_mitigation(void) + mode == SPECTRE_V2_RETPOLINE) + spec_ctrl_disable_kernel_rrsba(); + ++ if (boot_cpu_has(X86_BUG_BHI)) ++ bhi_select_mitigation(); ++ + spectre_v2_enabled = mode; + pr_info("%s\n", spectre_v2_strings[mode]); + +@@ -2695,15 +2770,15 @@ static char *stibp_state(void) + + switch (spectre_v2_user_stibp) { + case SPECTRE_V2_USER_NONE: +- return ", STIBP: disabled"; ++ return "; STIBP: disabled"; + case SPECTRE_V2_USER_STRICT: +- return ", STIBP: forced"; ++ return "; STIBP: forced"; + case SPECTRE_V2_USER_STRICT_PREFERRED: +- return ", STIBP: always-on"; ++ return "; STIBP: always-on"; + case SPECTRE_V2_USER_PRCTL: + case SPECTRE_V2_USER_SECCOMP: + if (static_key_enabled(&switch_to_cond_stibp)) +- return ", STIBP: conditional"; ++ return "; STIBP: conditional"; + } + return ""; + } +@@ -2712,10 +2787,10 @@ static char *ibpb_state(void) + { + if (boot_cpu_has(X86_FEATURE_IBPB)) { + if (static_key_enabled(&switch_mm_always_ibpb)) +- return ", IBPB: always-on"; ++ return "; IBPB: always-on"; + if (static_key_enabled(&switch_mm_cond_ibpb)) +- return ", IBPB: conditional"; +- return ", IBPB: disabled"; ++ return "; IBPB: conditional"; ++ return "; IBPB: disabled"; + } + return ""; + } +@@ -2725,14 +2800,31 @@ static char *pbrsb_eibrs_state(void) + if (boot_cpu_has_bug(X86_BUG_EIBRS_PBRSB)) { + if (boot_cpu_has(X86_FEATURE_RSB_VMEXIT_LITE) || + boot_cpu_has(X86_FEATURE_RSB_VMEXIT)) +- return ", PBRSB-eIBRS: SW sequence"; ++ return "; PBRSB-eIBRS: SW sequence"; + else +- return ", PBRSB-eIBRS: Vulnerable"; ++ return "; PBRSB-eIBRS: Vulnerable"; + } else { +- return ", PBRSB-eIBRS: Not affected"; ++ return "; PBRSB-eIBRS: Not affected"; + } + } + ++static const char * const spectre_bhi_state(void) ++{ ++ if (!boot_cpu_has_bug(X86_BUG_BHI)) ++ return "; BHI: Not affected"; ++ else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_HW)) ++ return "; BHI: BHI_DIS_S"; ++ else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP)) ++ return "; BHI: SW loop, KVM: SW loop"; ++ else if (boot_cpu_has(X86_FEATURE_RETPOLINE) && ++ 
!(x86_read_arch_cap_msr() & ARCH_CAP_RRSBA)) ++ return "; BHI: Retpoline"; ++ else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT)) ++ return "; BHI: Syscall hardening, KVM: SW loop"; ++ ++ return "; BHI: Vulnerable (Syscall hardening enabled)"; ++} ++ + static ssize_t spectre_v2_show_state(char *buf) + { + if (spectre_v2_enabled == SPECTRE_V2_LFENCE) +@@ -2745,13 +2837,15 @@ static ssize_t spectre_v2_show_state(char *buf) + spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE) + return sysfs_emit(buf, "Vulnerable: eIBRS+LFENCE with unprivileged eBPF and SMT\n"); + +- return sysfs_emit(buf, "%s%s%s%s%s%s%s\n", ++ return sysfs_emit(buf, "%s%s%s%s%s%s%s%s\n", + spectre_v2_strings[spectre_v2_enabled], + ibpb_state(), +- boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", ++ boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? "; IBRS_FW" : "", + stibp_state(), +- boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "", ++ boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? "; RSB filling" : "", + pbrsb_eibrs_state(), ++ spectre_bhi_state(), ++ /* this should always be at the end */ + spectre_v2_module_string()); + } + +diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c +index 73cfac3fc9c4c..fc4c9a7fb1e3d 100644 +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -1165,6 +1165,7 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) + #define NO_SPECTRE_V2 BIT(8) + #define NO_MMIO BIT(9) + #define NO_EIBRS_PBRSB BIT(10) ++#define NO_BHI BIT(11) + + #define VULNWL(vendor, family, model, whitelist) \ + X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, whitelist) +@@ -1227,18 +1228,18 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { + VULNWL_INTEL(ATOM_TREMONT_D, NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB), + + /* AMD Family 0xf - 0x12 */ +- VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), +- VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), +- VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), +- VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), ++ VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI), ++ VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI), ++ VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI), ++ VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI), + + /* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */ +- VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB), +- VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB), ++ VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB | NO_BHI), ++ VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB | NO_BHI), + + /* Zhaoxin Family 7 */ +- VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO), +- VULNWL(ZHAOXIN, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO), ++ VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO | NO_BHI), ++ VULNWL(ZHAOXIN, 7, X86_MODEL_ANY, 
NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO | NO_BHI), + {} + }; + +@@ -1475,6 +1476,13 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) + if (vulnerable_to_rfds(ia32_cap)) + setup_force_cpu_bug(X86_BUG_RFDS); + ++ /* When virtualized, eIBRS could be hidden, assume vulnerable */ ++ if (!(ia32_cap & ARCH_CAP_BHI_NO) && ++ !cpu_matches(cpu_vuln_whitelist, NO_BHI) && ++ (boot_cpu_has(X86_FEATURE_IBRS_ENHANCED) || ++ boot_cpu_has(X86_FEATURE_HYPERVISOR))) ++ setup_force_cpu_bug(X86_BUG_BHI); ++ + if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) + return; + +diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c +index 20ab11aec60b8..e103c227acd3a 100644 +--- a/arch/x86/kernel/cpu/mce/core.c ++++ b/arch/x86/kernel/cpu/mce/core.c +@@ -2468,12 +2468,14 @@ static ssize_t set_bank(struct device *s, struct device_attribute *attr, + return -EINVAL; + + b = &per_cpu(mce_banks_array, s->id)[bank]; +- + if (!b->init) + return -ENODEV; + + b->ctl = new; ++ ++ mutex_lock(&mce_sysfs_mutex); + mce_restart(); ++ mutex_unlock(&mce_sysfs_mutex); + + return size; + } +diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c +index 0dad49a09b7a9..af5aa2c754c22 100644 +--- a/arch/x86/kernel/cpu/scattered.c ++++ b/arch/x86/kernel/cpu/scattered.c +@@ -28,6 +28,7 @@ static const struct cpuid_bit cpuid_bits[] = { + { X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 }, + { X86_FEATURE_INTEL_PPIN, CPUID_EBX, 0, 0x00000007, 1 }, + { X86_FEATURE_RRSBA_CTRL, CPUID_EDX, 2, 0x00000007, 2 }, ++ { X86_FEATURE_BHI_CTRL, CPUID_EDX, 4, 0x00000007, 2 }, + { X86_FEATURE_CQM_LLC, CPUID_EDX, 1, 0x0000000f, 0 }, + { X86_FEATURE_CQM_OCCUP_LLC, CPUID_EDX, 0, 0x0000000f, 1 }, + { X86_FEATURE_CQM_MBM_TOTAL, CPUID_EDX, 1, 0x0000000f, 1 }, +@@ -49,6 +50,7 @@ static const struct cpuid_bit cpuid_bits[] = { + { X86_FEATURE_BMEC, CPUID_EBX, 3, 0x80000020, 0 }, + { X86_FEATURE_PERFMON_V2, CPUID_EAX, 0, 0x80000022, 0 }, + { X86_FEATURE_AMD_LBR_V2, CPUID_EAX, 1, 0x80000022, 0 }, ++ { X86_FEATURE_AMD_LBR_PMC_FREEZE, CPUID_EAX, 2, 0x80000022, 0 }, + { 0, 0, 0, 0, 0 } + }; + +diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c +index bbc21798df10e..c58213bce294e 100644 +--- a/arch/x86/kernel/head64.c ++++ b/arch/x86/kernel/head64.c +@@ -41,6 +41,7 @@ + #include <asm/trapnr.h> + #include <asm/sev.h> + #include <asm/tdx.h> ++#include <asm/init.h> + + /* + * Manage page tables very early on. +@@ -84,8 +85,6 @@ static struct desc_ptr startup_gdt_descr = { + .address = 0, + }; + +-#define __head __section(".head.text") +- + static void __head *fixup_pointer(void *ptr, unsigned long physaddr) + { + return ptr - (void *)_text + (void *)physaddr; +diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c +index 15c700d358700..b223922248e9f 100644 +--- a/arch/x86/kernel/mpparse.c ++++ b/arch/x86/kernel/mpparse.c +@@ -196,12 +196,12 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early) + if (!smp_check_mpc(mpc, oem, str)) + return 0; + +- if (early) { +- /* Initialize the lapic mapping */ +- if (!acpi_lapic) +- register_lapic_address(mpc->lapic); ++ /* Initialize the lapic mapping */ ++ if (!acpi_lapic) ++ register_lapic_address(mpc->lapic); ++ ++ if (early) + return 1; +- } + + /* Now process the configuration blocks. 
*/ + while (count < mpc->length) { +diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c +index e63a8d05ce298..eb129277dcdd6 100644 +--- a/arch/x86/kernel/setup.c ++++ b/arch/x86/kernel/setup.c +@@ -35,6 +35,7 @@ + #include <asm/bios_ebda.h> + #include <asm/bugs.h> + #include <asm/cacheinfo.h> ++#include <asm/coco.h> + #include <asm/cpu.h> + #include <asm/efi.h> + #include <asm/gart.h> +@@ -1120,6 +1121,7 @@ void __init setup_arch(char **cmdline_p) + * memory size. + */ + sev_setup_arch(); ++ cc_random_init(); + + efi_fake_memmap(); + efi_find_mirror(); +diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c +index 466fe09898ccd..acbec4de3ec31 100644 +--- a/arch/x86/kernel/sev-shared.c ++++ b/arch/x86/kernel/sev-shared.c +@@ -89,7 +89,8 @@ static bool __init sev_es_check_cpu_features(void) + return true; + } + +-static void __noreturn sev_es_terminate(unsigned int set, unsigned int reason) ++static void __head __noreturn ++sev_es_terminate(unsigned int set, unsigned int reason) + { + u64 val = GHCB_MSR_TERM_REQ; + +@@ -326,13 +327,7 @@ static int sev_cpuid_hv(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid + */ + static const struct snp_cpuid_table *snp_cpuid_get_table(void) + { +- void *ptr; +- +- asm ("lea cpuid_table_copy(%%rip), %0" +- : "=r" (ptr) +- : "p" (&cpuid_table_copy)); +- +- return ptr; ++ return &RIP_REL_REF(cpuid_table_copy); + } + + /* +@@ -391,7 +386,7 @@ static u32 snp_cpuid_calc_xsave_size(u64 xfeatures_en, bool compacted) + return xsave_size; + } + +-static bool ++static bool __head + snp_cpuid_get_validated_func(struct cpuid_leaf *leaf) + { + const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table(); +@@ -528,7 +523,8 @@ static int snp_cpuid_postprocess(struct ghcb *ghcb, struct es_em_ctxt *ctxt, + * Returns -EOPNOTSUPP if feature not enabled. Any other non-zero return value + * should be treated as fatal by caller. + */ +-static int snp_cpuid(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf) ++static int __head ++snp_cpuid(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf) + { + const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table(); + +@@ -570,7 +566,7 @@ static int snp_cpuid(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_le + * page yet, so it only supports the MSR based communication with the + * hypervisor and only the CPUID exit-code. + */ +-void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code) ++void __head do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code) + { + unsigned int subfn = lower_bits(regs->cx, 32); + unsigned int fn = lower_bits(regs->ax, 32); +@@ -1016,7 +1012,8 @@ struct cc_setup_data { + * Search for a Confidential Computing blob passed in as a setup_data entry + * via the Linux Boot Protocol. + */ +-static struct cc_blob_sev_info *find_cc_blob_setup_data(struct boot_params *bp) ++static __head ++struct cc_blob_sev_info *find_cc_blob_setup_data(struct boot_params *bp) + { + struct cc_setup_data *sd = NULL; + struct setup_data *hdr; +@@ -1043,7 +1040,7 @@ static struct cc_blob_sev_info *find_cc_blob_setup_data(struct boot_params *bp) + * mapping needs to be updated in sync with all the changes to virtual memory + * layout and related mapping facilities throughout the boot process. 
+ */ +-static void __init setup_cpuid_table(const struct cc_blob_sev_info *cc_info) ++static void __head setup_cpuid_table(const struct cc_blob_sev_info *cc_info) + { + const struct snp_cpuid_table *cpuid_table_fw, *cpuid_table; + int i; +diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c +index a8db68a063c46..9905dc0e0b096 100644 +--- a/arch/x86/kernel/sev.c ++++ b/arch/x86/kernel/sev.c +@@ -26,6 +26,7 @@ + #include <linux/dmi.h> + #include <uapi/linux/sev-guest.h> + ++#include <asm/init.h> + #include <asm/cpu_entry_area.h> + #include <asm/stacktrace.h> + #include <asm/sev.h> +@@ -683,8 +684,9 @@ static u64 __init get_jump_table_addr(void) + return ret; + } + +-static void early_set_pages_state(unsigned long vaddr, unsigned long paddr, +- unsigned long npages, enum psc_op op) ++static void __head ++early_set_pages_state(unsigned long vaddr, unsigned long paddr, ++ unsigned long npages, enum psc_op op) + { + unsigned long paddr_end; + u64 val; +@@ -740,7 +742,7 @@ static void early_set_pages_state(unsigned long vaddr, unsigned long paddr, + sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC); + } + +-void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, ++void __head early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, + unsigned long npages) + { + /* +@@ -2045,7 +2047,7 @@ bool __init handle_vc_boot_ghcb(struct pt_regs *regs) + * + * Scan for the blob in that order. + */ +-static __init struct cc_blob_sev_info *find_cc_blob(struct boot_params *bp) ++static __head struct cc_blob_sev_info *find_cc_blob(struct boot_params *bp) + { + struct cc_blob_sev_info *cc_info; + +@@ -2071,7 +2073,7 @@ static __init struct cc_blob_sev_info *find_cc_blob(struct boot_params *bp) + return cc_info; + } + +-bool __init snp_init(struct boot_params *bp) ++bool __head snp_init(struct boot_params *bp) + { + struct cc_blob_sev_info *cc_info; + +@@ -2093,7 +2095,7 @@ bool __init snp_init(struct boot_params *bp) + return true; + } + +-void __init __noreturn snp_abort(void) ++void __head __noreturn snp_abort(void) + { + sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED); + } +diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S +index f15fb71f280e2..54a5596adaa61 100644 +--- a/arch/x86/kernel/vmlinux.lds.S ++++ b/arch/x86/kernel/vmlinux.lds.S +@@ -139,10 +139,7 @@ SECTIONS + STATIC_CALL_TEXT + + ALIGN_ENTRY_TEXT_BEGIN +-#ifdef CONFIG_CPU_SRSO + *(.text..__x86.rethunk_untrain) +-#endif +- + ENTRY_TEXT + + #ifdef CONFIG_CPU_SRSO +@@ -520,12 +517,12 @@ INIT_PER_CPU(irq_stack_backing_store); + "fixed_percpu_data is not at start of per-cpu area"); + #endif + +-#ifdef CONFIG_RETHUNK ++#ifdef CONFIG_CPU_UNRET_ENTRY + . = ASSERT((retbleed_return_thunk & 0x3f) == 0, "retbleed_return_thunk not cacheline-aligned"); +-. = ASSERT((srso_safe_ret & 0x3f) == 0, "srso_safe_ret not cacheline-aligned"); + #endif + + #ifdef CONFIG_CPU_SRSO ++. = ASSERT((srso_safe_ret & 0x3f) == 0, "srso_safe_ret not cacheline-aligned"); + /* + * GNU ld cannot do XOR until 2.41. 
+ * https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=f6f78318fca803c4907fb8d7f6ded8295f1947b1 +diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c +index f7901cb4d2fa4..11c484d72eab2 100644 +--- a/arch/x86/kvm/mmu/mmu.c ++++ b/arch/x86/kvm/mmu/mmu.c +@@ -3120,7 +3120,7 @@ static int host_pfn_mapping_level(struct kvm *kvm, gfn_t gfn, + if (pud_none(pud) || !pud_present(pud)) + goto out; + +- if (pud_large(pud)) { ++ if (pud_leaf(pud)) { + level = PG_LEVEL_1G; + goto out; + } +diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h +index aadefcaa9561d..2f4e155080bad 100644 +--- a/arch/x86/kvm/reverse_cpuid.h ++++ b/arch/x86/kvm/reverse_cpuid.h +@@ -52,7 +52,7 @@ enum kvm_only_cpuid_leafs { + #define X86_FEATURE_IPRED_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 1) + #define KVM_X86_FEATURE_RRSBA_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 2) + #define X86_FEATURE_DDPD_U KVM_X86_FEATURE(CPUID_7_2_EDX, 3) +-#define X86_FEATURE_BHI_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 4) ++#define KVM_X86_FEATURE_BHI_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 4) + #define X86_FEATURE_MCDT_NO KVM_X86_FEATURE(CPUID_7_2_EDX, 5) + + /* CPUID level 0x80000007 (EDX). */ +@@ -102,10 +102,12 @@ static const struct cpuid_reg reverse_cpuid[] = { + */ + static __always_inline void reverse_cpuid_check(unsigned int x86_leaf) + { ++ BUILD_BUG_ON(NR_CPUID_WORDS != NCAPINTS); + BUILD_BUG_ON(x86_leaf == CPUID_LNX_1); + BUILD_BUG_ON(x86_leaf == CPUID_LNX_2); + BUILD_BUG_ON(x86_leaf == CPUID_LNX_3); + BUILD_BUG_ON(x86_leaf == CPUID_LNX_4); ++ BUILD_BUG_ON(x86_leaf == CPUID_LNX_5); + BUILD_BUG_ON(x86_leaf >= ARRAY_SIZE(reverse_cpuid)); + BUILD_BUG_ON(reverse_cpuid[x86_leaf].function == 0); + } +@@ -126,6 +128,7 @@ static __always_inline u32 __feature_translate(int x86_feature) + KVM_X86_TRANSLATE_FEATURE(CONSTANT_TSC); + KVM_X86_TRANSLATE_FEATURE(PERFMON_V2); + KVM_X86_TRANSLATE_FEATURE(RRSBA_CTRL); ++ KVM_X86_TRANSLATE_FEATURE(BHI_CTRL); + default: + return x86_feature; + } +diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c +index e86231c3b8a54..c5845f31c34dc 100644 +--- a/arch/x86/kvm/svm/sev.c ++++ b/arch/x86/kvm/svm/sev.c +@@ -84,9 +84,10 @@ struct enc_region { + }; + + /* Called with the sev_bitmap_lock held, or on shutdown */ +-static int sev_flush_asids(int min_asid, int max_asid) ++static int sev_flush_asids(unsigned int min_asid, unsigned int max_asid) + { +- int ret, asid, error = 0; ++ int ret, error = 0; ++ unsigned int asid; + + /* Check if there are any ASIDs to reclaim before performing a flush */ + asid = find_next_bit(sev_reclaim_asid_bitmap, nr_asids, min_asid); +@@ -116,7 +117,7 @@ static inline bool is_mirroring_enc_context(struct kvm *kvm) + } + + /* Must be called with the sev_bitmap_lock held */ +-static bool __sev_recycle_asids(int min_asid, int max_asid) ++static bool __sev_recycle_asids(unsigned int min_asid, unsigned int max_asid) + { + if (sev_flush_asids(min_asid, max_asid)) + return false; +@@ -143,8 +144,20 @@ static void sev_misc_cg_uncharge(struct kvm_sev_info *sev) + + static int sev_asid_new(struct kvm_sev_info *sev) + { +- int asid, min_asid, max_asid, ret; ++ /* ++ * SEV-enabled guests must use asid from min_sev_asid to max_sev_asid. ++ * SEV-ES-enabled guest can use from 1 to min_sev_asid - 1. ++ * Note: min ASID can end up larger than the max if basic SEV support is ++ * effectively disabled by disallowing use of ASIDs for SEV guests. ++ */ ++ unsigned int min_asid = sev->es_active ? 1 : min_sev_asid; ++ unsigned int max_asid = sev->es_active ? 
min_sev_asid - 1 : max_sev_asid; ++ unsigned int asid; + bool retry = true; ++ int ret; ++ ++ if (min_asid > max_asid) ++ return -ENOTTY; + + WARN_ON(sev->misc_cg); + sev->misc_cg = get_current_misc_cg(); +@@ -157,12 +170,6 @@ static int sev_asid_new(struct kvm_sev_info *sev) + + mutex_lock(&sev_bitmap_lock); + +- /* +- * SEV-enabled guests must use asid from min_sev_asid to max_sev_asid. +- * SEV-ES-enabled guest can use from 1 to min_sev_asid - 1. +- */ +- min_asid = sev->es_active ? 1 : min_sev_asid; +- max_asid = sev->es_active ? min_sev_asid - 1 : max_sev_asid; + again: + asid = find_next_zero_bit(sev_asid_bitmap, max_asid + 1, min_asid); + if (asid > max_asid) { +@@ -187,7 +194,7 @@ static int sev_asid_new(struct kvm_sev_info *sev) + return ret; + } + +-static int sev_get_asid(struct kvm *kvm) ++static unsigned int sev_get_asid(struct kvm *kvm) + { + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; + +@@ -284,8 +291,8 @@ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp) + + static int sev_bind_asid(struct kvm *kvm, unsigned int handle, int *error) + { ++ unsigned int asid = sev_get_asid(kvm); + struct sev_data_activate activate; +- int asid = sev_get_asid(kvm); + int ret; + + /* activate ASID on the given handle */ +@@ -2234,8 +2241,10 @@ void __init sev_hardware_setup(void) + goto out; + } + +- sev_asid_count = max_sev_asid - min_sev_asid + 1; +- WARN_ON_ONCE(misc_cg_set_capacity(MISC_CG_RES_SEV, sev_asid_count)); ++ if (min_sev_asid <= max_sev_asid) { ++ sev_asid_count = max_sev_asid - min_sev_asid + 1; ++ WARN_ON_ONCE(misc_cg_set_capacity(MISC_CG_RES_SEV, sev_asid_count)); ++ } + sev_supported = true; + + /* SEV-ES support requested? */ +@@ -2266,7 +2275,9 @@ void __init sev_hardware_setup(void) + out: + if (boot_cpu_has(X86_FEATURE_SEV)) + pr_info("SEV %s (ASIDs %u - %u)\n", +- sev_supported ? "enabled" : "disabled", ++ sev_supported ? min_sev_asid <= max_sev_asid ? "enabled" : ++ "unusable" : ++ "disabled", + min_sev_asid, max_sev_asid); + if (boot_cpu_has(X86_FEATURE_SEV_ES)) + pr_info("SEV-ES %s (ASIDs %u - %u)\n", +@@ -2314,7 +2325,7 @@ int sev_cpu_init(struct svm_cpu_data *sd) + */ + static void sev_flush_encrypted_page(struct kvm_vcpu *vcpu, void *va) + { +- int asid = to_kvm_svm(vcpu->kvm)->sev_info.asid; ++ unsigned int asid = sev_get_asid(vcpu->kvm); + + /* + * Note! 
The address must be a kernel address, as regular page walk +@@ -2632,7 +2643,7 @@ void sev_es_unmap_ghcb(struct vcpu_svm *svm) + void pre_sev_run(struct vcpu_svm *svm, int cpu) + { + struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, cpu); +- int asid = sev_get_asid(svm->vcpu.kvm); ++ unsigned int asid = sev_get_asid(svm->vcpu.kvm); + + /* Assign the asid allocated with this SEV guest */ + svm->asid = asid; +diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h +index 83843379813ee..b82e6ed4f0241 100644 +--- a/arch/x86/kvm/trace.h ++++ b/arch/x86/kvm/trace.h +@@ -732,13 +732,13 @@ TRACE_EVENT(kvm_nested_intr_vmexit, + * Tracepoint for nested #vmexit because of interrupt pending + */ + TRACE_EVENT(kvm_invlpga, +- TP_PROTO(__u64 rip, int asid, u64 address), ++ TP_PROTO(__u64 rip, unsigned int asid, u64 address), + TP_ARGS(rip, asid, address), + + TP_STRUCT__entry( +- __field( __u64, rip ) +- __field( int, asid ) +- __field( __u64, address ) ++ __field( __u64, rip ) ++ __field( unsigned int, asid ) ++ __field( __u64, address ) + ), + + TP_fast_assign( +@@ -747,7 +747,7 @@ TRACE_EVENT(kvm_invlpga, + __entry->address = address; + ), + +- TP_printk("rip: 0x%016llx asid: %d address: 0x%016llx", ++ TP_printk("rip: 0x%016llx asid: %u address: 0x%016llx", + __entry->rip, __entry->asid, __entry->address) + ); + +diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S +index 139960deb7362..9522d46567f81 100644 +--- a/arch/x86/kvm/vmx/vmenter.S ++++ b/arch/x86/kvm/vmx/vmenter.S +@@ -275,6 +275,8 @@ SYM_INNER_LABEL_ALIGN(vmx_vmexit, SYM_L_GLOBAL) + + call vmx_spec_ctrl_restore_host + ++ CLEAR_BRANCH_HISTORY_VMEXIT ++ + /* Put return value in AX */ + mov %_ASM_BX, %_ASM_AX + +diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c +index 4aafd007964fe..4ed8a7dc05369 100644 +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -1621,7 +1621,7 @@ static bool kvm_is_immutable_feature_msr(u32 msr) + ARCH_CAP_PSCHANGE_MC_NO | ARCH_CAP_TSX_CTRL_MSR | ARCH_CAP_TAA_NO | \ + ARCH_CAP_SBDR_SSDP_NO | ARCH_CAP_FBSDP_NO | ARCH_CAP_PSDP_NO | \ + ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO | ARCH_CAP_GDS_NO | \ +- ARCH_CAP_RFDS_NO | ARCH_CAP_RFDS_CLEAR) ++ ARCH_CAP_RFDS_NO | ARCH_CAP_RFDS_CLEAR | ARCH_CAP_BHI_NO) + + static u64 kvm_get_arch_capabilities(void) + { +diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile +index ea3a28e7b613c..f0dae4fb6d071 100644 +--- a/arch/x86/lib/Makefile ++++ b/arch/x86/lib/Makefile +@@ -14,19 +14,6 @@ ifdef CONFIG_KCSAN + CFLAGS_REMOVE_delay.o = $(CC_FLAGS_FTRACE) + endif + +-# Early boot use of cmdline; don't instrument it +-ifdef CONFIG_AMD_MEM_ENCRYPT +-KCOV_INSTRUMENT_cmdline.o := n +-KASAN_SANITIZE_cmdline.o := n +-KCSAN_SANITIZE_cmdline.o := n +- +-ifdef CONFIG_FUNCTION_TRACER +-CFLAGS_REMOVE_cmdline.o = -pg +-endif +- +-CFLAGS_cmdline.o := -fno-stack-protector -fno-jump-tables +-endif +- + inat_tables_script = $(srctree)/arch/x86/tools/gen-insn-attr-x86.awk + inat_tables_maps = $(srctree)/arch/x86/lib/x86-opcode-map.txt + quiet_cmd_inat_tables = GEN $@ +diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S +index cd86aeb5fdd3e..ffa51f392e17a 100644 +--- a/arch/x86/lib/retpoline.S ++++ b/arch/x86/lib/retpoline.S +@@ -126,12 +126,13 @@ SYM_CODE_END(__x86_indirect_jump_thunk_array) + #include <asm/GEN-for-each-reg.h> + #undef GEN + #endif +-/* +- * This function name is magical and is used by -mfunction-return=thunk-extern +- * for the compiler to generate JMPs to it. 
+- */ ++ + #ifdef CONFIG_RETHUNK + ++ .section .text..__x86.return_thunk ++ ++#ifdef CONFIG_CPU_SRSO ++ + /* + * srso_alias_untrain_ret() and srso_alias_safe_ret() are placed at + * special addresses: +@@ -147,9 +148,7 @@ SYM_CODE_END(__x86_indirect_jump_thunk_array) + * + * As a result, srso_alias_safe_ret() becomes a safe return. + */ +-#ifdef CONFIG_CPU_SRSO +- .section .text..__x86.rethunk_untrain +- ++ .pushsection .text..__x86.rethunk_untrain + SYM_START(srso_alias_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE) + UNWIND_HINT_FUNC + ANNOTATE_NOENDBR +@@ -158,17 +157,9 @@ SYM_START(srso_alias_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE) + jmp srso_alias_return_thunk + SYM_FUNC_END(srso_alias_untrain_ret) + __EXPORT_THUNK(srso_alias_untrain_ret) ++ .popsection + +- .section .text..__x86.rethunk_safe +-#else +-/* dummy definition for alternatives */ +-SYM_START(srso_alias_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE) +- ANNOTATE_UNRET_SAFE +- ret +- int3 +-SYM_FUNC_END(srso_alias_untrain_ret) +-#endif +- ++ .pushsection .text..__x86.rethunk_safe + SYM_START(srso_alias_safe_ret, SYM_L_GLOBAL, SYM_A_NONE) + lea 8(%_ASM_SP), %_ASM_SP + UNWIND_HINT_FUNC +@@ -177,14 +168,69 @@ SYM_START(srso_alias_safe_ret, SYM_L_GLOBAL, SYM_A_NONE) + int3 + SYM_FUNC_END(srso_alias_safe_ret) + +- .section .text..__x86.return_thunk +- +-SYM_CODE_START(srso_alias_return_thunk) ++SYM_CODE_START_NOALIGN(srso_alias_return_thunk) + UNWIND_HINT_FUNC + ANNOTATE_NOENDBR + call srso_alias_safe_ret + ud2 + SYM_CODE_END(srso_alias_return_thunk) ++ .popsection ++ ++/* ++ * SRSO untraining sequence for Zen1/2, similar to retbleed_untrain_ret() ++ * above. On kernel entry, srso_untrain_ret() is executed which is a ++ * ++ * movabs $0xccccc30824648d48,%rax ++ * ++ * and when the return thunk executes the inner label srso_safe_ret() ++ * later, it is a stack manipulation and a RET which is mispredicted and ++ * thus a "safe" one to use. ++ */ ++ .align 64 ++ .skip 64 - (srso_safe_ret - srso_untrain_ret), 0xcc ++SYM_START(srso_untrain_ret, SYM_L_LOCAL, SYM_A_NONE) ++ ANNOTATE_NOENDBR ++ .byte 0x48, 0xb8 ++ ++/* ++ * This forces the function return instruction to speculate into a trap ++ * (UD2 in srso_return_thunk() below). This RET will then mispredict ++ * and execution will continue at the return site read from the top of ++ * the stack. ++ */ ++SYM_INNER_LABEL(srso_safe_ret, SYM_L_GLOBAL) ++ lea 8(%_ASM_SP), %_ASM_SP ++ ret ++ int3 ++ int3 ++ /* end of movabs */ ++ lfence ++ call srso_safe_ret ++ ud2 ++SYM_CODE_END(srso_safe_ret) ++SYM_FUNC_END(srso_untrain_ret) ++ ++SYM_CODE_START(srso_return_thunk) ++ UNWIND_HINT_FUNC ++ ANNOTATE_NOENDBR ++ call srso_safe_ret ++ ud2 ++SYM_CODE_END(srso_return_thunk) ++ ++#define JMP_SRSO_UNTRAIN_RET "jmp srso_untrain_ret" ++#else /* !CONFIG_CPU_SRSO */ ++#define JMP_SRSO_UNTRAIN_RET "ud2" ++/* Dummy for the alternative in CALL_UNTRAIN_RET. */ ++SYM_CODE_START(srso_alias_untrain_ret) ++ ANNOTATE_UNRET_SAFE ++ ANNOTATE_NOENDBR ++ ret ++ int3 ++SYM_FUNC_END(srso_alias_untrain_ret) ++__EXPORT_THUNK(srso_alias_untrain_ret) ++#endif /* CONFIG_CPU_SRSO */ ++ ++#ifdef CONFIG_CPU_UNRET_ENTRY + + /* + * Some generic notes on the untraining sequences: +@@ -266,65 +312,19 @@ SYM_CODE_END(retbleed_return_thunk) + SYM_FUNC_END(retbleed_untrain_ret) + __EXPORT_THUNK(retbleed_untrain_ret) + +-/* +- * SRSO untraining sequence for Zen1/2, similar to retbleed_untrain_ret() +- * above. 
On kernel entry, srso_untrain_ret() is executed which is a +- * +- * movabs $0xccccc30824648d48,%rax +- * +- * and when the return thunk executes the inner label srso_safe_ret() +- * later, it is a stack manipulation and a RET which is mispredicted and +- * thus a "safe" one to use. +- */ +- .align 64 +- .skip 64 - (srso_safe_ret - srso_untrain_ret), 0xcc +-SYM_START(srso_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE) +- ANNOTATE_NOENDBR +- .byte 0x48, 0xb8 +- +-/* +- * This forces the function return instruction to speculate into a trap +- * (UD2 in srso_return_thunk() below). This RET will then mispredict +- * and execution will continue at the return site read from the top of +- * the stack. +- */ +-SYM_INNER_LABEL(srso_safe_ret, SYM_L_GLOBAL) +- lea 8(%_ASM_SP), %_ASM_SP +- ret +- int3 +- int3 +- /* end of movabs */ +- lfence +- call srso_safe_ret +- ud2 +-SYM_CODE_END(srso_safe_ret) +-SYM_FUNC_END(srso_untrain_ret) +-__EXPORT_THUNK(srso_untrain_ret) ++#define JMP_RETBLEED_UNTRAIN_RET "jmp retbleed_untrain_ret" ++#else /* !CONFIG_CPU_UNRET_ENTRY */ ++#define JMP_RETBLEED_UNTRAIN_RET "ud2" ++#endif /* CONFIG_CPU_UNRET_ENTRY */ + +-SYM_CODE_START(srso_return_thunk) +- UNWIND_HINT_FUNC +- ANNOTATE_NOENDBR +- call srso_safe_ret +- ud2 +-SYM_CODE_END(srso_return_thunk) ++#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_SRSO) + + SYM_FUNC_START(entry_untrain_ret) +- ALTERNATIVE_2 "jmp retbleed_untrain_ret", \ +- "jmp srso_untrain_ret", X86_FEATURE_SRSO, \ +- "jmp srso_alias_untrain_ret", X86_FEATURE_SRSO_ALIAS ++ ALTERNATIVE JMP_RETBLEED_UNTRAIN_RET, JMP_SRSO_UNTRAIN_RET, X86_FEATURE_SRSO + SYM_FUNC_END(entry_untrain_ret) + __EXPORT_THUNK(entry_untrain_ret) + +-SYM_CODE_START(__x86_return_thunk) +- UNWIND_HINT_FUNC +- ANNOTATE_NOENDBR +- ANNOTATE_UNRET_SAFE +- ret +- int3 +-SYM_CODE_END(__x86_return_thunk) +-EXPORT_SYMBOL(__x86_return_thunk) +- +-#endif /* CONFIG_RETHUNK */ ++#endif /* CONFIG_CPU_UNRET_ENTRY || CONFIG_CPU_SRSO */ + + #ifdef CONFIG_CALL_DEPTH_TRACKING + +@@ -359,3 +359,22 @@ SYM_FUNC_START(__x86_return_skl) + SYM_FUNC_END(__x86_return_skl) + + #endif /* CONFIG_CALL_DEPTH_TRACKING */ ++ ++/* ++ * This function name is magical and is used by -mfunction-return=thunk-extern ++ * for the compiler to generate JMPs to it. ++ * ++ * This code is only used during kernel boot or module init. All ++ * 'JMP __x86_return_thunk' sites are changed to something else by ++ * apply_returns(). 
++ */ ++SYM_CODE_START(__x86_return_thunk) ++ UNWIND_HINT_FUNC ++ ANNOTATE_NOENDBR ++ ANNOTATE_UNRET_SAFE ++ ret ++ int3 ++SYM_CODE_END(__x86_return_thunk) ++EXPORT_SYMBOL(__x86_return_thunk) ++ ++#endif /* CONFIG_RETHUNK */ +diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c +index a9d69ec994b75..e238517968836 100644 +--- a/arch/x86/mm/fault.c ++++ b/arch/x86/mm/fault.c +@@ -376,7 +376,7 @@ static void dump_pagetable(unsigned long address) + goto bad; + + pr_cont("PUD %lx ", pud_val(*pud)); +- if (!pud_present(*pud) || pud_large(*pud)) ++ if (!pud_present(*pud) || pud_leaf(*pud)) + goto out; + + pmd = pmd_offset(pud, address); +@@ -1037,7 +1037,7 @@ spurious_kernel_fault(unsigned long error_code, unsigned long address) + if (!pud_present(*pud)) + return 0; + +- if (pud_large(*pud)) ++ if (pud_leaf(*pud)) + return spurious_kernel_fault_check(error_code, (pte_t *) pud); + + pmd = pmd_offset(pud, address); +diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c +index f50cc210a9818..968d7005f4a72 100644 +--- a/arch/x86/mm/ident_map.c ++++ b/arch/x86/mm/ident_map.c +@@ -26,31 +26,18 @@ static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page, + for (; addr < end; addr = next) { + pud_t *pud = pud_page + pud_index(addr); + pmd_t *pmd; +- bool use_gbpage; + + next = (addr & PUD_MASK) + PUD_SIZE; + if (next > end) + next = end; + +- /* if this is already a gbpage, this portion is already mapped */ +- if (pud_large(*pud)) +- continue; +- +- /* Is using a gbpage allowed? */ +- use_gbpage = info->direct_gbpages; +- +- /* Don't use gbpage if it maps more than the requested region. */ +- /* at the begining: */ +- use_gbpage &= ((addr & ~PUD_MASK) == 0); +- /* ... or at the end: */ +- use_gbpage &= ((next & ~PUD_MASK) == 0); +- +- /* Never overwrite existing mappings */ +- use_gbpage &= !pud_present(*pud); +- +- if (use_gbpage) { ++ if (info->direct_gbpages) { + pud_t pudval; + ++ if (pud_present(*pud)) ++ continue; ++ ++ addr &= PUD_MASK; + pudval = __pud((addr - info->offset) | info->page_flag); + set_pud(pud, pudval); + continue; +diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c +index a190aae8ceaf7..19d209b412d7a 100644 +--- a/arch/x86/mm/init_64.c ++++ b/arch/x86/mm/init_64.c +@@ -617,7 +617,7 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end, + } + + if (!pud_none(*pud)) { +- if (!pud_large(*pud)) { ++ if (!pud_leaf(*pud)) { + pmd = pmd_offset(pud, 0); + paddr_last = phys_pmd_init(pmd, paddr, + paddr_end, +@@ -1163,7 +1163,7 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end, + if (!pud_present(*pud)) + continue; + +- if (pud_large(*pud) && ++ if (pud_leaf(*pud) && + IS_ALIGNED(addr, PUD_SIZE) && + IS_ALIGNED(next, PUD_SIZE)) { + spin_lock(&init_mm.page_table_lock); +diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c +index 0302491d799d1..fcf508c52bdc5 100644 +--- a/arch/x86/mm/kasan_init_64.c ++++ b/arch/x86/mm/kasan_init_64.c +@@ -115,7 +115,7 @@ static void __init kasan_populate_p4d(p4d_t *p4d, unsigned long addr, + pud = pud_offset(p4d, addr); + do { + next = pud_addr_end(addr, end); +- if (!pud_large(*pud)) ++ if (!pud_leaf(*pud)) + kasan_populate_pud(pud, addr, next, nid); + } while (pud++, addr = next, addr != end); + } +diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c +index 0166ab1780ccb..cc47a818a640a 100644 +--- a/arch/x86/mm/mem_encrypt_identity.c ++++ b/arch/x86/mm/mem_encrypt_identity.c +@@ -41,9 +41,9 @@ + #include 
<linux/mem_encrypt.h> + #include <linux/cc_platform.h> + ++#include <asm/init.h> + #include <asm/setup.h> + #include <asm/sections.h> +-#include <asm/cmdline.h> + #include <asm/coco.h> + #include <asm/sev.h> + +@@ -95,10 +95,7 @@ struct sme_populate_pgd_data { + */ + static char sme_workarea[2 * PMD_SIZE] __section(".init.scratch"); + +-static char sme_cmdline_arg[] __initdata = "mem_encrypt"; +-static char sme_cmdline_on[] __initdata = "on"; +- +-static void __init sme_clear_pgd(struct sme_populate_pgd_data *ppd) ++static void __head sme_clear_pgd(struct sme_populate_pgd_data *ppd) + { + unsigned long pgd_start, pgd_end, pgd_size; + pgd_t *pgd_p; +@@ -113,7 +110,7 @@ static void __init sme_clear_pgd(struct sme_populate_pgd_data *ppd) + memset(pgd_p, 0, pgd_size); + } + +-static pud_t __init *sme_prepare_pgd(struct sme_populate_pgd_data *ppd) ++static pud_t __head *sme_prepare_pgd(struct sme_populate_pgd_data *ppd) + { + pgd_t *pgd; + p4d_t *p4d; +@@ -144,13 +141,13 @@ static pud_t __init *sme_prepare_pgd(struct sme_populate_pgd_data *ppd) + set_pud(pud, __pud(PUD_FLAGS | __pa(pmd))); + } + +- if (pud_large(*pud)) ++ if (pud_leaf(*pud)) + return NULL; + + return pud; + } + +-static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd) ++static void __head sme_populate_pgd_large(struct sme_populate_pgd_data *ppd) + { + pud_t *pud; + pmd_t *pmd; +@@ -166,7 +163,7 @@ static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd) + set_pmd(pmd, __pmd(ppd->paddr | ppd->pmd_flags)); + } + +-static void __init sme_populate_pgd(struct sme_populate_pgd_data *ppd) ++static void __head sme_populate_pgd(struct sme_populate_pgd_data *ppd) + { + pud_t *pud; + pmd_t *pmd; +@@ -192,7 +189,7 @@ static void __init sme_populate_pgd(struct sme_populate_pgd_data *ppd) + set_pte(pte, __pte(ppd->paddr | ppd->pte_flags)); + } + +-static void __init __sme_map_range_pmd(struct sme_populate_pgd_data *ppd) ++static void __head __sme_map_range_pmd(struct sme_populate_pgd_data *ppd) + { + while (ppd->vaddr < ppd->vaddr_end) { + sme_populate_pgd_large(ppd); +@@ -202,7 +199,7 @@ static void __init __sme_map_range_pmd(struct sme_populate_pgd_data *ppd) + } + } + +-static void __init __sme_map_range_pte(struct sme_populate_pgd_data *ppd) ++static void __head __sme_map_range_pte(struct sme_populate_pgd_data *ppd) + { + while (ppd->vaddr < ppd->vaddr_end) { + sme_populate_pgd(ppd); +@@ -212,7 +209,7 @@ static void __init __sme_map_range_pte(struct sme_populate_pgd_data *ppd) + } + } + +-static void __init __sme_map_range(struct sme_populate_pgd_data *ppd, ++static void __head __sme_map_range(struct sme_populate_pgd_data *ppd, + pmdval_t pmd_flags, pteval_t pte_flags) + { + unsigned long vaddr_end; +@@ -236,22 +233,22 @@ static void __init __sme_map_range(struct sme_populate_pgd_data *ppd, + __sme_map_range_pte(ppd); + } + +-static void __init sme_map_range_encrypted(struct sme_populate_pgd_data *ppd) ++static void __head sme_map_range_encrypted(struct sme_populate_pgd_data *ppd) + { + __sme_map_range(ppd, PMD_FLAGS_ENC, PTE_FLAGS_ENC); + } + +-static void __init sme_map_range_decrypted(struct sme_populate_pgd_data *ppd) ++static void __head sme_map_range_decrypted(struct sme_populate_pgd_data *ppd) + { + __sme_map_range(ppd, PMD_FLAGS_DEC, PTE_FLAGS_DEC); + } + +-static void __init sme_map_range_decrypted_wp(struct sme_populate_pgd_data *ppd) ++static void __head sme_map_range_decrypted_wp(struct sme_populate_pgd_data *ppd) + { + __sme_map_range(ppd, PMD_FLAGS_DEC_WP, PTE_FLAGS_DEC_WP); + } + 
+-static unsigned long __init sme_pgtable_calc(unsigned long len) ++static unsigned long __head sme_pgtable_calc(unsigned long len) + { + unsigned long entries = 0, tables = 0; + +@@ -288,7 +285,7 @@ static unsigned long __init sme_pgtable_calc(unsigned long len) + return entries + tables; + } + +-void __init sme_encrypt_kernel(struct boot_params *bp) ++void __head sme_encrypt_kernel(struct boot_params *bp) + { + unsigned long workarea_start, workarea_end, workarea_len; + unsigned long execute_start, execute_end, execute_len; +@@ -323,9 +320,8 @@ void __init sme_encrypt_kernel(struct boot_params *bp) + * memory from being cached. + */ + +- /* Physical addresses gives us the identity mapped virtual addresses */ +- kernel_start = __pa_symbol(_text); +- kernel_end = ALIGN(__pa_symbol(_end), PMD_SIZE); ++ kernel_start = (unsigned long)RIP_REL_REF(_text); ++ kernel_end = ALIGN((unsigned long)RIP_REL_REF(_end), PMD_SIZE); + kernel_len = kernel_end - kernel_start; + + initrd_start = 0; +@@ -342,14 +338,6 @@ void __init sme_encrypt_kernel(struct boot_params *bp) + } + #endif + +- /* +- * We're running identity mapped, so we must obtain the address to the +- * SME encryption workarea using rip-relative addressing. +- */ +- asm ("lea sme_workarea(%%rip), %0" +- : "=r" (workarea_start) +- : "p" (sme_workarea)); +- + /* + * Calculate required number of workarea bytes needed: + * executable encryption area size: +@@ -359,7 +347,7 @@ void __init sme_encrypt_kernel(struct boot_params *bp) + * pagetable structures for the encryption of the kernel + * pagetable structures for workarea (in case not currently mapped) + */ +- execute_start = workarea_start; ++ execute_start = workarea_start = (unsigned long)RIP_REL_REF(sme_workarea); + execute_end = execute_start + (PAGE_SIZE * 2) + PMD_SIZE; + execute_len = execute_end - execute_start; + +@@ -502,13 +490,11 @@ void __init sme_encrypt_kernel(struct boot_params *bp) + native_write_cr3(__native_read_cr3()); + } + +-void __init sme_enable(struct boot_params *bp) ++void __head sme_enable(struct boot_params *bp) + { +- const char *cmdline_ptr, *cmdline_arg, *cmdline_on; + unsigned int eax, ebx, ecx, edx; + unsigned long feature_mask; + unsigned long me_mask; +- char buffer[16]; + bool snp; + u64 msr; + +@@ -551,6 +537,9 @@ void __init sme_enable(struct boot_params *bp) + + /* Check if memory encryption is enabled */ + if (feature_mask == AMD_SME_BIT) { ++ if (!(bp->hdr.xloadflags & XLF_MEM_ENCRYPTION)) ++ return; ++ + /* + * No SME if Hypervisor bit is set. This check is here to + * prevent a guest from trying to enable SME. For running as a +@@ -570,31 +559,8 @@ void __init sme_enable(struct boot_params *bp) + msr = __rdmsr(MSR_AMD64_SYSCFG); + if (!(msr & MSR_AMD64_SYSCFG_MEM_ENCRYPT)) + return; +- } else { +- /* SEV state cannot be controlled by a command line option */ +- goto out; + } + +- /* +- * Fixups have not been applied to phys_base yet and we're running +- * identity mapped, so we must obtain the address to the SME command +- * line argument data using rip-relative addressing. 
+- */ +- asm ("lea sme_cmdline_arg(%%rip), %0" +- : "=r" (cmdline_arg) +- : "p" (sme_cmdline_arg)); +- asm ("lea sme_cmdline_on(%%rip), %0" +- : "=r" (cmdline_on) +- : "p" (sme_cmdline_on)); +- +- cmdline_ptr = (const char *)((u64)bp->hdr.cmd_line_ptr | +- ((u64)bp->ext_cmd_line_ptr << 32)); +- +- if (cmdline_find_option(cmdline_ptr, cmdline_arg, buffer, sizeof(buffer)) < 0 || +- strncmp(buffer, cmdline_on, sizeof(buffer))) +- return; +- +-out: + RIP_REL_REF(sme_me_mask) = me_mask; + physical_mask &= ~me_mask; + cc_vendor = CC_VENDOR_AMD; +diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c +index de10800cd4dd4..e7b9ac63bb02a 100644 +--- a/arch/x86/mm/pat/memtype.c ++++ b/arch/x86/mm/pat/memtype.c +@@ -950,6 +950,38 @@ static void free_pfn_range(u64 paddr, unsigned long size) + memtype_free(paddr, paddr + size); + } + ++static int get_pat_info(struct vm_area_struct *vma, resource_size_t *paddr, ++ pgprot_t *pgprot) ++{ ++ unsigned long prot; ++ ++ VM_WARN_ON_ONCE(!(vma->vm_flags & VM_PAT)); ++ ++ /* ++ * We need the starting PFN and cachemode used for track_pfn_remap() ++ * that covered the whole VMA. For most mappings, we can obtain that ++ * information from the page tables. For COW mappings, we might now ++ * suddenly have anon folios mapped and follow_phys() will fail. ++ * ++ * Fallback to using vma->vm_pgoff, see remap_pfn_range_notrack(), to ++ * detect the PFN. If we need the cachemode as well, we're out of luck ++ * for now and have to fail fork(). ++ */ ++ if (!follow_phys(vma, vma->vm_start, 0, &prot, paddr)) { ++ if (pgprot) ++ *pgprot = __pgprot(prot); ++ return 0; ++ } ++ if (is_cow_mapping(vma->vm_flags)) { ++ if (pgprot) ++ return -EINVAL; ++ *paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT; ++ return 0; ++ } ++ WARN_ON_ONCE(1); ++ return -EINVAL; ++} ++ + /* + * track_pfn_copy is called when vma that is covering the pfnmap gets + * copied through copy_page_range(). +@@ -960,20 +992,13 @@ static void free_pfn_range(u64 paddr, unsigned long size) + int track_pfn_copy(struct vm_area_struct *vma) + { + resource_size_t paddr; +- unsigned long prot; + unsigned long vma_size = vma->vm_end - vma->vm_start; + pgprot_t pgprot; + + if (vma->vm_flags & VM_PAT) { +- /* +- * reserve the whole chunk covered by vma. We need the +- * starting address and protection from pte. +- */ +- if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) { +- WARN_ON_ONCE(1); ++ if (get_pat_info(vma, &paddr, &pgprot)) + return -EINVAL; +- } +- pgprot = __pgprot(prot); ++ /* reserve the whole chunk covered by vma. 
*/ + return reserve_pfn_range(paddr, vma_size, &pgprot, 1); + } + +@@ -1048,7 +1073,6 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn, + unsigned long size, bool mm_wr_locked) + { + resource_size_t paddr; +- unsigned long prot; + + if (vma && !(vma->vm_flags & VM_PAT)) + return; +@@ -1056,11 +1080,8 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn, + /* free the chunk starting from pfn or the whole chunk */ + paddr = (resource_size_t)pfn << PAGE_SHIFT; + if (!paddr && !size) { +- if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) { +- WARN_ON_ONCE(1); ++ if (get_pat_info(vma, &paddr, NULL)) + return; +- } +- + size = vma->vm_end - vma->vm_start; + } + free_pfn_range(paddr, size); +diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c +index bda9f129835e9..f3c4c756fe1ee 100644 +--- a/arch/x86/mm/pat/set_memory.c ++++ b/arch/x86/mm/pat/set_memory.c +@@ -684,7 +684,7 @@ pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address, + return NULL; + + *level = PG_LEVEL_1G; +- if (pud_large(*pud) || !pud_present(*pud)) ++ if (pud_leaf(*pud) || !pud_present(*pud)) + return (pte_t *)pud; + + pmd = pmd_offset(pud, address); +@@ -743,7 +743,7 @@ pmd_t *lookup_pmd_address(unsigned long address) + return NULL; + + pud = pud_offset(p4d, address); +- if (pud_none(*pud) || pud_large(*pud) || !pud_present(*pud)) ++ if (pud_none(*pud) || pud_leaf(*pud) || !pud_present(*pud)) + return NULL; + + return pmd_offset(pud, address); +@@ -1274,7 +1274,7 @@ static void unmap_pud_range(p4d_t *p4d, unsigned long start, unsigned long end) + */ + while (end - start >= PUD_SIZE) { + +- if (pud_large(*pud)) ++ if (pud_leaf(*pud)) + pud_clear(pud); + else + unmap_pmd_range(pud, start, start + PUD_SIZE); +diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c +index 9deadf517f14a..8e1ef5345b7a8 100644 +--- a/arch/x86/mm/pgtable.c ++++ b/arch/x86/mm/pgtable.c +@@ -774,7 +774,7 @@ int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot) + */ + int pud_clear_huge(pud_t *pud) + { +- if (pud_large(*pud)) { ++ if (pud_leaf(*pud)) { + pud_clear(pud); + return 1; + } +diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c +index 78414c6d1b5ed..51b6b78e6b175 100644 +--- a/arch/x86/mm/pti.c ++++ b/arch/x86/mm/pti.c +@@ -217,7 +217,7 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address) + + pud = pud_offset(p4d, address); + /* The user page tables do not use large mappings: */ +- if (pud_large(*pud)) { ++ if (pud_leaf(*pud)) { + WARN_ON(1); + return NULL; + } +diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c +index 955133077c105..a6a4d3ca8ddc6 100644 +--- a/arch/x86/net/bpf_jit_comp.c ++++ b/arch/x86/net/bpf_jit_comp.c +@@ -344,7 +344,7 @@ static int emit_call(u8 **pprog, void *func, void *ip) + static int emit_rsb_call(u8 **pprog, void *func, void *ip) + { + OPTIMIZER_HIDE_VAR(func); +- x86_call_depth_emit_accounting(pprog, func); ++ ip += x86_call_depth_emit_accounting(pprog, func); + return emit_patch(pprog, func, ip, 0xE8); + } + +diff --git a/arch/x86/power/hibernate.c b/arch/x86/power/hibernate.c +index 6f955eb1e1631..d8af46e677503 100644 +--- a/arch/x86/power/hibernate.c ++++ b/arch/x86/power/hibernate.c +@@ -170,7 +170,7 @@ int relocate_restore_code(void) + goto out; + } + pud = pud_offset(p4d, relocated_restore_code); +- if (pud_large(*pud)) { ++ if (pud_leaf(*pud)) { + set_pud(pud, __pud(pud_val(*pud) & ~_PAGE_NX)); + goto out; + } +diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c +index 
b6830554ff690..9d4a9311e819b 100644 +--- a/arch/x86/xen/mmu_pv.c ++++ b/arch/x86/xen/mmu_pv.c +@@ -1082,7 +1082,7 @@ static void __init xen_cleanmfnmap_pud(pud_t *pud, bool unpin) + pmd_t *pmd_tbl; + int i; + +- if (pud_large(*pud)) { ++ if (pud_leaf(*pud)) { + pa = pud_val(*pud) & PHYSICAL_PAGE_MASK; + xen_free_ro_pages(pa, PUD_SIZE); + return; +@@ -1863,7 +1863,7 @@ static phys_addr_t __init xen_early_virt_to_phys(unsigned long vaddr) + if (!pud_present(pud)) + return 0; + pa = pud_val(pud) & PTE_PFN_MASK; +- if (pud_large(pud)) ++ if (pud_leaf(pud)) + return pa + (vaddr & ~PUD_MASK); + + pmd = native_make_pmd(xen_read_phys_ulong(pa + pmd_index(vaddr) * +diff --git a/drivers/acpi/acpica/dbnames.c b/drivers/acpi/acpica/dbnames.c +index b91155ea9c343..c9131259f717b 100644 +--- a/drivers/acpi/acpica/dbnames.c ++++ b/drivers/acpi/acpica/dbnames.c +@@ -550,8 +550,12 @@ acpi_db_walk_for_fields(acpi_handle obj_handle, + ACPI_FREE(buffer.pointer); + + buffer.length = ACPI_ALLOCATE_LOCAL_BUFFER; +- acpi_evaluate_object(obj_handle, NULL, NULL, &buffer); +- ++ status = acpi_evaluate_object(obj_handle, NULL, NULL, &buffer); ++ if (ACPI_FAILURE(status)) { ++ acpi_os_printf("Could Not evaluate object %p\n", ++ obj_handle); ++ return (AE_OK); ++ } + /* + * Since this is a field unit, surround the output in braces + */ +diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c +index 45e48d653c60b..80a45e11fb5b6 100644 +--- a/drivers/ata/sata_mv.c ++++ b/drivers/ata/sata_mv.c +@@ -787,37 +787,6 @@ static const struct ata_port_info mv_port_info[] = { + }, + }; + +-static const struct pci_device_id mv_pci_tbl[] = { +- { PCI_VDEVICE(MARVELL, 0x5040), chip_504x }, +- { PCI_VDEVICE(MARVELL, 0x5041), chip_504x }, +- { PCI_VDEVICE(MARVELL, 0x5080), chip_5080 }, +- { PCI_VDEVICE(MARVELL, 0x5081), chip_508x }, +- /* RocketRAID 1720/174x have different identifiers */ +- { PCI_VDEVICE(TTI, 0x1720), chip_6042 }, +- { PCI_VDEVICE(TTI, 0x1740), chip_6042 }, +- { PCI_VDEVICE(TTI, 0x1742), chip_6042 }, +- +- { PCI_VDEVICE(MARVELL, 0x6040), chip_604x }, +- { PCI_VDEVICE(MARVELL, 0x6041), chip_604x }, +- { PCI_VDEVICE(MARVELL, 0x6042), chip_6042 }, +- { PCI_VDEVICE(MARVELL, 0x6080), chip_608x }, +- { PCI_VDEVICE(MARVELL, 0x6081), chip_608x }, +- +- { PCI_VDEVICE(ADAPTEC2, 0x0241), chip_604x }, +- +- /* Adaptec 1430SA */ +- { PCI_VDEVICE(ADAPTEC2, 0x0243), chip_7042 }, +- +- /* Marvell 7042 support */ +- { PCI_VDEVICE(MARVELL, 0x7042), chip_7042 }, +- +- /* Highpoint RocketRAID PCIe series */ +- { PCI_VDEVICE(TTI, 0x2300), chip_7042 }, +- { PCI_VDEVICE(TTI, 0x2310), chip_7042 }, +- +- { } /* terminate list */ +-}; +- + static const struct mv_hw_ops mv5xxx_ops = { + .phy_errata = mv5_phy_errata, + .enable_leds = mv5_enable_leds, +@@ -4300,6 +4269,36 @@ static int mv_pci_init_one(struct pci_dev *pdev, + static int mv_pci_device_resume(struct pci_dev *pdev); + #endif + ++static const struct pci_device_id mv_pci_tbl[] = { ++ { PCI_VDEVICE(MARVELL, 0x5040), chip_504x }, ++ { PCI_VDEVICE(MARVELL, 0x5041), chip_504x }, ++ { PCI_VDEVICE(MARVELL, 0x5080), chip_5080 }, ++ { PCI_VDEVICE(MARVELL, 0x5081), chip_508x }, ++ /* RocketRAID 1720/174x have different identifiers */ ++ { PCI_VDEVICE(TTI, 0x1720), chip_6042 }, ++ { PCI_VDEVICE(TTI, 0x1740), chip_6042 }, ++ { PCI_VDEVICE(TTI, 0x1742), chip_6042 }, ++ ++ { PCI_VDEVICE(MARVELL, 0x6040), chip_604x }, ++ { PCI_VDEVICE(MARVELL, 0x6041), chip_604x }, ++ { PCI_VDEVICE(MARVELL, 0x6042), chip_6042 }, ++ { PCI_VDEVICE(MARVELL, 0x6080), chip_608x }, ++ { PCI_VDEVICE(MARVELL, 0x6081), 
chip_608x }, ++ ++ { PCI_VDEVICE(ADAPTEC2, 0x0241), chip_604x }, ++ ++ /* Adaptec 1430SA */ ++ { PCI_VDEVICE(ADAPTEC2, 0x0243), chip_7042 }, ++ ++ /* Marvell 7042 support */ ++ { PCI_VDEVICE(MARVELL, 0x7042), chip_7042 }, ++ ++ /* Highpoint RocketRAID PCIe series */ ++ { PCI_VDEVICE(TTI, 0x2300), chip_7042 }, ++ { PCI_VDEVICE(TTI, 0x2310), chip_7042 }, ++ ++ { } /* terminate list */ ++}; + + static struct pci_driver mv_pci_driver = { + .name = DRV_NAME, +@@ -4312,6 +4311,7 @@ static struct pci_driver mv_pci_driver = { + #endif + + }; ++MODULE_DEVICE_TABLE(pci, mv_pci_tbl); + + /** + * mv_print_info - Dump key info to kernel log for perusal. +@@ -4484,7 +4484,6 @@ static void __exit mv_exit(void) + MODULE_AUTHOR("Brett Russ"); + MODULE_DESCRIPTION("SCSI low-level driver for Marvell SATA controllers"); + MODULE_LICENSE("GPL v2"); +-MODULE_DEVICE_TABLE(pci, mv_pci_tbl); + MODULE_VERSION(DRV_VERSION); + MODULE_ALIAS("platform:" DRV_NAME); + +diff --git a/drivers/ata/sata_sx4.c b/drivers/ata/sata_sx4.c +index b51d7a9d0d90c..a482741eb181f 100644 +--- a/drivers/ata/sata_sx4.c ++++ b/drivers/ata/sata_sx4.c +@@ -957,8 +957,7 @@ static void pdc20621_get_from_dimm(struct ata_host *host, void *psource, + + offset -= (idx * window_size); + idx++; +- dist = ((long) (window_size - (offset + size))) >= 0 ? size : +- (long) (window_size - offset); ++ dist = min(size, window_size - offset); + memcpy_fromio(psource, dimm_mmio + offset / 4, dist); + + psource += dist; +@@ -1005,8 +1004,7 @@ static void pdc20621_put_to_dimm(struct ata_host *host, void *psource, + readl(mmio + PDC_DIMM_WINDOW_CTLR); + offset -= (idx * window_size); + idx++; +- dist = ((long)(s32)(window_size - (offset + size))) >= 0 ? size : +- (long) (window_size - offset); ++ dist = min(size, window_size - offset); + memcpy_toio(dimm_mmio + offset / 4, psource, dist); + writel(0x01, mmio + PDC_GENERAL_CTLR); + readl(mmio + PDC_GENERAL_CTLR); +diff --git a/drivers/base/core.c b/drivers/base/core.c +index 2cc0ab8541680..0214288765c8c 100644 +--- a/drivers/base/core.c ++++ b/drivers/base/core.c +@@ -44,6 +44,7 @@ static bool fw_devlink_is_permissive(void); + static void __fw_devlink_link_to_consumers(struct device *dev); + static bool fw_devlink_drv_reg_done; + static bool fw_devlink_best_effort; ++static struct workqueue_struct *device_link_wq; + + /** + * __fwnode_link_add - Create a link between two fwnode_handles. +@@ -531,12 +532,26 @@ static void devlink_dev_release(struct device *dev) + /* + * It may take a while to complete this work because of the SRCU + * synchronization in device_link_release_fn() and if the consumer or +- * supplier devices get deleted when it runs, so put it into the "long" +- * workqueue. ++ * supplier devices get deleted when it runs, so put it into the ++ * dedicated workqueue. + */ +- queue_work(system_long_wq, &link->rm_work); ++ queue_work(device_link_wq, &link->rm_work); + } + ++/** ++ * device_link_wait_removal - Wait for ongoing devlink removal jobs to terminate ++ */ ++void device_link_wait_removal(void) ++{ ++ /* ++ * devlink removal jobs are queued in the dedicated work queue. ++ * To be sure that all removal jobs are terminated, ensure that any ++ * scheduled work has run to completion. 
++ */ ++ flush_workqueue(device_link_wq); ++} ++EXPORT_SYMBOL_GPL(device_link_wait_removal); ++ + static struct class devlink_class = { + .name = "devlink", + .dev_groups = devlink_groups, +@@ -4090,9 +4105,14 @@ int __init devices_init(void) + sysfs_dev_char_kobj = kobject_create_and_add("char", dev_kobj); + if (!sysfs_dev_char_kobj) + goto char_kobj_err; ++ device_link_wq = alloc_workqueue("device_link_wq", 0, 0); ++ if (!device_link_wq) ++ goto wq_err; + + return 0; + ++ wq_err: ++ kobject_put(sysfs_dev_char_kobj); + char_kobj_err: + kobject_put(sysfs_dev_block_kobj); + block_kobj_err: +diff --git a/drivers/base/regmap/regcache-maple.c b/drivers/base/regmap/regcache-maple.c +index 41edd6a430eb4..55999a50ccc0b 100644 +--- a/drivers/base/regmap/regcache-maple.c ++++ b/drivers/base/regmap/regcache-maple.c +@@ -112,7 +112,7 @@ static int regcache_maple_drop(struct regmap *map, unsigned int min, + unsigned long *entry, *lower, *upper; + unsigned long lower_index, lower_last; + unsigned long upper_index, upper_last; +- int ret; ++ int ret = 0; + + lower = NULL; + upper = NULL; +@@ -145,7 +145,7 @@ static int regcache_maple_drop(struct regmap *map, unsigned int min, + upper_index = max + 1; + upper_last = mas.last; + +- upper = kmemdup(&entry[max + 1], ++ upper = kmemdup(&entry[max - mas.index + 1], + ((mas.last - max) * + sizeof(unsigned long)), + map->alloc_flags); +@@ -244,7 +244,7 @@ static int regcache_maple_sync(struct regmap *map, unsigned int min, + unsigned long lmin = min; + unsigned long lmax = max; + unsigned int r, v, sync_start; +- int ret; ++ int ret = 0; + bool sync_needed = false; + + map->cache_bypass = true; +diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c +index 0211f704a358b..5277090c6d6d7 100644 +--- a/drivers/bluetooth/btqca.c ++++ b/drivers/bluetooth/btqca.c +@@ -758,11 +758,15 @@ EXPORT_SYMBOL_GPL(qca_uart_setup); + + int qca_set_bdaddr(struct hci_dev *hdev, const bdaddr_t *bdaddr) + { ++ bdaddr_t bdaddr_swapped; + struct sk_buff *skb; + int err; + +- skb = __hci_cmd_sync_ev(hdev, EDL_WRITE_BD_ADDR_OPCODE, 6, bdaddr, +- HCI_EV_VENDOR, HCI_INIT_TIMEOUT); ++ baswap(&bdaddr_swapped, bdaddr); ++ ++ skb = __hci_cmd_sync_ev(hdev, EDL_WRITE_BD_ADDR_OPCODE, 6, ++ &bdaddr_swapped, HCI_EV_VENDOR, ++ HCI_INIT_TIMEOUT); + if (IS_ERR(skb)) { + err = PTR_ERR(skb); + bt_dev_err(hdev, "QCA Change address cmd failed (%d)", err); +diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c +index f2d4985e036e4..8861b8017fbdf 100644 +--- a/drivers/bluetooth/hci_qca.c ++++ b/drivers/bluetooth/hci_qca.c +@@ -7,7 +7,6 @@ + * + * Copyright (C) 2007 Texas Instruments, Inc. + * Copyright (c) 2010, 2012, 2018 The Linux Foundation. All rights reserved. +- * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. + * + * Acknowledgements: + * This file is based on hci_ll.c, which was... 
+@@ -226,6 +225,7 @@ struct qca_serdev { + struct qca_power *bt_power; + u32 init_speed; + u32 oper_speed; ++ bool bdaddr_property_broken; + const char *firmware_name; + }; + +@@ -1825,6 +1825,7 @@ static int qca_setup(struct hci_uart *hu) + const char *firmware_name = qca_get_firmware_name(hu); + int ret; + struct qca_btsoc_version ver; ++ struct qca_serdev *qcadev; + const char *soc_name; + + ret = qca_check_speeds(hu); +@@ -1882,16 +1883,11 @@ static int qca_setup(struct hci_uart *hu) + case QCA_WCN6750: + case QCA_WCN6855: + case QCA_WCN7850: ++ set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks); + +- /* Set BDA quirk bit for reading BDA value from fwnode property +- * only if that property exist in DT. +- */ +- if (fwnode_property_present(dev_fwnode(hdev->dev.parent), "local-bd-address")) { +- set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks); +- bt_dev_info(hdev, "setting quirk bit to read BDA from fwnode later"); +- } else { +- bt_dev_dbg(hdev, "local-bd-address` is not present in the devicetree so not setting quirk bit for BDA"); +- } ++ qcadev = serdev_device_get_drvdata(hu->serdev); ++ if (qcadev->bdaddr_property_broken) ++ set_bit(HCI_QUIRK_BDADDR_PROPERTY_BROKEN, &hdev->quirks); + + hci_set_aosp_capable(hdev); + +@@ -2264,6 +2260,9 @@ static int qca_serdev_probe(struct serdev_device *serdev) + if (!qcadev->oper_speed) + BT_DBG("UART will pick default operating speed"); + ++ qcadev->bdaddr_property_broken = device_property_read_bool(&serdev->dev, ++ "qcom,local-bd-address-broken"); ++ + if (data) + qcadev->btsoc_type = data->soc_type; + else +diff --git a/drivers/dma-buf/st-dma-fence-chain.c b/drivers/dma-buf/st-dma-fence-chain.c +index c0979c8049b5a..661de4add4c72 100644 +--- a/drivers/dma-buf/st-dma-fence-chain.c ++++ b/drivers/dma-buf/st-dma-fence-chain.c +@@ -84,11 +84,11 @@ static int sanitycheck(void *arg) + return -ENOMEM; + + chain = mock_chain(NULL, f, 1); +- if (!chain) ++ if (chain) ++ dma_fence_enable_sw_signaling(chain); ++ else + err = -ENOMEM; + +- dma_fence_enable_sw_signaling(chain); +- + dma_fence_signal(f); + dma_fence_put(f); + +diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c +index bfa30625f5d03..3dc2f9aaf08db 100644 +--- a/drivers/firmware/efi/libstub/efi-stub-helper.c ++++ b/drivers/firmware/efi/libstub/efi-stub-helper.c +@@ -24,6 +24,8 @@ static bool efi_noinitrd; + static bool efi_nosoftreserve; + static bool efi_disable_pci_dma = IS_ENABLED(CONFIG_EFI_DISABLE_PCI_DMA); + ++int efi_mem_encrypt; ++ + bool __pure __efi_soft_reserve_enabled(void) + { + return !efi_nosoftreserve; +@@ -75,6 +77,12 @@ efi_status_t efi_parse_options(char const *cmdline) + efi_noinitrd = true; + } else if (IS_ENABLED(CONFIG_X86_64) && !strcmp(param, "no5lvl")) { + efi_no5lvl = true; ++ } else if (IS_ENABLED(CONFIG_ARCH_HAS_MEM_ENCRYPT) && ++ !strcmp(param, "mem_encrypt") && val) { ++ if (parse_option_str(val, "on")) ++ efi_mem_encrypt = 1; ++ else if (parse_option_str(val, "off")) ++ efi_mem_encrypt = -1; + } else if (!strcmp(param, "efi") && val) { + efi_nochunk = parse_option_str(val, "nochunk"); + efi_novamap |= parse_option_str(val, "novamap"); +diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h +index c04b82ea40f21..fc18fd649ed77 100644 +--- a/drivers/firmware/efi/libstub/efistub.h ++++ b/drivers/firmware/efi/libstub/efistub.h +@@ -37,8 +37,8 @@ extern bool efi_no5lvl; + extern bool efi_nochunk; + extern bool efi_nokaslr; + extern int efi_loglevel; ++extern int 
efi_mem_encrypt; + extern bool efi_novamap; +- + extern const efi_system_table_t *efi_system_table; + + typedef union efi_dxe_services_table efi_dxe_services_table_t; +diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c +index 8307950fe3ced..e4ae3db727efa 100644 +--- a/drivers/firmware/efi/libstub/x86-stub.c ++++ b/drivers/firmware/efi/libstub/x86-stub.c +@@ -238,6 +238,15 @@ efi_status_t efi_adjust_memory_range_protection(unsigned long start, + rounded_end = roundup(start + size, EFI_PAGE_SIZE); + + if (memattr != NULL) { ++ status = efi_call_proto(memattr, set_memory_attributes, ++ rounded_start, ++ rounded_end - rounded_start, ++ EFI_MEMORY_RO); ++ if (status != EFI_SUCCESS) { ++ efi_warn("Failed to set EFI_MEMORY_RO attribute\n"); ++ return status; ++ } ++ + status = efi_call_proto(memattr, clear_memory_attributes, + rounded_start, + rounded_end - rounded_start, +@@ -816,7 +825,7 @@ static efi_status_t efi_decompress_kernel(unsigned long *kernel_entry) + + *kernel_entry = addr + entry; + +- return efi_adjust_memory_range_protection(addr, kernel_total_size); ++ return efi_adjust_memory_range_protection(addr, kernel_text_size); + } + + static void __noreturn enter_kernel(unsigned long kernel_addr, +@@ -888,6 +897,9 @@ void __noreturn efi_stub_entry(efi_handle_t handle, + } + } + ++ if (efi_mem_encrypt > 0) ++ hdr->xloadflags |= XLF_MEM_ENCRYPTION; ++ + status = efi_decompress_kernel(&kernel_entry); + if (status != EFI_SUCCESS) { + efi_err("Failed to decompress kernel\n"); +diff --git a/drivers/gpio/gpiolib-cdev.c b/drivers/gpio/gpiolib-cdev.c +index 4f3e66ece7f78..84125e55de101 100644 +--- a/drivers/gpio/gpiolib-cdev.c ++++ b/drivers/gpio/gpiolib-cdev.c +@@ -655,6 +655,25 @@ static u32 line_event_id(int level) + GPIO_V2_LINE_EVENT_FALLING_EDGE; + } + ++static inline char *make_irq_label(const char *orig) ++{ ++ char *new; ++ ++ if (!orig) ++ return NULL; ++ ++ new = kstrdup_and_replace(orig, '/', ':', GFP_KERNEL); ++ if (!new) ++ return ERR_PTR(-ENOMEM); ++ ++ return new; ++} ++ ++static inline void free_irq_label(const char *label) ++{ ++ kfree(label); ++} ++ + #ifdef CONFIG_HTE + + static enum hte_return process_hw_ts_thread(void *p) +@@ -942,6 +961,7 @@ static int debounce_setup(struct line *line, unsigned int debounce_period_us) + { + unsigned long irqflags; + int ret, level, irq; ++ char *label; + + /* try hardware */ + ret = gpiod_set_debounce(line->desc, debounce_period_us); +@@ -964,11 +984,17 @@ static int debounce_setup(struct line *line, unsigned int debounce_period_us) + if (irq < 0) + return -ENXIO; + ++ label = make_irq_label(line->req->label); ++ if (IS_ERR(label)) ++ return -ENOMEM; ++ + irqflags = IRQF_TRIGGER_FALLING | IRQF_TRIGGER_RISING; + ret = request_irq(irq, debounce_irq_handler, irqflags, +- line->req->label, line); +- if (ret) ++ label, line); ++ if (ret) { ++ free_irq_label(label); + return ret; ++ } + line->irq = irq; + } else { + ret = hte_edge_setup(line, GPIO_V2_LINE_FLAG_EDGE_BOTH); +@@ -1013,7 +1039,7 @@ static u32 gpio_v2_line_config_debounce_period(struct gpio_v2_line_config *lc, + static void edge_detector_stop(struct line *line) + { + if (line->irq) { +- free_irq(line->irq, line); ++ free_irq_label(free_irq(line->irq, line)); + line->irq = 0; + } + +@@ -1038,6 +1064,7 @@ static int edge_detector_setup(struct line *line, + unsigned long irqflags = 0; + u64 eflags; + int irq, ret; ++ char *label; + + eflags = edflags & GPIO_V2_LINE_EDGE_FLAGS; + if (eflags && !kfifo_initialized(&line->req->events)) { +@@ 
-1074,11 +1101,17 @@ static int edge_detector_setup(struct line *line, + IRQF_TRIGGER_RISING : IRQF_TRIGGER_FALLING; + irqflags |= IRQF_ONESHOT; + ++ label = make_irq_label(line->req->label); ++ if (IS_ERR(label)) ++ return PTR_ERR(label); ++ + /* Request a thread to read the events */ + ret = request_threaded_irq(irq, edge_irq_handler, edge_irq_thread, +- irqflags, line->req->label, line); +- if (ret) ++ irqflags, label, line); ++ if (ret) { ++ free_irq_label(label); + return ret; ++ } + + line->irq = irq; + return 0; +@@ -1943,7 +1976,7 @@ static void lineevent_free(struct lineevent_state *le) + blocking_notifier_chain_unregister(&le->gdev->device_notifier, + &le->device_unregistered_nb); + if (le->irq) +- free_irq(le->irq, le); ++ free_irq_label(free_irq(le->irq, le)); + if (le->desc) + gpiod_free(le->desc); + kfree(le->label); +@@ -2091,6 +2124,7 @@ static int lineevent_create(struct gpio_device *gdev, void __user *ip) + int fd; + int ret; + int irq, irqflags = 0; ++ char *label; + + if (copy_from_user(&eventreq, ip, sizeof(eventreq))) + return -EFAULT; +@@ -2175,15 +2209,23 @@ static int lineevent_create(struct gpio_device *gdev, void __user *ip) + if (ret) + goto out_free_le; + ++ label = make_irq_label(le->label); ++ if (IS_ERR(label)) { ++ ret = PTR_ERR(label); ++ goto out_free_le; ++ } ++ + /* Request a thread to read the events */ + ret = request_threaded_irq(irq, + lineevent_irq_handler, + lineevent_irq_thread, + irqflags, +- le->label, ++ label, + le); +- if (ret) ++ if (ret) { ++ free_irq_label(label); + goto out_free_le; ++ } + + le->irq = irq; + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h +index 85efd686e538d..d59e8536192ca 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h +@@ -1369,6 +1369,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, + void amdgpu_driver_release_kms(struct drm_device *dev); + + int amdgpu_device_ip_suspend(struct amdgpu_device *adev); ++int amdgpu_device_prepare(struct drm_device *dev); + int amdgpu_device_suspend(struct drm_device *dev, bool fbcon); + int amdgpu_device_resume(struct drm_device *dev, bool fbcon); + u32 amdgpu_get_vblank_counter_kms(struct drm_crtc *crtc); +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +index 79261bec26542..062d78818da16 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +@@ -1549,6 +1549,7 @@ static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, + } else { + pr_info("switched off\n"); + dev->switch_power_state = DRM_SWITCH_POWER_CHANGING; ++ amdgpu_device_prepare(dev); + amdgpu_device_suspend(dev, true); + amdgpu_device_cache_pci_state(pdev); + /* Shut down the device */ +@@ -4094,6 +4095,43 @@ static int amdgpu_device_evict_resources(struct amdgpu_device *adev) + /* + * Suspend & resume. + */ ++/** ++ * amdgpu_device_prepare - prepare for device suspend ++ * ++ * @dev: drm dev pointer ++ * ++ * Prepare to put the hw in the suspend state (all asics). ++ * Returns 0 for success or an error on failure. ++ * Called at driver suspend. 
++ */ ++int amdgpu_device_prepare(struct drm_device *dev) ++{ ++ struct amdgpu_device *adev = drm_to_adev(dev); ++ int i, r; ++ ++ if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) ++ return 0; ++ ++ /* Evict the majority of BOs before starting suspend sequence */ ++ r = amdgpu_device_evict_resources(adev); ++ if (r) ++ return r; ++ ++ flush_delayed_work(&adev->gfx.gfx_off_delay_work); ++ ++ for (i = 0; i < adev->num_ip_blocks; i++) { ++ if (!adev->ip_blocks[i].status.valid) ++ continue; ++ if (!adev->ip_blocks[i].version->funcs->prepare_suspend) ++ continue; ++ r = adev->ip_blocks[i].version->funcs->prepare_suspend((void *)adev); ++ if (r) ++ return r; ++ } ++ ++ return 0; ++} ++ + /** + * amdgpu_device_suspend - initiate device suspend + * +@@ -4114,11 +4152,6 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) + + adev->in_suspend = true; + +- /* Evict the majority of BOs before grabbing the full access */ +- r = amdgpu_device_evict_resources(adev); +- if (r) +- return r; +- + if (amdgpu_sriov_vf(adev)) { + amdgpu_virt_fini_data_exchange(adev); + r = amdgpu_virt_request_full_gpu(adev, false); +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +index 3204c3a42f2a3..f9bc38d20ce3e 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +@@ -2386,8 +2386,9 @@ static int amdgpu_pmops_prepare(struct device *dev) + /* Return a positive number here so + * DPM_FLAG_SMART_SUSPEND works properly + */ +- if (amdgpu_device_supports_boco(drm_dev)) +- return pm_runtime_suspended(dev); ++ if (amdgpu_device_supports_boco(drm_dev) && ++ pm_runtime_suspended(dev)) ++ return 1; + + /* if we will not support s3 or s2i for the device + * then skip suspend +@@ -2396,7 +2397,7 @@ static int amdgpu_pmops_prepare(struct device *dev) + !amdgpu_acpi_is_s3_active(adev)) + return 1; + +- return 0; ++ return amdgpu_device_prepare(drm_dev); + } + + static void amdgpu_pmops_complete(struct device *dev) +@@ -2598,6 +2599,9 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev) + if (amdgpu_device_supports_boco(drm_dev)) + adev->mp1_state = PP_MP1_STATE_UNLOAD; + ++ ret = amdgpu_device_prepare(drm_dev); ++ if (ret) ++ return ret; + ret = amdgpu_device_suspend(drm_dev, false); + if (ret) { + adev->in_runpm = false; +diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +index 251dd800a2a66..7b5c1498941dd 100644 +--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c ++++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +@@ -1179,9 +1179,10 @@ void dce110_disable_stream(struct pipe_ctx *pipe_ctx) + dto_params.timing = &pipe_ctx->stream->timing; + dp_hpo_inst = pipe_ctx->stream_res.hpo_dp_stream_enc->inst; + if (dccg) { +- dccg->funcs->set_dtbclk_dto(dccg, &dto_params); + dccg->funcs->disable_symclk32_se(dccg, dp_hpo_inst); + dccg->funcs->set_dpstreamclk(dccg, REFCLK, tg->inst, dp_hpo_inst); ++ if (dccg && dccg->funcs->set_dtbclk_dto) ++ dccg->funcs->set_dtbclk_dto(dccg, &dto_params); + } + } else if (dccg && dccg->funcs->disable_symclk_se) { + dccg->funcs->disable_symclk_se(dccg, stream_enc->stream_enc_inst, +diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c +index 1e3803739ae61..12af2859002f7 100644 +--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c ++++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c +@@ -2728,18 +2728,17 @@ 
void dcn20_enable_stream(struct pipe_ctx *pipe_ctx) + } + + if (dc->link_srv->dp_is_128b_132b_signal(pipe_ctx)) { +- dp_hpo_inst = pipe_ctx->stream_res.hpo_dp_stream_enc->inst; +- dccg->funcs->set_dpstreamclk(dccg, DTBCLK0, tg->inst, dp_hpo_inst); +- +- phyd32clk = get_phyd32clk_src(link); +- dccg->funcs->enable_symclk32_se(dccg, dp_hpo_inst, phyd32clk); +- + dto_params.otg_inst = tg->inst; + dto_params.pixclk_khz = pipe_ctx->stream->timing.pix_clk_100hz / 10; + dto_params.num_odm_segments = get_odm_segment_count(pipe_ctx); + dto_params.timing = &pipe_ctx->stream->timing; + dto_params.ref_dtbclk_khz = dc->clk_mgr->funcs->get_dtb_ref_clk_frequency(dc->clk_mgr); + dccg->funcs->set_dtbclk_dto(dccg, &dto_params); ++ dp_hpo_inst = pipe_ctx->stream_res.hpo_dp_stream_enc->inst; ++ dccg->funcs->set_dpstreamclk(dccg, DTBCLK0, tg->inst, dp_hpo_inst); ++ ++ phyd32clk = get_phyd32clk_src(link); ++ dccg->funcs->enable_symclk32_se(dccg, dp_hpo_inst, phyd32clk); + } else { + } + if (hws->funcs.calculate_dccg_k1_k2_values && dc->res_pool->dccg->funcs->set_pixel_rate_div) { +diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h +index abe829bbd54af..a9880fc531955 100644 +--- a/drivers/gpu/drm/amd/include/amd_shared.h ++++ b/drivers/gpu/drm/amd/include/amd_shared.h +@@ -295,6 +295,7 @@ struct amd_ip_funcs { + int (*hw_init)(void *handle); + int (*hw_fini)(void *handle); + void (*late_fini)(void *handle); ++ int (*prepare_suspend)(void *handle); + int (*suspend)(void *handle); + int (*resume)(void *handle); + bool (*is_idle)(void *handle); +diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c +index 7352bde299d54..03bd3c7bd0dc2 100644 +--- a/drivers/gpu/drm/drm_prime.c ++++ b/drivers/gpu/drm/drm_prime.c +@@ -582,7 +582,12 @@ int drm_gem_map_attach(struct dma_buf *dma_buf, + { + struct drm_gem_object *obj = dma_buf->priv; + +- if (!obj->funcs->get_sg_table) ++ /* ++ * drm_gem_map_dma_buf() requires obj->get_sg_table(), but drivers ++ * that implement their own ->map_dma_buf() do not. 
++ */ ++ if (dma_buf->ops->map_dma_buf == drm_gem_map_dma_buf && ++ !obj->funcs->get_sg_table) + return -ENOSYS; + + return drm_gem_pin(obj); +diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile +index 79f65eff6bb2a..23400313d8a64 100644 +--- a/drivers/gpu/drm/i915/Makefile ++++ b/drivers/gpu/drm/i915/Makefile +@@ -104,6 +104,7 @@ gt-y += \ + gt/intel_ggtt_fencing.o \ + gt/intel_gt.o \ + gt/intel_gt_buffer_pool.o \ ++ gt/intel_gt_ccs_mode.o \ + gt/intel_gt_clock_utils.o \ + gt/intel_gt_debugfs.o \ + gt/intel_gt_engines_debugfs.o \ +diff --git a/drivers/gpu/drm/i915/display/intel_cursor.c b/drivers/gpu/drm/i915/display/intel_cursor.c +index b342fad180ca5..61df6cd3f3778 100644 +--- a/drivers/gpu/drm/i915/display/intel_cursor.c ++++ b/drivers/gpu/drm/i915/display/intel_cursor.c +@@ -23,6 +23,8 @@ + #include "intel_psr.h" + #include "skl_watermark.h" + ++#include "gem/i915_gem_object.h" ++ + /* Cursor formats */ + static const u32 intel_cursor_formats[] = { + DRM_FORMAT_ARGB8888, +@@ -32,12 +34,10 @@ static u32 intel_cursor_base(const struct intel_plane_state *plane_state) + { + struct drm_i915_private *dev_priv = + to_i915(plane_state->uapi.plane->dev); +- const struct drm_framebuffer *fb = plane_state->hw.fb; +- const struct drm_i915_gem_object *obj = intel_fb_obj(fb); + u32 base; + + if (DISPLAY_INFO(dev_priv)->cursor_needs_physical) +- base = sg_dma_address(obj->mm.pages->sgl); ++ base = plane_state->phys_dma_addr; + else + base = intel_plane_ggtt_offset(plane_state); + +diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h +index 7fc92b1474cc4..8b0dc2b75da4a 100644 +--- a/drivers/gpu/drm/i915/display/intel_display_types.h ++++ b/drivers/gpu/drm/i915/display/intel_display_types.h +@@ -701,6 +701,7 @@ struct intel_plane_state { + #define PLANE_HAS_FENCE BIT(0) + + struct intel_fb_view view; ++ u32 phys_dma_addr; /* for cursor_needs_physical */ + + /* Plane pxp decryption state */ + bool decrypt; +diff --git a/drivers/gpu/drm/i915/display/intel_fb_pin.c b/drivers/gpu/drm/i915/display/intel_fb_pin.c +index fffd568070d41..a131656757f2b 100644 +--- a/drivers/gpu/drm/i915/display/intel_fb_pin.c ++++ b/drivers/gpu/drm/i915/display/intel_fb_pin.c +@@ -254,6 +254,16 @@ int intel_plane_pin_fb(struct intel_plane_state *plane_state) + return PTR_ERR(vma); + + plane_state->ggtt_vma = vma; ++ ++ /* ++ * Pre-populate the dma address before we enter the vblank ++ * evade critical section as i915_gem_object_get_dma_address() ++ * will trigger might_sleep() even if it won't actually sleep, ++ * which is the case when the fb has already been pinned. 
++ */ ++ if (phys_cursor) ++ plane_state->phys_dma_addr = ++ i915_gem_object_get_dma_address(intel_fb_obj(fb), 0); + } else { + struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb); + +diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c +index ffc15d278a39d..d557ecd4e1ebe 100644 +--- a/drivers/gpu/drm/i915/display/skl_universal_plane.c ++++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c +@@ -20,6 +20,7 @@ + #include "skl_scaler.h" + #include "skl_universal_plane.h" + #include "skl_watermark.h" ++#include "gt/intel_gt.h" + #include "pxp/intel_pxp.h" + + static const u32 skl_plane_formats[] = { +@@ -2169,8 +2170,8 @@ static bool skl_plane_has_rc_ccs(struct drm_i915_private *i915, + enum pipe pipe, enum plane_id plane_id) + { + /* Wa_14017240301 */ +- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || +- IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0)) ++ if (IS_GFX_GT_IP_STEP(to_gt(i915), IP_VER(12, 70), STEP_A0, STEP_B0) || ++ IS_GFX_GT_IP_STEP(to_gt(i915), IP_VER(12, 71), STEP_A0, STEP_B0)) + return false; + + /* Wa_22011186057 */ +diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c b/drivers/gpu/drm/i915/gem/i915_gem_create.c +index d24c0ce8805c7..19156ba4b9ef4 100644 +--- a/drivers/gpu/drm/i915/gem/i915_gem_create.c ++++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c +@@ -405,8 +405,8 @@ static int ext_set_pat(struct i915_user_extension __user *base, void *data) + BUILD_BUG_ON(sizeof(struct drm_i915_gem_create_ext_set_pat) != + offsetofend(struct drm_i915_gem_create_ext_set_pat, rsvd)); + +- /* Limiting the extension only to Meteor Lake */ +- if (!IS_METEORLAKE(i915)) ++ /* Limiting the extension only to Xe_LPG and beyond */ ++ if (GRAPHICS_VER_FULL(i915) < IP_VER(12, 70)) + return -ENODEV; + + if (copy_from_user(&ext, base, sizeof(ext))) +diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c +index 7ad36198aab2a..cddf8c16e9a72 100644 +--- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c ++++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c +@@ -4,9 +4,9 @@ + */ + + #include "gen8_engine_cs.h" +-#include "i915_drv.h" + #include "intel_engine_regs.h" + #include "intel_gpu_commands.h" ++#include "intel_gt.h" + #include "intel_lrc.h" + #include "intel_ring.h" + +@@ -226,8 +226,8 @@ u32 *gen12_emit_aux_table_inv(struct intel_engine_cs *engine, u32 *cs) + static int mtl_dummy_pipe_control(struct i915_request *rq) + { + /* Wa_14016712196 */ +- if (IS_MTL_GRAPHICS_STEP(rq->i915, M, STEP_A0, STEP_B0) || +- IS_MTL_GRAPHICS_STEP(rq->i915, P, STEP_A0, STEP_B0)) { ++ if (IS_GFX_GT_IP_RANGE(rq->engine->gt, IP_VER(12, 70), IP_VER(12, 74)) || ++ IS_DG2(rq->i915)) { + u32 *cs; + + /* dummy PIPE_CONTROL + depth flush */ +@@ -808,6 +808,7 @@ u32 *gen12_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs) + u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs) + { + struct drm_i915_private *i915 = rq->i915; ++ struct intel_gt *gt = rq->engine->gt; + u32 flags = (PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_TLB_INVALIDATE | + PIPE_CONTROL_TILE_CACHE_FLUSH | +@@ -818,8 +819,7 @@ u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs) + PIPE_CONTROL_FLUSH_ENABLE); + + /* Wa_14016712196 */ +- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || +- IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0)) ++ if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74)) || IS_DG2(i915)) + /* dummy PIPE_CONTROL + depth flush */ + cs = gen12_emit_pipe_control(cs, 0, + 
PIPE_CONTROL_DEPTH_CACHE_FLUSH, 0); +diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c +index e85d70a62123f..765387639dabb 100644 +--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c ++++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c +@@ -912,6 +912,23 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt) + info->engine_mask &= ~BIT(GSC0); + } + ++ /* ++ * Do not create the command streamer for CCS slices beyond the first. ++ * All the workload submitted to the first engine will be shared among ++ * all the slices. ++ * ++ * Once the user will be allowed to customize the CCS mode, then this ++ * check needs to be removed. ++ */ ++ if (IS_DG2(gt->i915)) { ++ u8 first_ccs = __ffs(CCS_MASK(gt)); ++ ++ /* Mask off all the CCS engine */ ++ info->engine_mask &= ~GENMASK(CCS3, CCS0); ++ /* Put back in the first CCS engine */ ++ info->engine_mask |= BIT(_CCS(first_ccs)); ++ } ++ + return info->engine_mask; + } + +@@ -1616,9 +1633,7 @@ static int __intel_engine_stop_cs(struct intel_engine_cs *engine, + * Wa_22011802037: Prior to doing a reset, ensure CS is + * stopped, set ring stop bit and prefetch disable bit to halt CS + */ +- if (IS_MTL_GRAPHICS_STEP(engine->i915, M, STEP_A0, STEP_B0) || +- (GRAPHICS_VER(engine->i915) >= 11 && +- GRAPHICS_VER_FULL(engine->i915) < IP_VER(12, 70))) ++ if (intel_engine_reset_needs_wa_22011802037(engine->gt)) + intel_uncore_write_fw(uncore, RING_MODE_GEN7(engine->mmio_base), + _MASKED_BIT_ENABLE(GEN12_GFX_PREFETCH_DISABLE)); + +diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c +index a95615b345cd7..5a3a5b29d1507 100644 +--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c ++++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c +@@ -21,7 +21,7 @@ static void intel_gsc_idle_msg_enable(struct intel_engine_cs *engine) + { + struct drm_i915_private *i915 = engine->i915; + +- if (IS_METEORLAKE(i915) && engine->id == GSC0) { ++ if (MEDIA_VER(i915) >= 13 && engine->id == GSC0) { + intel_uncore_write(engine->gt->uncore, + RC_PSMI_CTRL_GSCCS, + _MASKED_BIT_DISABLE(IDLE_MSG_DISABLE)); +diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +index 5a720e2523126..42e09f1589205 100644 +--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c ++++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +@@ -3001,9 +3001,7 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine) + * Wa_22011802037: In addition to stopping the cs, we need + * to wait for any pending mi force wakeups + */ +- if (IS_MTL_GRAPHICS_STEP(engine->i915, M, STEP_A0, STEP_B0) || +- (GRAPHICS_VER(engine->i915) >= 11 && +- GRAPHICS_VER_FULL(engine->i915) < IP_VER(12, 70))) ++ if (intel_engine_reset_needs_wa_22011802037(engine->gt)) + intel_engine_wait_for_pending_mi_fw(engine); + + engine->execlists.reset_ccid = active_ccid(engine); +diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h +index 6c34547b58b59..6e63b46682f76 100644 +--- a/drivers/gpu/drm/i915/gt/intel_gt.h ++++ b/drivers/gpu/drm/i915/gt/intel_gt.h +@@ -14,6 +14,37 @@ + struct drm_i915_private; + struct drm_printer; + ++/* ++ * Check that the GT is a graphics GT and has an IP version within the ++ * specified range (inclusive). 
++ */ ++#define IS_GFX_GT_IP_RANGE(gt, from, until) ( \ ++ BUILD_BUG_ON_ZERO((from) < IP_VER(2, 0)) + \ ++ BUILD_BUG_ON_ZERO((until) < (from)) + \ ++ ((gt)->type != GT_MEDIA && \ ++ GRAPHICS_VER_FULL((gt)->i915) >= (from) && \ ++ GRAPHICS_VER_FULL((gt)->i915) <= (until))) ++ ++/* ++ * Check that the GT is a graphics GT with a specific IP version and has ++ * a stepping in the range [from, until). The lower stepping bound is ++ * inclusive, the upper bound is exclusive. The most common use-case of this ++ * macro is for checking bounds for workarounds, which usually have a stepping ++ * ("from") at which the hardware issue is first present and another stepping ++ * ("until") at which a hardware fix is present and the software workaround is ++ * no longer necessary. E.g., ++ * ++ * IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ++ * IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_B1, STEP_FOREVER) ++ * ++ * "STEP_FOREVER" can be passed as "until" for workarounds that have no upper ++ * stepping bound for the specified IP version. ++ */ ++#define IS_GFX_GT_IP_STEP(gt, ipver, from, until) ( \ ++ BUILD_BUG_ON_ZERO((until) <= (from)) + \ ++ (IS_GFX_GT_IP_RANGE((gt), (ipver), (ipver)) && \ ++ IS_GRAPHICS_STEP((gt)->i915, (from), (until)))) ++ + #define GT_TRACE(gt, fmt, ...) do { \ + const struct intel_gt *gt__ __maybe_unused = (gt); \ + GEM_TRACE("%s " fmt, dev_name(gt__->i915->drm.dev), \ +diff --git a/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c +new file mode 100644 +index 0000000000000..044219c5960a5 +--- /dev/null ++++ b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c +@@ -0,0 +1,39 @@ ++// SPDX-License-Identifier: MIT ++/* ++ * Copyright © 2024 Intel Corporation ++ */ ++ ++#include "i915_drv.h" ++#include "intel_gt.h" ++#include "intel_gt_ccs_mode.h" ++#include "intel_gt_regs.h" ++ ++void intel_gt_apply_ccs_mode(struct intel_gt *gt) ++{ ++ int cslice; ++ u32 mode = 0; ++ int first_ccs = __ffs(CCS_MASK(gt)); ++ ++ if (!IS_DG2(gt->i915)) ++ return; ++ ++ /* Build the value for the fixed CCS load balancing */ ++ for (cslice = 0; cslice < I915_MAX_CCS; cslice++) { ++ if (CCS_MASK(gt) & BIT(cslice)) ++ /* ++ * If available, assign the cslice ++ * to the first available engine... ++ */ ++ mode |= XEHP_CCS_MODE_CSLICE(cslice, first_ccs); ++ ++ else ++ /* ++ * ... 
otherwise, mark the cslice as ++ * unavailable if no CCS dispatches here ++ */ ++ mode |= XEHP_CCS_MODE_CSLICE(cslice, ++ XEHP_CCS_MODE_CSLICE_MASK); ++ } ++ ++ intel_uncore_write(gt->uncore, XEHP_CCS_MODE, mode); ++} +diff --git a/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h +new file mode 100644 +index 0000000000000..9e5549caeb269 +--- /dev/null ++++ b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h +@@ -0,0 +1,13 @@ ++/* SPDX-License-Identifier: MIT */ ++/* ++ * Copyright © 2024 Intel Corporation ++ */ ++ ++#ifndef __INTEL_GT_CCS_MODE_H__ ++#define __INTEL_GT_CCS_MODE_H__ ++ ++struct intel_gt; ++ ++void intel_gt_apply_ccs_mode(struct intel_gt *gt); ++ ++#endif /* __INTEL_GT_CCS_MODE_H__ */ +diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c +index 2c0f1f3e28ff8..c6dec485aefbe 100644 +--- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c ++++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c +@@ -3,8 +3,7 @@ + * Copyright © 2022 Intel Corporation + */ + +-#include "i915_drv.h" +- ++#include "intel_gt.h" + #include "intel_gt_mcr.h" + #include "intel_gt_print.h" + #include "intel_gt_regs.h" +@@ -166,8 +165,8 @@ void intel_gt_mcr_init(struct intel_gt *gt) + gt->steering_table[OADDRM] = xelpmp_oaddrm_steering_table; + } else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) { + /* Wa_14016747170 */ +- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || +- IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0)) ++ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || ++ IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) + fuse = REG_FIELD_GET(MTL_GT_L3_EXC_MASK, + intel_uncore_read(gt->uncore, + MTL_GT_ACTIVITY_FACTOR)); +diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h +index 2cdfb2f713d02..64acab146b52f 100644 +--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h ++++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h +@@ -1468,8 +1468,14 @@ + #define ECOBITS_PPGTT_CACHE4B (0 << 8) + + #define GEN12_RCU_MODE _MMIO(0x14800) ++#define XEHP_RCU_MODE_FIXED_SLICE_CCS_MODE REG_BIT(1) + #define GEN12_RCU_MODE_CCS_ENABLE REG_BIT(0) + ++#define XEHP_CCS_MODE _MMIO(0x14804) ++#define XEHP_CCS_MODE_CSLICE_MASK REG_GENMASK(2, 0) /* CCS0-3 + rsvd */ ++#define XEHP_CCS_MODE_CSLICE_WIDTH ilog2(XEHP_CCS_MODE_CSLICE_MASK + 1) ++#define XEHP_CCS_MODE_CSLICE(cslice, ccs) (ccs << (cslice * XEHP_CCS_MODE_CSLICE_WIDTH)) ++ + #define CHV_FUSE_GT _MMIO(VLV_GUNIT_BASE + 0x2168) + #define CHV_FGT_DISABLE_SS0 (1 << 10) + #define CHV_FGT_DISABLE_SS1 (1 << 11) +diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c +index c378cc7c953c4..b99efa348ad1e 100644 +--- a/drivers/gpu/drm/i915/gt/intel_lrc.c ++++ b/drivers/gpu/drm/i915/gt/intel_lrc.c +@@ -1316,29 +1316,6 @@ gen12_emit_cmd_buf_wa(const struct intel_context *ce, u32 *cs) + return cs; + } + +-/* +- * On DG2 during context restore of a preempted context in GPGPU mode, +- * RCS restore hang is detected. This is extremely timing dependent. +- * To address this below sw wabb is implemented for DG2 A steppings. 
+- */ +-static u32 * +-dg2_emit_rcs_hang_wabb(const struct intel_context *ce, u32 *cs) +-{ +- *cs++ = MI_LOAD_REGISTER_IMM(1); +- *cs++ = i915_mmio_reg_offset(GEN12_STATE_ACK_DEBUG(ce->engine->mmio_base)); +- *cs++ = 0x21; +- +- *cs++ = MI_LOAD_REGISTER_REG; +- *cs++ = i915_mmio_reg_offset(RING_NOPID(ce->engine->mmio_base)); +- *cs++ = i915_mmio_reg_offset(XEHP_CULLBIT1); +- +- *cs++ = MI_LOAD_REGISTER_REG; +- *cs++ = i915_mmio_reg_offset(RING_NOPID(ce->engine->mmio_base)); +- *cs++ = i915_mmio_reg_offset(XEHP_CULLBIT2); +- +- return cs; +-} +- + /* + * The bspec's tuning guide asks us to program a vertical watermark value of + * 0x3FF. However this register is not saved/restored properly by the +@@ -1363,21 +1340,15 @@ gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs) + cs = gen12_emit_cmd_buf_wa(ce, cs); + cs = gen12_emit_restore_scratch(ce, cs); + +- /* Wa_22011450934:dg2 */ +- if (IS_DG2_GRAPHICS_STEP(ce->engine->i915, G10, STEP_A0, STEP_B0) || +- IS_DG2_GRAPHICS_STEP(ce->engine->i915, G11, STEP_A0, STEP_B0)) +- cs = dg2_emit_rcs_hang_wabb(ce, cs); +- + /* Wa_16013000631:dg2 */ +- if (IS_DG2_GRAPHICS_STEP(ce->engine->i915, G10, STEP_B0, STEP_C0) || +- IS_DG2_G11(ce->engine->i915)) ++ if (IS_DG2_G11(ce->engine->i915)) + cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE, 0); + + cs = gen12_emit_aux_table_inv(ce->engine, cs); + + /* Wa_16014892111 */ +- if (IS_MTL_GRAPHICS_STEP(ce->engine->i915, M, STEP_A0, STEP_B0) || +- IS_MTL_GRAPHICS_STEP(ce->engine->i915, P, STEP_A0, STEP_B0) || ++ if (IS_GFX_GT_IP_STEP(ce->engine->gt, IP_VER(12, 70), STEP_A0, STEP_B0) || ++ IS_GFX_GT_IP_STEP(ce->engine->gt, IP_VER(12, 71), STEP_A0, STEP_B0) || + IS_DG2(ce->engine->i915)) + cs = dg2_emit_draw_watermark_setting(cs); + +@@ -1391,8 +1362,7 @@ gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs) + cs = gen12_emit_restore_scratch(ce, cs); + + /* Wa_16013000631:dg2 */ +- if (IS_DG2_GRAPHICS_STEP(ce->engine->i915, G10, STEP_B0, STEP_C0) || +- IS_DG2_G11(ce->engine->i915)) ++ if (IS_DG2_G11(ce->engine->i915)) + if (ce->engine->class == COMPUTE_CLASS) + cs = gen8_emit_pipe_control(cs, + PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE, +diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c +index 2c014407225cc..07269ff3be136 100644 +--- a/drivers/gpu/drm/i915/gt/intel_mocs.c ++++ b/drivers/gpu/drm/i915/gt/intel_mocs.c +@@ -404,18 +404,6 @@ static const struct drm_i915_mocs_entry dg2_mocs_table[] = { + MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)), + }; + +-static const struct drm_i915_mocs_entry dg2_mocs_table_g10_ax[] = { +- /* Wa_14011441408: Set Go to Memory for MOCS#0 */ +- MOCS_ENTRY(0, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)), +- /* UC - Coherent; GO:Memory */ +- MOCS_ENTRY(1, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)), +- /* UC - Non-Coherent; GO:Memory */ +- MOCS_ENTRY(2, 0, L3_1_UC | L3_GLBGO(1)), +- +- /* WB - LC */ +- MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)), +-}; +- + static const struct drm_i915_mocs_entry pvc_mocs_table[] = { + /* Error */ + MOCS_ENTRY(0, 0, L3_3_WB), +@@ -507,7 +495,7 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915, + memset(table, 0, sizeof(struct drm_i915_mocs_table)); + + table->unused_entries_index = I915_MOCS_PTE; +- if (IS_METEORLAKE(i915)) { ++ if (IS_GFX_GT_IP_RANGE(&i915->gt0, IP_VER(12, 70), IP_VER(12, 71))) { + table->size = ARRAY_SIZE(mtl_mocs_table); + table->table = mtl_mocs_table; + table->n_entries = MTL_NUM_MOCS_ENTRIES; +@@ -521,13 +509,8 @@ static unsigned int 
get_mocs_settings(const struct drm_i915_private *i915, + table->wb_index = 2; + table->unused_entries_index = 2; + } else if (IS_DG2(i915)) { +- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) { +- table->size = ARRAY_SIZE(dg2_mocs_table_g10_ax); +- table->table = dg2_mocs_table_g10_ax; +- } else { +- table->size = ARRAY_SIZE(dg2_mocs_table); +- table->table = dg2_mocs_table; +- } ++ table->size = ARRAY_SIZE(dg2_mocs_table); ++ table->table = dg2_mocs_table; + table->uc_index = 1; + table->n_entries = GEN9_NUM_MOCS_ENTRIES; + table->unused_entries_index = 3; +diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c +index ccdc1afbf11b5..9e113e9473260 100644 +--- a/drivers/gpu/drm/i915/gt/intel_rc6.c ++++ b/drivers/gpu/drm/i915/gt/intel_rc6.c +@@ -118,14 +118,12 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6) + GEN6_RC_CTL_EI_MODE(1); + + /* +- * Wa_16011777198 and BSpec 52698 - Render powergating must be off. ++ * BSpec 52698 - Render powergating must be off. + * FIXME BSpec is outdated, disabling powergating for MTL is just + * temporary wa and should be removed after fixing real cause + * of forcewake timeouts. + */ +- if (IS_METEORLAKE(gt->i915) || +- IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_C0) || +- IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0)) ++ if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71))) + pg_enable = + GEN9_MEDIA_PG_ENABLE | + GEN11_MEDIA_SAMPLER_PG_ENABLE; +diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c +index 5fa57a34cf4bb..13fb8e5042c58 100644 +--- a/drivers/gpu/drm/i915/gt/intel_reset.c ++++ b/drivers/gpu/drm/i915/gt/intel_reset.c +@@ -705,7 +705,7 @@ static int __reset_guc(struct intel_gt *gt) + + static bool needs_wa_14015076503(struct intel_gt *gt, intel_engine_mask_t engine_mask) + { +- if (!IS_METEORLAKE(gt->i915) || !HAS_ENGINE(gt, GSC0)) ++ if (MEDIA_VER_FULL(gt->i915) != IP_VER(13, 0) || !HAS_ENGINE(gt, GSC0)) + return false; + + if (!__HAS_ENGINE(engine_mask, GSC0)) +@@ -1632,6 +1632,24 @@ void __intel_fini_wedge(struct intel_wedge_me *w) + w->gt = NULL; + } + ++/* ++ * Wa_22011802037 requires that we (or the GuC) ensure that no command ++ * streamers are executing MI_FORCE_WAKE while an engine reset is initiated. 
++ */ ++bool intel_engine_reset_needs_wa_22011802037(struct intel_gt *gt) ++{ ++ if (GRAPHICS_VER(gt->i915) < 11) ++ return false; ++ ++ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0)) ++ return true; ++ ++ if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70)) ++ return false; ++ ++ return true; ++} ++ + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) + #include "selftest_reset.c" + #include "selftest_hangcheck.c" +diff --git a/drivers/gpu/drm/i915/gt/intel_reset.h b/drivers/gpu/drm/i915/gt/intel_reset.h +index 25c975b6e8fc0..f615b30b81c59 100644 +--- a/drivers/gpu/drm/i915/gt/intel_reset.h ++++ b/drivers/gpu/drm/i915/gt/intel_reset.h +@@ -78,4 +78,6 @@ void __intel_fini_wedge(struct intel_wedge_me *w); + bool intel_has_gpu_reset(const struct intel_gt *gt); + bool intel_has_reset_engine(const struct intel_gt *gt); + ++bool intel_engine_reset_needs_wa_22011802037(struct intel_gt *gt); ++ + #endif /* I915_RESET_H */ +diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c +index 092542f53aad9..4feef874e6d69 100644 +--- a/drivers/gpu/drm/i915/gt/intel_rps.c ++++ b/drivers/gpu/drm/i915/gt/intel_rps.c +@@ -1161,7 +1161,7 @@ void gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *c + { + struct drm_i915_private *i915 = rps_to_i915(rps); + +- if (IS_METEORLAKE(i915)) ++ if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) + return mtl_get_freq_caps(rps, caps); + else + return __gen6_rps_get_freq_caps(rps, caps); +diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c +index 3ae0dbd39eaa3..be060b32bd9ce 100644 +--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c ++++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c +@@ -10,6 +10,7 @@ + #include "intel_engine_regs.h" + #include "intel_gpu_commands.h" + #include "intel_gt.h" ++#include "intel_gt_ccs_mode.h" + #include "intel_gt_mcr.h" + #include "intel_gt_regs.h" + #include "intel_ring.h" +@@ -50,7 +51,8 @@ + * registers belonging to BCS, VCS or VECS should be implemented in + * xcs_engine_wa_init(). Workarounds for registers not belonging to a specific + * engine's MMIO range but that are part of of the common RCS/CCS reset domain +- * should be implemented in general_render_compute_wa_init(). ++ * should be implemented in general_render_compute_wa_init(). The settings ++ * about the CCS load balancing should be added in ccs_engine_wa_mode(). + * + * - GT workarounds: the list of these WAs is applied whenever these registers + * revert to their default values: on GPU reset, suspend/resume [1]_, etc. 
+@@ -764,39 +766,15 @@ static void dg2_ctx_workarounds_init(struct intel_engine_cs *engine, + { + dg2_ctx_gt_tuning_init(engine, wal); + +- /* Wa_16011186671:dg2_g11 */ +- if (IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) { +- wa_mcr_masked_dis(wal, VFLSKPD, DIS_MULT_MISS_RD_SQUASH); +- wa_mcr_masked_en(wal, VFLSKPD, DIS_OVER_FETCH_CACHE); +- } +- +- if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) { +- /* Wa_14010469329:dg2_g10 */ +- wa_mcr_masked_en(wal, XEHP_COMMON_SLICE_CHICKEN3, +- XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE); +- +- /* +- * Wa_22010465075:dg2_g10 +- * Wa_22010613112:dg2_g10 +- * Wa_14010698770:dg2_g10 +- */ +- wa_mcr_masked_en(wal, XEHP_COMMON_SLICE_CHICKEN3, +- GEN12_DISABLE_CPS_AWARE_COLOR_PIPE); +- } +- + /* Wa_16013271637:dg2 */ + wa_mcr_masked_en(wal, XEHP_SLICE_COMMON_ECO_CHICKEN1, + MSC_MSAA_REODER_BUF_BYPASS_DISABLE); + + /* Wa_14014947963:dg2 */ +- if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_B0, STEP_FOREVER) || +- IS_DG2_G11(engine->i915) || IS_DG2_G12(engine->i915)) +- wa_masked_field_set(wal, VF_PREEMPTION, PREEMPTION_VERTEX_COUNT, 0x4000); ++ wa_masked_field_set(wal, VF_PREEMPTION, PREEMPTION_VERTEX_COUNT, 0x4000); + + /* Wa_18018764978:dg2 */ +- if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_C0, STEP_FOREVER) || +- IS_DG2_G11(engine->i915) || IS_DG2_G12(engine->i915)) +- wa_mcr_masked_en(wal, XEHP_PSS_MODE2, SCOREBOARD_STALL_FLUSH_CONTROL); ++ wa_mcr_masked_en(wal, XEHP_PSS_MODE2, SCOREBOARD_STALL_FLUSH_CONTROL); + + /* Wa_15010599737:dg2 */ + wa_mcr_masked_en(wal, CHICKEN_RASTER_1, DIS_SF_ROUND_NEAREST_EVEN); +@@ -805,27 +783,32 @@ static void dg2_ctx_workarounds_init(struct intel_engine_cs *engine, + wa_masked_en(wal, CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE); + } + +-static void mtl_ctx_gt_tuning_init(struct intel_engine_cs *engine, +- struct i915_wa_list *wal) ++static void xelpg_ctx_gt_tuning_init(struct intel_engine_cs *engine, ++ struct i915_wa_list *wal) + { +- struct drm_i915_private *i915 = engine->i915; ++ struct intel_gt *gt = engine->gt; + + dg2_ctx_gt_tuning_init(engine, wal); + +- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_B0, STEP_FOREVER) || +- IS_MTL_GRAPHICS_STEP(i915, P, STEP_B0, STEP_FOREVER)) ++ /* ++ * Due to Wa_16014892111, the DRAW_WATERMARK tuning must be done in ++ * gen12_emit_indirect_ctx_rcs() rather than here on some early ++ * steppings. 
++ */ ++ if (!(IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || ++ IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0))) + wa_add(wal, DRAW_WATERMARK, VERT_WM_VAL, 0x3FF, 0, false); + } + +-static void mtl_ctx_workarounds_init(struct intel_engine_cs *engine, +- struct i915_wa_list *wal) ++static void xelpg_ctx_workarounds_init(struct intel_engine_cs *engine, ++ struct i915_wa_list *wal) + { +- struct drm_i915_private *i915 = engine->i915; ++ struct intel_gt *gt = engine->gt; + +- mtl_ctx_gt_tuning_init(engine, wal); ++ xelpg_ctx_gt_tuning_init(engine, wal); + +- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || +- IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0)) { ++ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || ++ IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) { + /* Wa_14014947963 */ + wa_masked_field_set(wal, VF_PREEMPTION, + PREEMPTION_VERTEX_COUNT, 0x4000); +@@ -931,8 +914,8 @@ __intel_engine_init_ctx_wa(struct intel_engine_cs *engine, + if (engine->class != RENDER_CLASS) + goto done; + +- if (IS_METEORLAKE(i915)) +- mtl_ctx_workarounds_init(engine, wal); ++ if (IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 70), IP_VER(12, 74))) ++ xelpg_ctx_workarounds_init(engine, wal); + else if (IS_PONTEVECCHIO(i915)) + ; /* noop; none at this time */ + else if (IS_DG2(i915)) +@@ -1606,31 +1589,11 @@ xehpsdv_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) + static void + dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) + { +- struct intel_engine_cs *engine; +- int id; +- + xehp_init_mcr(gt, wal); + + /* Wa_14011060649:dg2 */ + wa_14011060649(gt, wal); + +- /* +- * Although there are per-engine instances of these registers, +- * they technically exist outside the engine itself and are not +- * impacted by engine resets. Furthermore, they're part of the +- * GuC blacklist so trying to treat them as engine workarounds +- * will result in GuC initialization failure and a wedged GPU. 
+- */ +- for_each_engine(engine, gt, id) { +- if (engine->class != VIDEO_DECODE_CLASS) +- continue; +- +- /* Wa_16010515920:dg2_g10 */ +- if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0)) +- wa_write_or(wal, VDBOX_CGCTL3F18(engine->mmio_base), +- ALNUNIT_CLKGATE_DIS); +- } +- + if (IS_DG2_G10(gt->i915)) { + /* Wa_22010523718:dg2 */ + wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE, +@@ -1641,65 +1604,6 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) + DSS_ROUTER_CLKGATE_DIS); + } + +- if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0) || +- IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0)) { +- /* Wa_14012362059:dg2 */ +- wa_mcr_write_or(wal, XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB); +- } +- +- if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0)) { +- /* Wa_14010948348:dg2_g10 */ +- wa_write_or(wal, UNSLCGCTL9430, MSQDUNIT_CLKGATE_DIS); +- +- /* Wa_14011037102:dg2_g10 */ +- wa_write_or(wal, UNSLCGCTL9444, LTCDD_CLKGATE_DIS); +- +- /* Wa_14011371254:dg2_g10 */ +- wa_mcr_write_or(wal, XEHP_SLICE_UNIT_LEVEL_CLKGATE, NODEDSS_CLKGATE_DIS); +- +- /* Wa_14011431319:dg2_g10 */ +- wa_write_or(wal, UNSLCGCTL9440, GAMTLBOACS_CLKGATE_DIS | +- GAMTLBVDBOX7_CLKGATE_DIS | +- GAMTLBVDBOX6_CLKGATE_DIS | +- GAMTLBVDBOX5_CLKGATE_DIS | +- GAMTLBVDBOX4_CLKGATE_DIS | +- GAMTLBVDBOX3_CLKGATE_DIS | +- GAMTLBVDBOX2_CLKGATE_DIS | +- GAMTLBVDBOX1_CLKGATE_DIS | +- GAMTLBVDBOX0_CLKGATE_DIS | +- GAMTLBKCR_CLKGATE_DIS | +- GAMTLBGUC_CLKGATE_DIS | +- GAMTLBBLT_CLKGATE_DIS); +- wa_write_or(wal, UNSLCGCTL9444, GAMTLBGFXA0_CLKGATE_DIS | +- GAMTLBGFXA1_CLKGATE_DIS | +- GAMTLBCOMPA0_CLKGATE_DIS | +- GAMTLBCOMPA1_CLKGATE_DIS | +- GAMTLBCOMPB0_CLKGATE_DIS | +- GAMTLBCOMPB1_CLKGATE_DIS | +- GAMTLBCOMPC0_CLKGATE_DIS | +- GAMTLBCOMPC1_CLKGATE_DIS | +- GAMTLBCOMPD0_CLKGATE_DIS | +- GAMTLBCOMPD1_CLKGATE_DIS | +- GAMTLBMERT_CLKGATE_DIS | +- GAMTLBVEBOX3_CLKGATE_DIS | +- GAMTLBVEBOX2_CLKGATE_DIS | +- GAMTLBVEBOX1_CLKGATE_DIS | +- GAMTLBVEBOX0_CLKGATE_DIS); +- +- /* Wa_14010569222:dg2_g10 */ +- wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE, +- GAMEDIA_CLKGATE_DIS); +- +- /* Wa_14011028019:dg2_g10 */ +- wa_mcr_write_or(wal, SSMCGCTL9530, RTFUNIT_CLKGATE_DIS); +- +- /* Wa_14010680813:dg2_g10 */ +- wa_mcr_write_or(wal, XEHP_GAMSTLB_CTRL, +- CONTROL_BLOCK_CLKGATE_DIS | +- EGRESS_BLOCK_CLKGATE_DIS | +- TAG_BLOCK_CLKGATE_DIS); +- } +- + /* Wa_14014830051:dg2 */ + wa_mcr_write_clr(wal, SARB_CHICKEN1, COMP_CKN_IN); + +@@ -1741,14 +1645,15 @@ pvc_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) + static void + xelpg_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) + { +- /* Wa_14018778641 / Wa_18018781329 */ ++ /* Wa_14018575942 / Wa_18018781329 */ ++ wa_mcr_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB); + wa_mcr_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB); + + /* Wa_22016670082 */ + wa_write_or(wal, GEN12_SQCNT1, GEN12_STRICT_RAR_ENABLE); + +- if (IS_MTL_GRAPHICS_STEP(gt->i915, M, STEP_A0, STEP_B0) || +- IS_MTL_GRAPHICS_STEP(gt->i915, P, STEP_A0, STEP_B0)) { ++ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || ++ IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) { + /* Wa_14014830051 */ + wa_mcr_write_clr(wal, SARB_CHICKEN1, COMP_CKN_IN); + +@@ -1791,10 +1696,8 @@ xelpmp_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) + */ + static void gt_tuning_settings(struct intel_gt *gt, struct i915_wa_list *wal) + { +- if (IS_METEORLAKE(gt->i915)) { +- if (gt->type != GT_MEDIA) +- wa_mcr_write_or(wal, XEHP_L3SCQREG7, 
BLEND_FILL_CACHING_OPT_DIS); +- ++ if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74))) { ++ wa_mcr_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS); + wa_mcr_write_or(wal, XEHP_SQCM, EN_32B_ACCESS); + } + +@@ -1826,7 +1729,7 @@ gt_init_workarounds(struct intel_gt *gt, struct i915_wa_list *wal) + return; + } + +- if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) ++ if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74))) + xelpg_gt_workarounds_init(gt, wal); + else if (IS_PONTEVECCHIO(i915)) + pvc_gt_workarounds_init(gt, wal); +@@ -2242,29 +2145,10 @@ static void dg2_whitelist_build(struct intel_engine_cs *engine) + + switch (engine->class) { + case RENDER_CLASS: +- /* +- * Wa_1507100340:dg2_g10 +- * +- * This covers 4 registers which are next to one another : +- * - PS_INVOCATION_COUNT +- * - PS_INVOCATION_COUNT_UDW +- * - PS_DEPTH_COUNT +- * - PS_DEPTH_COUNT_UDW +- */ +- if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) +- whitelist_reg_ext(w, PS_INVOCATION_COUNT, +- RING_FORCE_TO_NONPRIV_ACCESS_RD | +- RING_FORCE_TO_NONPRIV_RANGE_4); +- + /* Required by recommended tuning setting (not a workaround) */ + whitelist_mcr_reg(w, XEHP_COMMON_SLICE_CHICKEN3); + + break; +- case COMPUTE_CLASS: +- /* Wa_16011157294:dg2_g10 */ +- if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) +- whitelist_reg(w, GEN9_CTX_PREEMPT_REG); +- break; + default: + break; + } +@@ -2294,7 +2178,7 @@ static void pvc_whitelist_build(struct intel_engine_cs *engine) + blacklist_trtt(engine); + } + +-static void mtl_whitelist_build(struct intel_engine_cs *engine) ++static void xelpg_whitelist_build(struct intel_engine_cs *engine) + { + struct i915_wa_list *w = &engine->whitelist; + +@@ -2316,8 +2200,10 @@ void intel_engine_init_whitelist(struct intel_engine_cs *engine) + + wa_init_start(w, engine->gt, "whitelist", engine->name); + +- if (IS_METEORLAKE(i915)) +- mtl_whitelist_build(engine); ++ if (engine->gt->type == GT_MEDIA) ++ ; /* none yet */ ++ else if (IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 70), IP_VER(12, 74))) ++ xelpg_whitelist_build(engine); + else if (IS_PONTEVECCHIO(i915)) + pvc_whitelist_build(engine); + else if (IS_DG2(i915)) +@@ -2415,62 +2301,35 @@ engine_fake_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) + } + } + +-static bool needs_wa_1308578152(struct intel_engine_cs *engine) +-{ +- return intel_sseu_find_first_xehp_dss(&engine->gt->info.sseu, 0, 0) >= +- GEN_DSS_PER_GSLICE; +-} +- + static void + rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) + { + struct drm_i915_private *i915 = engine->i915; ++ struct intel_gt *gt = engine->gt; + +- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || +- IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0)) { ++ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || ++ IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) { + /* Wa_22014600077 */ + wa_mcr_masked_en(wal, GEN10_CACHE_MODE_SS, + ENABLE_EU_COUNT_FOR_TDL_FLUSH); + } + +- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || +- IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0) || +- IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) || +- IS_DG2_G11(i915) || IS_DG2_G12(i915)) { ++ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || ++ IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0) || ++ IS_DG2(i915)) { + /* Wa_1509727124 */ + wa_mcr_masked_en(wal, GEN10_SAMPLER_MODE, + SC_DISABLE_POWER_OPTIMIZATION_EBB); + } + +- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) || +- 
IS_DG2_G11(i915) || IS_DG2_G12(i915) || +- IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0)) { ++ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || ++ IS_DG2(i915)) { + /* Wa_22012856258 */ + wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, + GEN12_DISABLE_READ_SUPPRESSION); + } + +- if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) { +- /* Wa_14013392000:dg2_g11 */ +- wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, GEN12_ENABLE_LARGE_GRF_MODE); +- } +- +- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0) || +- IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) { +- /* Wa_14012419201:dg2 */ +- wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, +- GEN12_DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX); +- } +- +- /* Wa_1308578152:dg2_g10 when first gslice is fused off */ +- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) && +- needs_wa_1308578152(engine)) { +- wa_masked_dis(wal, GEN12_CS_DEBUG_MODE1_CCCSUNIT_BE_COMMON, +- GEN12_REPLAY_MODE_GRANULARITY); +- } +- +- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) || +- IS_DG2_G11(i915) || IS_DG2_G12(i915)) { ++ if (IS_DG2(i915)) { + /* + * Wa_22010960976:dg2 + * Wa_14013347512:dg2 +@@ -2479,34 +2338,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) + LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK); + } + +- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) { +- /* +- * Wa_1608949956:dg2_g10 +- * Wa_14010198302:dg2_g10 +- */ +- wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN, +- MDQ_ARBITRATION_MODE | UGM_BACKUP_MODE); +- } +- +- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) +- /* Wa_22010430635:dg2 */ +- wa_mcr_masked_en(wal, +- GEN9_ROW_CHICKEN4, +- GEN12_DISABLE_GRF_CLEAR); +- +- /* Wa_14013202645:dg2 */ +- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) || +- IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) +- wa_mcr_write_or(wal, RT_CTRL, DIS_NULL_QUERY); +- +- /* Wa_22012532006:dg2 */ +- if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_C0) || +- IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) +- wa_mcr_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7, +- DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA); +- +- if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_B0, STEP_FOREVER) || +- IS_DG2_G10(i915)) { ++ if (IS_DG2_G11(i915) || IS_DG2_G10(i915)) { + /* Wa_22014600077:dg2 */ + wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0, + _MASKED_BIT_ENABLE(ENABLE_EU_COUNT_FOR_TDL_FLUSH), +@@ -2514,6 +2346,19 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) + true); + } + ++ if (IS_DG2(i915) || IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) || ++ IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) { ++ /* ++ * Wa_1606700617:tgl,dg1,adl-p ++ * Wa_22010271021:tgl,rkl,dg1,adl-s,adl-p ++ * Wa_14010826681:tgl,dg1,rkl,adl-p ++ * Wa_18019627453:dg2 ++ */ ++ wa_masked_en(wal, ++ GEN9_CS_DEBUG_MODE1, ++ FF_DOP_CLOCK_GATE_DISABLE); ++ } ++ + if (IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) || IS_DG1(i915) || + IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) { + /* Wa_1606931601:tgl,rkl,dg1,adl-s,adl-p */ +@@ -2527,19 +2372,11 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) + */ + wa_write_or(wal, GEN7_FF_THREAD_MODE, + GEN12_FF_TESSELATION_DOP_GATE_DISABLE); +- } + +- if (IS_ALDERLAKE_P(i915) || IS_DG2(i915) || IS_ALDERLAKE_S(i915) || +- IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) { +- /* +- * Wa_1606700617:tgl,dg1,adl-p +- * Wa_22010271021:tgl,rkl,dg1,adl-s,adl-p +- * Wa_14010826681:tgl,dg1,rkl,adl-p +- * Wa_18019627453:dg2 +- */ +- wa_masked_en(wal, +- 
GEN9_CS_DEBUG_MODE1, +- FF_DOP_CLOCK_GATE_DISABLE); ++ /* Wa_1406941453:tgl,rkl,dg1,adl-s,adl-p */ ++ wa_mcr_masked_en(wal, ++ GEN10_SAMPLER_MODE, ++ ENABLE_SMALLPL); + } + + if (IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) || +@@ -2566,14 +2403,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) + GEN8_RC_SEMA_IDLE_MSG_DISABLE); + } + +- if (IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915) || +- IS_ALDERLAKE_S(i915) || IS_ALDERLAKE_P(i915)) { +- /* Wa_1406941453:tgl,rkl,dg1,adl-s,adl-p */ +- wa_mcr_masked_en(wal, +- GEN10_SAMPLER_MODE, +- ENABLE_SMALLPL); +- } +- + if (GRAPHICS_VER(i915) == 11) { + /* This is not an Wa. Enable for better image quality */ + wa_masked_en(wal, +@@ -2975,10 +2804,12 @@ ccs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) + * function invoked by __intel_engine_init_ctx_wa(). + */ + static void +-add_render_compute_tuning_settings(struct drm_i915_private *i915, ++add_render_compute_tuning_settings(struct intel_gt *gt, + struct i915_wa_list *wal) + { +- if (IS_METEORLAKE(i915) || IS_DG2(i915)) ++ struct drm_i915_private *i915 = gt->i915; ++ ++ if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74)) || IS_DG2(i915)) + wa_mcr_write_clr_set(wal, RT_CTRL, STACKID_CTRL, STACKID_CTRL_512); + + /* +@@ -2994,6 +2825,28 @@ add_render_compute_tuning_settings(struct drm_i915_private *i915, + wa_write_clr(wal, GEN8_GARBCNTL, GEN12_BUS_HASH_CTL_BIT_EXC); + } + ++static void ccs_engine_wa_mode(struct intel_engine_cs *engine, struct i915_wa_list *wal) ++{ ++ struct intel_gt *gt = engine->gt; ++ ++ if (!IS_DG2(gt->i915)) ++ return; ++ ++ /* ++ * Wa_14019159160: This workaround, along with others, leads to ++ * significant challenges in utilizing load balancing among the ++ * CCS slices. Consequently, an architectural decision has been ++ * made to completely disable automatic CCS load balancing. ++ */ ++ wa_masked_en(wal, GEN12_RCU_MODE, XEHP_RCU_MODE_FIXED_SLICE_CCS_MODE); ++ ++ /* ++ * After having disabled automatic load balancing we need to ++ * assign all slices to a single CCS. 
We will call it CCS mode 1 ++ */ ++ intel_gt_apply_ccs_mode(gt); ++} ++ + /* + * The workarounds in this function apply to shared registers in + * the general render reset domain that aren't tied to a +@@ -3007,8 +2860,9 @@ static void + general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) + { + struct drm_i915_private *i915 = engine->i915; ++ struct intel_gt *gt = engine->gt; + +- add_render_compute_tuning_settings(i915, wal); ++ add_render_compute_tuning_settings(gt, wal); + + if (GRAPHICS_VER(i915) >= 11) { + /* This is not a Wa (although referred to as +@@ -3029,13 +2883,14 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li + GEN11_INDIRECT_STATE_BASE_ADDR_OVERRIDE); + } + +- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_B0, STEP_FOREVER) || +- IS_MTL_GRAPHICS_STEP(i915, P, STEP_B0, STEP_FOREVER)) ++ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_B0, STEP_FOREVER) || ++ IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_B0, STEP_FOREVER) || ++ IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 74), IP_VER(12, 74))) + /* Wa_14017856879 */ + wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN3, MTL_DISABLE_FIX_FOR_EOT_FLUSH); + +- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || +- IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0)) ++ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || ++ IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) + /* + * Wa_14017066071 + * Wa_14017654203 +@@ -3043,37 +2898,47 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li + wa_mcr_masked_en(wal, GEN10_SAMPLER_MODE, + MTL_DISABLE_SAMPLER_SC_OOO); + +- if (IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0)) ++ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) + /* Wa_22015279794 */ + wa_mcr_masked_en(wal, GEN10_CACHE_MODE_SS, + DISABLE_PREFETCH_INTO_IC); + +- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || +- IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0) || +- IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) || +- IS_DG2_G11(i915) || IS_DG2_G12(i915)) { ++ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || ++ IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0) || ++ IS_DG2(i915)) { + /* Wa_22013037850 */ + wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, + DISABLE_128B_EVICTION_COMMAND_UDW); ++ ++ /* Wa_18017747507 */ ++ wa_masked_en(wal, VFG_PREEMPTION_CHICKEN, POLYGON_TRIFAN_LINELOOP_DISABLE); + } + +- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || +- IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0) || ++ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || ++ IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0) || + IS_PONTEVECCHIO(i915) || + IS_DG2(i915)) { + /* Wa_22014226127 */ + wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0, DISABLE_D8_D16_COASLESCE); + } + +- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || +- IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0) || +- IS_DG2(i915)) { +- /* Wa_18017747507 */ +- wa_masked_en(wal, VFG_PREEMPTION_CHICKEN, POLYGON_TRIFAN_LINELOOP_DISABLE); ++ if (IS_PONTEVECCHIO(i915) || IS_DG2(i915)) { ++ /* Wa_14015227452:dg2,pvc */ ++ wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE); ++ ++ /* Wa_16015675438:dg2,pvc */ ++ wa_masked_en(wal, FF_SLICE_CS_CHICKEN2, GEN12_PERF_FIX_BALANCING_CFE_DISABLE); ++ } ++ ++ if (IS_DG2(i915)) { ++ /* ++ * Wa_16011620976:dg2_g11 ++ * Wa_22015475538:dg2 ++ */ ++ wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8); + } + +- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) || +- 
IS_DG2_G11(i915)) { ++ if (IS_DG2_G11(i915)) { + /* + * Wa_22012826095:dg2 + * Wa_22013059131:dg2 +@@ -3085,18 +2950,18 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li + /* Wa_22013059131:dg2 */ + wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0, + FORCE_1_SUB_MESSAGE_PER_FRAGMENT); +- } + +- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) { + /* +- * Wa_14010918519:dg2_g10 ++ * Wa_22012654132 + * +- * LSC_CHICKEN_BIT_0 always reads back as 0 is this stepping, +- * so ignoring verification. ++ * Note that register 0xE420 is write-only and cannot be read ++ * back for verification on DG2 (due to Wa_14012342262), so ++ * we need to explicitly skip the readback. + */ +- wa_mcr_add(wal, LSC_CHICKEN_BIT_0_UDW, 0, +- FORCE_SLM_FENCE_SCOPE_TO_TILE | FORCE_UGM_FENCE_SCOPE_TO_TILE, +- 0, false); ++ wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0, ++ _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC), ++ 0 /* write-only, so skip validation */, ++ true); + } + + if (IS_XEHPSDV(i915)) { +@@ -3114,35 +2979,6 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li + wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1, + GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE); + } +- +- if (IS_DG2(i915) || IS_PONTEVECCHIO(i915)) { +- /* Wa_14015227452:dg2,pvc */ +- wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE); +- +- /* Wa_16015675438:dg2,pvc */ +- wa_masked_en(wal, FF_SLICE_CS_CHICKEN2, GEN12_PERF_FIX_BALANCING_CFE_DISABLE); +- } +- +- if (IS_DG2(i915)) { +- /* +- * Wa_16011620976:dg2_g11 +- * Wa_22015475538:dg2 +- */ +- wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8); +- } +- +- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_C0) || IS_DG2_G11(i915)) +- /* +- * Wa_22012654132 +- * +- * Note that register 0xE420 is write-only and cannot be read +- * back for verification on DG2 (due to Wa_14012342262), so +- * we need to explicitly skip the readback. +- */ +- wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0, +- _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC), +- 0 /* write-only, so skip validation */, +- true); + } + + static void +@@ -3158,8 +2994,10 @@ engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal + * to a single RCS/CCS engine's workaround list since + * they're reset as part of the general render domain reset. + */ +- if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE) ++ if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE) { + general_render_compute_wa_init(engine, wal); ++ ccs_engine_wa_mode(engine, wal); ++ } + + if (engine->class == COMPUTE_CLASS) + ccs_engine_wa_init(engine, wal); +diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c +index 569b5fe94c416..861d0c58388cf 100644 +--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c ++++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c +@@ -272,18 +272,14 @@ static u32 guc_ctl_wa_flags(struct intel_guc *guc) + GRAPHICS_VER_FULL(gt->i915) < IP_VER(12, 50)) + flags |= GUC_WA_POLLCS; + +- /* Wa_16011759253:dg2_g10:a0 */ +- if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0)) +- flags |= GUC_WA_GAM_CREDITS; +- + /* Wa_14014475959 */ +- if (IS_MTL_GRAPHICS_STEP(gt->i915, M, STEP_A0, STEP_B0) || ++ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || + IS_DG2(gt->i915)) + flags |= GUC_WA_HOLD_CCS_SWITCHOUT; + + /* +- * Wa_14012197797:dg2_g10:a0,dg2_g11:a0 +- * Wa_22011391025:dg2_g10,dg2_g11,dg2_g12 ++ * Wa_14012197797 ++ * Wa_22011391025 + * + * The same WA bit is used for both and 22011391025 is applicable to + * all DG2. 
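
The i915 and GuC hunks above replace per-subplatform stepping checks (IS_MTL_GRAPHICS_STEP, IS_DG2_GRAPHICS_STEP) with GT graphics-IP gates such as IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0), which key off the IP version reported by the GT rather than a platform name. A minimal stand-alone sketch of that kind of gate is shown below; the packing in IP_VER(), the stepping enum and the helper name are illustrative assumptions, not the actual i915 definitions.

/* Illustrative sketch of an IP-version + stepping range gate, assuming a
 * simple (version << 8 | release) packing; not the real i915 macros. */
#include <stdbool.h>
#include <stdio.h>

#define IP_VER(ver, rel)	(((ver) << 8) | (rel))

enum step { STEP_A0, STEP_B0, STEP_C0, STEP_FOREVER = 0xff };

struct gt_info {
	unsigned int ip_ver;	/* e.g. IP_VER(12, 70) */
	enum step step;
};

/* True when the GT runs graphics IP @ip at a stepping in [since, until). */
static bool gt_ip_step_in_range(const struct gt_info *gt, unsigned int ip,
				enum step since, enum step until)
{
	return gt->ip_ver == ip && gt->step >= since && gt->step < until;
}

int main(void)
{
	struct gt_info gt = { .ip_ver = IP_VER(12, 70), .step = STEP_A0 };

	/* Would take a STEP_A0..STEP_B0 workaround branch: prints 1 */
	printf("%d\n", gt_ip_step_in_range(&gt, IP_VER(12, 70),
					   STEP_A0, STEP_B0));
	return 0;
}

Expressed this way, a workaround follows the graphics IP rather than a specific platform macro, which appears to be the intent of the conversions in these hunks.
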
+@@ -292,22 +288,14 @@ static u32 guc_ctl_wa_flags(struct intel_guc *guc) + flags |= GUC_WA_DUAL_QUEUE; + + /* Wa_22011802037: graphics version 11/12 */ +- if (IS_MTL_GRAPHICS_STEP(gt->i915, M, STEP_A0, STEP_B0) || +- (GRAPHICS_VER(gt->i915) >= 11 && +- GRAPHICS_VER_FULL(gt->i915) < IP_VER(12, 70))) ++ if (intel_engine_reset_needs_wa_22011802037(gt)) + flags |= GUC_WA_PRE_PARSER; + +- /* Wa_16011777198:dg2 */ +- if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_C0) || +- IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0)) +- flags |= GUC_WA_RCS_RESET_BEFORE_RC6; +- + /* +- * Wa_22012727170:dg2_g10[a0-c0), dg2_g11[a0..) +- * Wa_22012727685:dg2_g11[a0..) ++ * Wa_22012727170 ++ * Wa_22012727685 + */ +- if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_C0) || +- IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_FOREVER)) ++ if (IS_DG2_G11(gt->i915)) + flags |= GUC_WA_CONTEXT_ISOLATION; + + /* Wa_16015675438 */ +diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +index 836e4d9d65ef6..b5de5a9f59671 100644 +--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c ++++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +@@ -1690,9 +1690,7 @@ static void guc_engine_reset_prepare(struct intel_engine_cs *engine) + * Wa_22011802037: In addition to stopping the cs, we need + * to wait for any pending mi force wakeups + */ +- if (IS_MTL_GRAPHICS_STEP(engine->i915, M, STEP_A0, STEP_B0) || +- (GRAPHICS_VER(engine->i915) >= 11 && +- GRAPHICS_VER_FULL(engine->i915) < IP_VER(12, 70))) { ++ if (intel_engine_reset_needs_wa_22011802037(engine->gt)) { + intel_engine_stop_cs(engine); + intel_engine_wait_for_pending_mi_fw(engine); + } +@@ -4299,7 +4297,7 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine) + + /* Wa_14014475959:dg2 */ + if (engine->class == COMPUTE_CLASS) +- if (IS_MTL_GRAPHICS_STEP(engine->i915, M, STEP_A0, STEP_B0) || ++ if (IS_GFX_GT_IP_STEP(engine->gt, IP_VER(12, 70), STEP_A0, STEP_B0) || + IS_DG2(engine->i915)) + engine->flags |= I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT; + +diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c +index 4de44cf1026dc..7a90a2e32c9f1 100644 +--- a/drivers/gpu/drm/i915/i915_debugfs.c ++++ b/drivers/gpu/drm/i915/i915_debugfs.c +@@ -144,7 +144,7 @@ static const char *i915_cache_level_str(struct drm_i915_gem_object *obj) + { + struct drm_i915_private *i915 = obj_to_i915(obj); + +- if (IS_METEORLAKE(i915)) { ++ if (IS_GFX_GT_IP_RANGE(to_gt(i915), IP_VER(12, 70), IP_VER(12, 71))) { + switch (obj->pat_index) { + case 0: return " WB"; + case 1: return " WT"; +diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h +index 7a8ce7239bc9e..e0e0493d6c1f0 100644 +--- a/drivers/gpu/drm/i915/i915_drv.h ++++ b/drivers/gpu/drm/i915/i915_drv.h +@@ -658,10 +658,6 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, + #define IS_XEHPSDV_GRAPHICS_STEP(__i915, since, until) \ + (IS_XEHPSDV(__i915) && IS_GRAPHICS_STEP(__i915, since, until)) + +-#define IS_MTL_GRAPHICS_STEP(__i915, variant, since, until) \ +- (IS_SUBPLATFORM(__i915, INTEL_METEORLAKE, INTEL_SUBPLATFORM_##variant) && \ +- IS_GRAPHICS_STEP(__i915, since, until)) +- + #define IS_MTL_DISPLAY_STEP(__i915, since, until) \ + (IS_METEORLAKE(__i915) && \ + IS_DISPLAY_STEP(__i915, since, until)) +diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c +index 8f4a25d2cfc24..3f90403d86cb4 100644 +--- a/drivers/gpu/drm/i915/i915_perf.c ++++ 
b/drivers/gpu/drm/i915/i915_perf.c +@@ -3255,11 +3255,10 @@ get_sseu_config(struct intel_sseu *out_sseu, + */ + u32 i915_perf_oa_timestamp_frequency(struct drm_i915_private *i915) + { +- /* +- * Wa_18013179988:dg2 +- * Wa_14015846243:mtl +- */ +- if (IS_DG2(i915) || IS_METEORLAKE(i915)) { ++ struct intel_gt *gt = to_gt(i915); ++ ++ /* Wa_18013179988 */ ++ if (IS_DG2(i915) || IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74))) { + intel_wakeref_t wakeref; + u32 reg, shift; + +@@ -4564,7 +4563,7 @@ static bool xehp_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr) + + static bool gen12_is_valid_mux_addr(struct i915_perf *perf, u32 addr) + { +- if (IS_METEORLAKE(perf->i915)) ++ if (GRAPHICS_VER_FULL(perf->i915) >= IP_VER(12, 70)) + return reg_in_range_table(addr, mtl_oa_mux_regs); + else + return reg_in_range_table(addr, gen12_oa_mux_regs); +diff --git a/drivers/gpu/drm/i915/intel_clock_gating.c b/drivers/gpu/drm/i915/intel_clock_gating.c +index 81a4d32734e94..c66eb6abd4a2e 100644 +--- a/drivers/gpu/drm/i915/intel_clock_gating.c ++++ b/drivers/gpu/drm/i915/intel_clock_gating.c +@@ -396,14 +396,6 @@ static void dg2_init_clock_gating(struct drm_i915_private *i915) + /* Wa_22010954014:dg2 */ + intel_uncore_rmw(&i915->uncore, XEHP_CLOCK_GATE_DIS, 0, + SGSI_SIDECLK_DIS); +- +- /* +- * Wa_14010733611:dg2_g10 +- * Wa_22010146351:dg2_g10 +- */ +- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) +- intel_uncore_rmw(&i915->uncore, XEHP_CLOCK_GATE_DIS, 0, +- SGR_DIS | SGGI_DIS); + } + + static void pvc_init_clock_gating(struct drm_i915_private *i915) +diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.c b/drivers/gpu/drm/nouveau/nouveau_uvmm.c +index aae780e4a4aa3..2bbcdc649e862 100644 +--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c ++++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c +@@ -804,15 +804,15 @@ op_remap(struct drm_gpuva_op_remap *r, + struct drm_gpuva_op_unmap *u = r->unmap; + struct nouveau_uvma *uvma = uvma_from_va(u->va); + u64 addr = uvma->va.va.addr; +- u64 range = uvma->va.va.range; ++ u64 end = uvma->va.va.addr + uvma->va.va.range; + + if (r->prev) + addr = r->prev->va.addr + r->prev->va.range; + + if (r->next) +- range = r->next->va.addr - addr; ++ end = r->next->va.addr; + +- op_unmap_range(u, addr, range); ++ op_unmap_range(u, addr, end - addr); + } + + static int +diff --git a/drivers/gpu/drm/panfrost/panfrost_gpu.c b/drivers/gpu/drm/panfrost/panfrost_gpu.c +index eca45b83e4e67..c067ff550692a 100644 +--- a/drivers/gpu/drm/panfrost/panfrost_gpu.c ++++ b/drivers/gpu/drm/panfrost/panfrost_gpu.c +@@ -387,19 +387,19 @@ void panfrost_gpu_power_off(struct panfrost_device *pfdev) + + gpu_write(pfdev, SHADER_PWROFF_LO, pfdev->features.shader_present); + ret = readl_relaxed_poll_timeout(pfdev->iomem + SHADER_PWRTRANS_LO, +- val, !val, 1, 1000); ++ val, !val, 1, 2000); + if (ret) + dev_err(pfdev->dev, "shader power transition timeout"); + + gpu_write(pfdev, TILER_PWROFF_LO, pfdev->features.tiler_present); + ret = readl_relaxed_poll_timeout(pfdev->iomem + TILER_PWRTRANS_LO, +- val, !val, 1, 1000); ++ val, !val, 1, 2000); + if (ret) + dev_err(pfdev->dev, "tiler power transition timeout"); + + gpu_write(pfdev, L2_PWROFF_LO, pfdev->features.l2_present); + ret = readl_poll_timeout(pfdev->iomem + L2_PWRTRANS_LO, +- val, !val, 0, 1000); ++ val, !val, 0, 2000); + if (ret) + dev_err(pfdev->dev, "l2 power transition timeout"); + } +diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c +index e7cd27e387df1..470add73f7bda 100644 +--- a/drivers/md/dm-integrity.c ++++ 
b/drivers/md/dm-integrity.c +@@ -4231,7 +4231,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned int argc, char **argv + } else if (sscanf(opt_string, "sectors_per_bit:%llu%c", &llval, &dummy) == 1) { + log2_sectors_per_bitmap_bit = !llval ? 0 : __ilog2_u64(llval); + } else if (sscanf(opt_string, "bitmap_flush_interval:%u%c", &val, &dummy) == 1) { +- if (val >= (uint64_t)UINT_MAX * 1000 / HZ) { ++ if ((uint64_t)val >= (uint64_t)UINT_MAX * 1000 / HZ) { + r = -EINVAL; + ti->error = "Invalid bitmap_flush_interval argument"; + goto bad; +diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c +index 5ad51271a5349..b8fde22aebf93 100644 +--- a/drivers/net/dsa/mv88e6xxx/chip.c ++++ b/drivers/net/dsa/mv88e6xxx/chip.c +@@ -5386,8 +5386,12 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { + .family = MV88E6XXX_FAMILY_6250, + .name = "Marvell 88E6020", + .num_databases = 64, +- .num_ports = 4, ++ /* Ports 2-4 are not routed to pins ++ * => usable ports 0, 1, 5, 6 ++ */ ++ .num_ports = 7, + .num_internal_phys = 2, ++ .invalid_port_mask = BIT(2) | BIT(3) | BIT(4), + .max_vid = 4095, + .port_base_addr = 0x8, + .phy_base_addr = 0x0, +diff --git a/drivers/net/dsa/sja1105/sja1105_mdio.c b/drivers/net/dsa/sja1105/sja1105_mdio.c +index 833e55e4b9612..52ddb4ef259e9 100644 +--- a/drivers/net/dsa/sja1105/sja1105_mdio.c ++++ b/drivers/net/dsa/sja1105/sja1105_mdio.c +@@ -94,7 +94,7 @@ int sja1110_pcs_mdio_read_c45(struct mii_bus *bus, int phy, int mmd, int reg) + return tmp & 0xffff; + } + +-int sja1110_pcs_mdio_write_c45(struct mii_bus *bus, int phy, int reg, int mmd, ++int sja1110_pcs_mdio_write_c45(struct mii_bus *bus, int phy, int mmd, int reg, + u16 val) + { + struct sja1105_mdio_private *mdio_priv = bus->priv; +diff --git a/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c b/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c +index 9cae5a3090000..b3d04f49f77e9 100644 +--- a/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c ++++ b/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c +@@ -391,7 +391,9 @@ static void umac_reset(struct bcmasp_intf *intf) + umac_wl(intf, 0x0, UMC_CMD); + umac_wl(intf, UMC_CMD_SW_RESET, UMC_CMD); + usleep_range(10, 100); +- umac_wl(intf, 0x0, UMC_CMD); ++ /* We hold the umac in reset and bring it out of ++ * reset when phy link is up. 
++ */ + } + + static void umac_set_hw_addr(struct bcmasp_intf *intf, +@@ -411,6 +413,8 @@ static void umac_enable_set(struct bcmasp_intf *intf, u32 mask, + u32 reg; + + reg = umac_rl(intf, UMC_CMD); ++ if (reg & UMC_CMD_SW_RESET) ++ return; + if (enable) + reg |= mask; + else +@@ -429,7 +433,6 @@ static void umac_init(struct bcmasp_intf *intf) + umac_wl(intf, 0x800, UMC_FRM_LEN); + umac_wl(intf, 0xffff, UMC_PAUSE_CNTRL); + umac_wl(intf, 0x800, UMC_RX_MAX_PKT_SZ); +- umac_enable_set(intf, UMC_CMD_PROMISC, 1); + } + + static int bcmasp_tx_poll(struct napi_struct *napi, int budget) +@@ -656,6 +659,12 @@ static void bcmasp_adj_link(struct net_device *dev) + UMC_CMD_HD_EN | UMC_CMD_RX_PAUSE_IGNORE | + UMC_CMD_TX_PAUSE_IGNORE); + reg |= cmd_bits; ++ if (reg & UMC_CMD_SW_RESET) { ++ reg &= ~UMC_CMD_SW_RESET; ++ umac_wl(intf, reg, UMC_CMD); ++ udelay(2); ++ reg |= UMC_CMD_TX_EN | UMC_CMD_RX_EN | UMC_CMD_PROMISC; ++ } + umac_wl(intf, reg, UMC_CMD); + + intf->eee.eee_active = phy_init_eee(phydev, 0) >= 0; +@@ -1061,9 +1070,6 @@ static int bcmasp_netif_init(struct net_device *dev, bool phy_connect) + + umac_init(intf); + +- /* Disable the UniMAC RX/TX */ +- umac_enable_set(intf, (UMC_CMD_RX_EN | UMC_CMD_TX_EN), 0); +- + umac_set_hw_addr(intf, dev->dev_addr); + + intf->old_duplex = -1; +@@ -1083,9 +1089,6 @@ static int bcmasp_netif_init(struct net_device *dev, bool phy_connect) + + bcmasp_enable_rx(intf, 1); + +- /* Turn on UniMAC TX/RX */ +- umac_enable_set(intf, (UMC_CMD_RX_EN | UMC_CMD_TX_EN), 1); +- + intf->crc_fwd = !!(umac_rl(intf, UMC_CMD) & UMC_CMD_CRC_FWD); + + bcmasp_netif_start(dev); +@@ -1321,7 +1324,14 @@ static void bcmasp_suspend_to_wol(struct bcmasp_intf *intf) + if (intf->wolopts & WAKE_FILTER) + bcmasp_netfilt_suspend(intf); + +- /* UniMAC receive needs to be turned on */ ++ /* Bring UniMAC out of reset if needed and enable RX */ ++ reg = umac_rl(intf, UMC_CMD); ++ if (reg & UMC_CMD_SW_RESET) ++ reg &= ~UMC_CMD_SW_RESET; ++ ++ reg |= UMC_CMD_RX_EN | UMC_CMD_PROMISC; ++ umac_wl(intf, reg, UMC_CMD); ++ + umac_enable_set(intf, UMC_CMD_RX_EN, 1); + + if (intf->parent->wol_irq > 0) { +diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c +index 54da59286df4e..7ca8cd78d5574 100644 +--- a/drivers/net/ethernet/freescale/fec_main.c ++++ b/drivers/net/ethernet/freescale/fec_main.c +@@ -2381,8 +2381,6 @@ static int fec_enet_mii_probe(struct net_device *ndev) + fep->link = 0; + fep->full_duplex = 0; + +- phy_dev->mac_managed_pm = true; +- + phy_attached_info(phy_dev); + + return 0; +@@ -2394,10 +2392,12 @@ static int fec_enet_mii_init(struct platform_device *pdev) + struct net_device *ndev = platform_get_drvdata(pdev); + struct fec_enet_private *fep = netdev_priv(ndev); + bool suppress_preamble = false; ++ struct phy_device *phydev; + struct device_node *node; + int err = -ENXIO; + u32 mii_speed, holdtime; + u32 bus_freq; ++ int addr; + + /* + * The i.MX28 dual fec interfaces are not equal. 
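
The fec change that begins here moves the mac_managed_pm assignment out of fec_enet_mii_probe() (which only runs once the interface is opened and a PHY is attached) and into MDIO bus setup, so every PHY on the bus is flagged before any suspend/resume cycle can reach phylib's PM callbacks. A rough stand-alone sketch of that ordering follows; the bus and PHY types are simplified stand-ins, not the kernel's struct mii_bus / struct phy_device.

/* Sketch: flag every PHY found at MDIO bus init as MAC-managed for PM,
 * before the interface is ever opened. Types and the helper are stand-ins
 * for illustration only. */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define PHY_MAX_ADDR 32

struct phy_device { bool mac_managed_pm; };

struct mii_bus { struct phy_device *phy_map[PHY_MAX_ADDR]; };

static struct phy_device *mdiobus_get_phy(struct mii_bus *bus, int addr)
{
	return bus->phy_map[addr];	/* NULL when nothing answered at @addr */
}

static void mark_phys_mac_managed(struct mii_bus *bus)
{
	for (int addr = 0; addr < PHY_MAX_ADDR; addr++) {
		struct phy_device *phydev = mdiobus_get_phy(bus, addr);

		if (phydev)
			phydev->mac_managed_pm = true;
	}
}

int main(void)
{
	struct phy_device phy = { .mac_managed_pm = false };
	struct mii_bus bus = { .phy_map = { [2] = &phy } };

	mark_phys_mac_managed(&bus);	/* done at bus init, not at open time */
	printf("%d\n", phy.mac_managed_pm);	/* 1 */
	return 0;
}

Doing this at bus-init time means a suspend/resume that happens before the netdev is opened already sees mac_managed_pm set, which seems to be the point of moving the assignment out of the PHY-connect path.
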
+@@ -2511,6 +2511,13 @@ static int fec_enet_mii_init(struct platform_device *pdev) + goto err_out_free_mdiobus; + of_node_put(node); + ++ /* find all the PHY devices on the bus and set mac_managed_pm to true */ ++ for (addr = 0; addr < PHY_MAX_ADDR; addr++) { ++ phydev = mdiobus_get_phy(fep->mii_bus, addr); ++ if (phydev) ++ phydev->mac_managed_pm = true; ++ } ++ + mii_cnt++; + + /* save fec0 mii_bus */ +diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_tqp_stats.c b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_tqp_stats.c +index f3c9395d8351c..618f66d9586b3 100644 +--- a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_tqp_stats.c ++++ b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_tqp_stats.c +@@ -85,7 +85,7 @@ int hclge_comm_tqps_update_stats(struct hnae3_handle *handle, + hclge_comm_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_TX_STATS, + true); + +- desc.data[0] = cpu_to_le32(tqp->index & 0x1ff); ++ desc.data[0] = cpu_to_le32(tqp->index); + ret = hclge_comm_cmd_send(hw, &desc, 1); + if (ret) { + dev_err(&hw->cmq.csq.pdev->dev, +diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c +index 682239f33082b..78181eea93c1c 100644 +--- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c ++++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c +@@ -78,6 +78,9 @@ static const struct hns3_stats hns3_rxq_stats[] = { + #define HNS3_NIC_LB_TEST_NO_MEM_ERR 1 + #define HNS3_NIC_LB_TEST_TX_CNT_ERR 2 + #define HNS3_NIC_LB_TEST_RX_CNT_ERR 3 ++#define HNS3_NIC_LB_TEST_UNEXECUTED 4 ++ ++static int hns3_get_sset_count(struct net_device *netdev, int stringset); + + static int hns3_lp_setup(struct net_device *ndev, enum hnae3_loop loop, bool en) + { +@@ -418,18 +421,26 @@ static void hns3_do_external_lb(struct net_device *ndev, + static void hns3_self_test(struct net_device *ndev, + struct ethtool_test *eth_test, u64 *data) + { ++ int cnt = hns3_get_sset_count(ndev, ETH_SS_TEST); + struct hns3_nic_priv *priv = netdev_priv(ndev); + struct hnae3_handle *h = priv->ae_handle; + int st_param[HNAE3_LOOP_NONE][2]; + bool if_running = netif_running(ndev); ++ int i; ++ ++ /* initialize the loopback test result, avoid marking an unexcuted ++ * loopback test as PASS. 
++ */ ++ for (i = 0; i < cnt; i++) ++ data[i] = HNS3_NIC_LB_TEST_UNEXECUTED; + + if (hns3_nic_resetting(ndev)) { + netdev_err(ndev, "dev resetting!"); +- return; ++ goto failure; + } + + if (!(eth_test->flags & ETH_TEST_FL_OFFLINE)) +- return; ++ goto failure; + + if (netif_msg_ifdown(h)) + netdev_info(ndev, "self test start\n"); +@@ -451,6 +462,10 @@ static void hns3_self_test(struct net_device *ndev, + + if (netif_msg_ifdown(h)) + netdev_info(ndev, "self test end\n"); ++ return; ++ ++failure: ++ eth_test->flags |= ETH_TEST_FL_FAILED; + } + + static void hns3_update_limit_promisc_mode(struct net_device *netdev, +diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +index f1ca2cda2961e..dfd0c5f4cb9f5 100644 +--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c ++++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +@@ -11614,6 +11614,8 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) + if (ret) + goto err_pci_uninit; + ++ devl_lock(hdev->devlink); ++ + /* Firmware command queue initialize */ + ret = hclge_comm_cmd_queue_init(hdev->pdev, &hdev->hw.hw); + if (ret) +@@ -11793,6 +11795,7 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) + + hclge_task_schedule(hdev, round_jiffies_relative(HZ)); + ++ devl_unlock(hdev->devlink); + return 0; + + err_mdiobus_unreg: +@@ -11805,6 +11808,7 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) + err_cmd_uninit: + hclge_comm_cmd_uninit(hdev->ae_dev, &hdev->hw.hw); + err_devlink_uninit: ++ devl_unlock(hdev->devlink); + hclge_devlink_uninit(hdev); + err_pci_uninit: + pcim_iounmap(pdev, hdev->hw.hw.io_base); +diff --git a/drivers/net/ethernet/intel/e1000/e1000_hw.c b/drivers/net/ethernet/intel/e1000/e1000_hw.c +index 4542e2bc28e8d..f9328f2e669f8 100644 +--- a/drivers/net/ethernet/intel/e1000/e1000_hw.c ++++ b/drivers/net/ethernet/intel/e1000/e1000_hw.c +@@ -5,6 +5,7 @@ + * Shared functions for accessing and configuring the MAC + */ + ++#include <linux/bitfield.h> + #include "e1000.h" + + static s32 e1000_check_downshift(struct e1000_hw *hw); +@@ -3260,8 +3261,7 @@ static s32 e1000_phy_igp_get_info(struct e1000_hw *hw, + return ret_val; + + phy_info->mdix_mode = +- (e1000_auto_x_mode) ((phy_data & IGP01E1000_PSSR_MDIX) >> +- IGP01E1000_PSSR_MDIX_SHIFT); ++ (e1000_auto_x_mode)FIELD_GET(IGP01E1000_PSSR_MDIX, phy_data); + + if ((phy_data & IGP01E1000_PSSR_SPEED_MASK) == + IGP01E1000_PSSR_SPEED_1000MBPS) { +@@ -3272,11 +3272,11 @@ static s32 e1000_phy_igp_get_info(struct e1000_hw *hw, + if (ret_val) + return ret_val; + +- phy_info->local_rx = ((phy_data & SR_1000T_LOCAL_RX_STATUS) >> +- SR_1000T_LOCAL_RX_STATUS_SHIFT) ? ++ phy_info->local_rx = FIELD_GET(SR_1000T_LOCAL_RX_STATUS, ++ phy_data) ? + e1000_1000t_rx_status_ok : e1000_1000t_rx_status_not_ok; +- phy_info->remote_rx = ((phy_data & SR_1000T_REMOTE_RX_STATUS) >> +- SR_1000T_REMOTE_RX_STATUS_SHIFT) ? ++ phy_info->remote_rx = FIELD_GET(SR_1000T_REMOTE_RX_STATUS, ++ phy_data) ? + e1000_1000t_rx_status_ok : e1000_1000t_rx_status_not_ok; + + /* Get cable length */ +@@ -3326,14 +3326,12 @@ static s32 e1000_phy_m88_get_info(struct e1000_hw *hw, + return ret_val; + + phy_info->extended_10bt_distance = +- ((phy_data & M88E1000_PSCR_10BT_EXT_DIST_ENABLE) >> +- M88E1000_PSCR_10BT_EXT_DIST_ENABLE_SHIFT) ? ++ FIELD_GET(M88E1000_PSCR_10BT_EXT_DIST_ENABLE, phy_data) ? 
+ e1000_10bt_ext_dist_enable_lower : + e1000_10bt_ext_dist_enable_normal; + + phy_info->polarity_correction = +- ((phy_data & M88E1000_PSCR_POLARITY_REVERSAL) >> +- M88E1000_PSCR_POLARITY_REVERSAL_SHIFT) ? ++ FIELD_GET(M88E1000_PSCR_POLARITY_REVERSAL, phy_data) ? + e1000_polarity_reversal_disabled : e1000_polarity_reversal_enabled; + + /* Check polarity status */ +@@ -3347,27 +3345,25 @@ static s32 e1000_phy_m88_get_info(struct e1000_hw *hw, + return ret_val; + + phy_info->mdix_mode = +- (e1000_auto_x_mode) ((phy_data & M88E1000_PSSR_MDIX) >> +- M88E1000_PSSR_MDIX_SHIFT); ++ (e1000_auto_x_mode)FIELD_GET(M88E1000_PSSR_MDIX, phy_data); + + if ((phy_data & M88E1000_PSSR_SPEED) == M88E1000_PSSR_1000MBS) { + /* Cable Length Estimation and Local/Remote Receiver Information + * are only valid at 1000 Mbps. + */ + phy_info->cable_length = +- (e1000_cable_length) ((phy_data & +- M88E1000_PSSR_CABLE_LENGTH) >> +- M88E1000_PSSR_CABLE_LENGTH_SHIFT); ++ (e1000_cable_length)FIELD_GET(M88E1000_PSSR_CABLE_LENGTH, ++ phy_data); + + ret_val = e1000_read_phy_reg(hw, PHY_1000T_STATUS, &phy_data); + if (ret_val) + return ret_val; + +- phy_info->local_rx = ((phy_data & SR_1000T_LOCAL_RX_STATUS) >> +- SR_1000T_LOCAL_RX_STATUS_SHIFT) ? ++ phy_info->local_rx = FIELD_GET(SR_1000T_LOCAL_RX_STATUS, ++ phy_data) ? + e1000_1000t_rx_status_ok : e1000_1000t_rx_status_not_ok; +- phy_info->remote_rx = ((phy_data & SR_1000T_REMOTE_RX_STATUS) >> +- SR_1000T_REMOTE_RX_STATUS_SHIFT) ? ++ phy_info->remote_rx = FIELD_GET(SR_1000T_REMOTE_RX_STATUS, ++ phy_data) ? + e1000_1000t_rx_status_ok : e1000_1000t_rx_status_not_ok; + } + +@@ -3515,7 +3511,7 @@ s32 e1000_init_eeprom_params(struct e1000_hw *hw) + if (ret_val) + return ret_val; + eeprom_size = +- (eeprom_size & EEPROM_SIZE_MASK) >> EEPROM_SIZE_SHIFT; ++ FIELD_GET(EEPROM_SIZE_MASK, eeprom_size); + /* 256B eeprom size was not supported in earlier hardware, so we + * bump eeprom_size up one to ensure that "1" (which maps to + * 256B) is never the result used in the shifting logic below. +@@ -4891,8 +4887,7 @@ static s32 e1000_get_cable_length(struct e1000_hw *hw, u16 *min_length, + &phy_data); + if (ret_val) + return ret_val; +- cable_length = (phy_data & M88E1000_PSSR_CABLE_LENGTH) >> +- M88E1000_PSSR_CABLE_LENGTH_SHIFT; ++ cable_length = FIELD_GET(M88E1000_PSSR_CABLE_LENGTH, phy_data); + + /* Convert the enum value to ranged values */ + switch (cable_length) { +@@ -5001,8 +4996,7 @@ static s32 e1000_check_polarity(struct e1000_hw *hw, + &phy_data); + if (ret_val) + return ret_val; +- *polarity = ((phy_data & M88E1000_PSSR_REV_POLARITY) >> +- M88E1000_PSSR_REV_POLARITY_SHIFT) ? ++ *polarity = FIELD_GET(M88E1000_PSSR_REV_POLARITY, phy_data) ? 
+ e1000_rev_polarity_reversed : e1000_rev_polarity_normal; + + } else if (hw->phy_type == e1000_phy_igp) { +@@ -5072,8 +5066,8 @@ static s32 e1000_check_downshift(struct e1000_hw *hw) + if (ret_val) + return ret_val; + +- hw->speed_downgraded = (phy_data & M88E1000_PSSR_DOWNSHIFT) >> +- M88E1000_PSSR_DOWNSHIFT_SHIFT; ++ hw->speed_downgraded = FIELD_GET(M88E1000_PSSR_DOWNSHIFT, ++ phy_data); + } + + return E1000_SUCCESS; +diff --git a/drivers/net/ethernet/intel/e1000e/80003es2lan.c b/drivers/net/ethernet/intel/e1000e/80003es2lan.c +index be9c695dde127..c51fb6bf9c4e0 100644 +--- a/drivers/net/ethernet/intel/e1000e/80003es2lan.c ++++ b/drivers/net/ethernet/intel/e1000e/80003es2lan.c +@@ -92,8 +92,7 @@ static s32 e1000_init_nvm_params_80003es2lan(struct e1000_hw *hw) + + nvm->type = e1000_nvm_eeprom_spi; + +- size = (u16)((eecd & E1000_EECD_SIZE_EX_MASK) >> +- E1000_EECD_SIZE_EX_SHIFT); ++ size = (u16)FIELD_GET(E1000_EECD_SIZE_EX_MASK, eecd); + + /* Added to a constant, "size" becomes the left-shift value + * for setting word_size. +diff --git a/drivers/net/ethernet/intel/e1000e/82571.c b/drivers/net/ethernet/intel/e1000e/82571.c +index 0b1e890dd583b..969f855a79ee6 100644 +--- a/drivers/net/ethernet/intel/e1000e/82571.c ++++ b/drivers/net/ethernet/intel/e1000e/82571.c +@@ -157,8 +157,7 @@ static s32 e1000_init_nvm_params_82571(struct e1000_hw *hw) + fallthrough; + default: + nvm->type = e1000_nvm_eeprom_spi; +- size = (u16)((eecd & E1000_EECD_SIZE_EX_MASK) >> +- E1000_EECD_SIZE_EX_SHIFT); ++ size = (u16)FIELD_GET(E1000_EECD_SIZE_EX_MASK, eecd); + /* Added to a constant, "size" becomes the left-shift value + * for setting word_size. + */ +diff --git a/drivers/net/ethernet/intel/e1000e/ethtool.c b/drivers/net/ethernet/intel/e1000e/ethtool.c +index 9835e6a90d56c..fc0f98ea61332 100644 +--- a/drivers/net/ethernet/intel/e1000e/ethtool.c ++++ b/drivers/net/ethernet/intel/e1000e/ethtool.c +@@ -654,8 +654,8 @@ static void e1000_get_drvinfo(struct net_device *netdev, + */ + snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), + "%d.%d-%d", +- (adapter->eeprom_vers & 0xF000) >> 12, +- (adapter->eeprom_vers & 0x0FF0) >> 4, ++ FIELD_GET(0xF000, adapter->eeprom_vers), ++ FIELD_GET(0x0FF0, adapter->eeprom_vers), + (adapter->eeprom_vers & 0x000F)); + + strscpy(drvinfo->bus_info, pci_name(adapter->pdev), +@@ -925,8 +925,7 @@ static int e1000_reg_test(struct e1000_adapter *adapter, u64 *data) + } + + if (mac->type >= e1000_pch_lpt) +- wlock_mac = (er32(FWSM) & E1000_FWSM_WLOCK_MAC_MASK) >> +- E1000_FWSM_WLOCK_MAC_SHIFT; ++ wlock_mac = FIELD_GET(E1000_FWSM_WLOCK_MAC_MASK, er32(FWSM)); + + for (i = 0; i < mac->rar_entry_count; i++) { + if (mac->type >= e1000_pch_lpt) { +diff --git a/drivers/net/ethernet/intel/e1000e/hw.h b/drivers/net/ethernet/intel/e1000e/hw.h +index 1fef6bb5a5fbc..4b6e7536170ab 100644 +--- a/drivers/net/ethernet/intel/e1000e/hw.h ++++ b/drivers/net/ethernet/intel/e1000e/hw.h +@@ -628,6 +628,7 @@ struct e1000_phy_info { + u32 id; + u32 reset_delay_us; /* in usec */ + u32 revision; ++ u32 retry_count; + + enum e1000_media_type media_type; + +@@ -644,6 +645,7 @@ struct e1000_phy_info { + bool polarity_correction; + bool speed_downgraded; + bool autoneg_wait_to_complete; ++ bool retry_enabled; + }; + + struct e1000_nvm_info { +diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c +index 39e9fc601bf5a..4d83c9a0c023a 100644 +--- a/drivers/net/ethernet/intel/e1000e/ich8lan.c ++++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c +@@ -222,11 +222,18 @@ 
static bool e1000_phy_is_accessible_pchlan(struct e1000_hw *hw) + if (hw->mac.type >= e1000_pch_lpt) { + /* Only unforce SMBus if ME is not active */ + if (!(er32(FWSM) & E1000_ICH_FWSM_FW_VALID)) { ++ /* Switching PHY interface always returns MDI error ++ * so disable retry mechanism to avoid wasting time ++ */ ++ e1000e_disable_phy_retry(hw); ++ + /* Unforce SMBus mode in PHY */ + e1e_rphy_locked(hw, CV_SMB_CTRL, &phy_reg); + phy_reg &= ~CV_SMB_CTRL_FORCE_SMBUS; + e1e_wphy_locked(hw, CV_SMB_CTRL, phy_reg); + ++ e1000e_enable_phy_retry(hw); ++ + /* Unforce SMBus mode in MAC */ + mac_reg = er32(CTRL_EXT); + mac_reg &= ~E1000_CTRL_EXT_FORCE_SMBUS; +@@ -310,6 +317,11 @@ static s32 e1000_init_phy_workarounds_pchlan(struct e1000_hw *hw) + goto out; + } + ++ /* There is no guarantee that the PHY is accessible at this time ++ * so disable retry mechanism to avoid wasting time ++ */ ++ e1000e_disable_phy_retry(hw); ++ + /* The MAC-PHY interconnect may be in SMBus mode. If the PHY is + * inaccessible and resetting the PHY is not blocked, toggle the + * LANPHYPC Value bit to force the interconnect to PCIe mode. +@@ -380,6 +392,8 @@ static s32 e1000_init_phy_workarounds_pchlan(struct e1000_hw *hw) + break; + } + ++ e1000e_enable_phy_retry(hw); ++ + hw->phy.ops.release(hw); + if (!ret_val) { + +@@ -449,6 +463,11 @@ static s32 e1000_init_phy_params_pchlan(struct e1000_hw *hw) + + phy->id = e1000_phy_unknown; + ++ if (hw->mac.type == e1000_pch_mtp) { ++ phy->retry_count = 2; ++ e1000e_enable_phy_retry(hw); ++ } ++ + ret_val = e1000_init_phy_workarounds_pchlan(hw); + if (ret_val) + return ret_val; +@@ -1072,13 +1091,11 @@ static s32 e1000_platform_pm_pch_lpt(struct e1000_hw *hw, bool link) + + lat_enc_d = (lat_enc & E1000_LTRV_VALUE_MASK) * + (1U << (E1000_LTRV_SCALE_FACTOR * +- ((lat_enc & E1000_LTRV_SCALE_MASK) +- >> E1000_LTRV_SCALE_SHIFT))); ++ FIELD_GET(E1000_LTRV_SCALE_MASK, lat_enc))); + + max_ltr_enc_d = (max_ltr_enc & E1000_LTRV_VALUE_MASK) * +- (1U << (E1000_LTRV_SCALE_FACTOR * +- ((max_ltr_enc & E1000_LTRV_SCALE_MASK) +- >> E1000_LTRV_SCALE_SHIFT))); ++ (1U << (E1000_LTRV_SCALE_FACTOR * ++ FIELD_GET(E1000_LTRV_SCALE_MASK, max_ltr_enc))); + + if (lat_enc_d > max_ltr_enc_d) + lat_enc = max_ltr_enc; +@@ -1148,18 +1165,6 @@ s32 e1000_enable_ulp_lpt_lp(struct e1000_hw *hw, bool to_sx) + if (ret_val) + goto out; + +- /* Force SMBus mode in PHY */ +- ret_val = e1000_read_phy_reg_hv_locked(hw, CV_SMB_CTRL, &phy_reg); +- if (ret_val) +- goto release; +- phy_reg |= CV_SMB_CTRL_FORCE_SMBUS; +- e1000_write_phy_reg_hv_locked(hw, CV_SMB_CTRL, phy_reg); +- +- /* Force SMBus mode in MAC */ +- mac_reg = er32(CTRL_EXT); +- mac_reg |= E1000_CTRL_EXT_FORCE_SMBUS; +- ew32(CTRL_EXT, mac_reg); +- + /* Si workaround for ULP entry flow on i127/rev6 h/w. 
Enable + * LPLU and disable Gig speed when entering ULP + */ +@@ -1315,6 +1320,11 @@ static s32 e1000_disable_ulp_lpt_lp(struct e1000_hw *hw, bool force) + /* Toggle LANPHYPC Value bit */ + e1000_toggle_lanphypc_pch_lpt(hw); + ++ /* Switching PHY interface always returns MDI error ++ * so disable retry mechanism to avoid wasting time ++ */ ++ e1000e_disable_phy_retry(hw); ++ + /* Unforce SMBus mode in PHY */ + ret_val = e1000_read_phy_reg_hv_locked(hw, CV_SMB_CTRL, &phy_reg); + if (ret_val) { +@@ -1335,6 +1345,8 @@ static s32 e1000_disable_ulp_lpt_lp(struct e1000_hw *hw, bool force) + phy_reg &= ~CV_SMB_CTRL_FORCE_SMBUS; + e1000_write_phy_reg_hv_locked(hw, CV_SMB_CTRL, phy_reg); + ++ e1000e_enable_phy_retry(hw); ++ + /* Unforce SMBus mode in MAC */ + mac_reg = er32(CTRL_EXT); + mac_reg &= ~E1000_CTRL_EXT_FORCE_SMBUS; +@@ -2075,8 +2087,7 @@ static s32 e1000_write_smbus_addr(struct e1000_hw *hw) + { + u16 phy_data; + u32 strap = er32(STRAP); +- u32 freq = (strap & E1000_STRAP_SMT_FREQ_MASK) >> +- E1000_STRAP_SMT_FREQ_SHIFT; ++ u32 freq = FIELD_GET(E1000_STRAP_SMT_FREQ_MASK, strap); + s32 ret_val; + + strap &= E1000_STRAP_SMBUS_ADDRESS_MASK; +@@ -2562,8 +2573,7 @@ void e1000_copy_rx_addrs_to_phy_ich8lan(struct e1000_hw *hw) + hw->phy.ops.write_reg_page(hw, BM_RAR_H(i), + (u16)(mac_reg & 0xFFFF)); + hw->phy.ops.write_reg_page(hw, BM_RAR_CTRL(i), +- (u16)((mac_reg & E1000_RAH_AV) +- >> 16)); ++ FIELD_GET(E1000_RAH_AV, mac_reg)); + } + + e1000_disable_phy_wakeup_reg_access_bm(hw, &phy_reg); +@@ -3205,7 +3215,7 @@ static s32 e1000_valid_nvm_bank_detect_ich8lan(struct e1000_hw *hw, u32 *bank) + &nvm_dword); + if (ret_val) + return ret_val; +- sig_byte = (u8)((nvm_dword & 0xFF00) >> 8); ++ sig_byte = FIELD_GET(0xFF00, nvm_dword); + if ((sig_byte & E1000_ICH_NVM_VALID_SIG_MASK) == + E1000_ICH_NVM_SIG_VALUE) { + *bank = 0; +@@ -3218,7 +3228,7 @@ static s32 e1000_valid_nvm_bank_detect_ich8lan(struct e1000_hw *hw, u32 *bank) + &nvm_dword); + if (ret_val) + return ret_val; +- sig_byte = (u8)((nvm_dword & 0xFF00) >> 8); ++ sig_byte = FIELD_GET(0xFF00, nvm_dword); + if ((sig_byte & E1000_ICH_NVM_VALID_SIG_MASK) == + E1000_ICH_NVM_SIG_VALUE) { + *bank = 1; +diff --git a/drivers/net/ethernet/intel/e1000e/mac.c b/drivers/net/ethernet/intel/e1000e/mac.c +index 5df7ad93f3d77..30515bfb259ea 100644 +--- a/drivers/net/ethernet/intel/e1000e/mac.c ++++ b/drivers/net/ethernet/intel/e1000e/mac.c +@@ -52,7 +52,7 @@ void e1000_set_lan_id_multi_port_pcie(struct e1000_hw *hw) + * for the device regardless of function swap state. 
+ */ + reg = er32(STATUS); +- bus->func = (reg & E1000_STATUS_FUNC_MASK) >> E1000_STATUS_FUNC_SHIFT; ++ bus->func = FIELD_GET(E1000_STATUS_FUNC_MASK, reg); + } + + /** +diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c +index f536c856727cb..3692fce201959 100644 +--- a/drivers/net/ethernet/intel/e1000e/netdev.c ++++ b/drivers/net/ethernet/intel/e1000e/netdev.c +@@ -1788,8 +1788,7 @@ static irqreturn_t e1000_intr_msi(int __always_unused irq, void *data) + adapter->corr_errors += + pbeccsts & E1000_PBECCSTS_CORR_ERR_CNT_MASK; + adapter->uncorr_errors += +- (pbeccsts & E1000_PBECCSTS_UNCORR_ERR_CNT_MASK) >> +- E1000_PBECCSTS_UNCORR_ERR_CNT_SHIFT; ++ FIELD_GET(E1000_PBECCSTS_UNCORR_ERR_CNT_MASK, pbeccsts); + + /* Do the reset outside of interrupt context */ + schedule_work(&adapter->reset_task); +@@ -1868,8 +1867,7 @@ static irqreturn_t e1000_intr(int __always_unused irq, void *data) + adapter->corr_errors += + pbeccsts & E1000_PBECCSTS_CORR_ERR_CNT_MASK; + adapter->uncorr_errors += +- (pbeccsts & E1000_PBECCSTS_UNCORR_ERR_CNT_MASK) >> +- E1000_PBECCSTS_UNCORR_ERR_CNT_SHIFT; ++ FIELD_GET(E1000_PBECCSTS_UNCORR_ERR_CNT_MASK, pbeccsts); + + /* Do the reset outside of interrupt context */ + schedule_work(&adapter->reset_task); +@@ -5031,8 +5029,7 @@ static void e1000e_update_stats(struct e1000_adapter *adapter) + adapter->corr_errors += + pbeccsts & E1000_PBECCSTS_CORR_ERR_CNT_MASK; + adapter->uncorr_errors += +- (pbeccsts & E1000_PBECCSTS_UNCORR_ERR_CNT_MASK) >> +- E1000_PBECCSTS_UNCORR_ERR_CNT_SHIFT; ++ FIELD_GET(E1000_PBECCSTS_UNCORR_ERR_CNT_MASK, pbeccsts); + } + } + +@@ -6249,7 +6246,7 @@ static int e1000_init_phy_wakeup(struct e1000_adapter *adapter, u32 wufc) + phy_reg |= BM_RCTL_MPE; + phy_reg &= ~(BM_RCTL_MO_MASK); + if (mac_reg & E1000_RCTL_MO_3) +- phy_reg |= (((mac_reg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT) ++ phy_reg |= (FIELD_GET(E1000_RCTL_MO_3, mac_reg) + << BM_RCTL_MO_SHIFT); + if (mac_reg & E1000_RCTL_BAM) + phy_reg |= BM_RCTL_BAM; +@@ -6626,6 +6623,7 @@ static int __e1000_shutdown(struct pci_dev *pdev, bool runtime) + struct e1000_hw *hw = &adapter->hw; + u32 ctrl, ctrl_ext, rctl, status, wufc; + int retval = 0; ++ u16 smb_ctrl; + + /* Runtime suspend should only enable wakeup for link changes */ + if (runtime) +@@ -6691,14 +6689,31 @@ static int __e1000_shutdown(struct pci_dev *pdev, bool runtime) + if (adapter->hw.phy.type == e1000_phy_igp_3) { + e1000e_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw); + } else if (hw->mac.type >= e1000_pch_lpt) { +- if (wufc && !(wufc & (E1000_WUFC_EX | E1000_WUFC_MC | E1000_WUFC_BC))) ++ if (wufc && !(wufc & (E1000_WUFC_EX | E1000_WUFC_MC | E1000_WUFC_BC))) { + /* ULP does not support wake from unicast, multicast + * or broadcast. 
+ */ + retval = e1000_enable_ulp_lpt_lp(hw, !runtime); ++ if (retval) ++ return retval; ++ } ++ ++ /* Force SMBUS to allow WOL */ ++ /* Switching PHY interface always returns MDI error ++ * so disable retry mechanism to avoid wasting time ++ */ ++ e1000e_disable_phy_retry(hw); ++ ++ e1e_rphy(hw, CV_SMB_CTRL, &smb_ctrl); ++ smb_ctrl |= CV_SMB_CTRL_FORCE_SMBUS; ++ e1e_wphy(hw, CV_SMB_CTRL, smb_ctrl); + +- if (retval) +- return retval; ++ e1000e_enable_phy_retry(hw); ++ ++ /* Force SMBus mode in MAC */ ++ ctrl_ext = er32(CTRL_EXT); ++ ctrl_ext |= E1000_CTRL_EXT_FORCE_SMBUS; ++ ew32(CTRL_EXT, ctrl_ext); + } + + /* Ensure that the appropriate bits are set in LPI_CTRL +diff --git a/drivers/net/ethernet/intel/e1000e/phy.c b/drivers/net/ethernet/intel/e1000e/phy.c +index 08c3d477dd6f7..395746bcf8f7c 100644 +--- a/drivers/net/ethernet/intel/e1000e/phy.c ++++ b/drivers/net/ethernet/intel/e1000e/phy.c +@@ -107,6 +107,16 @@ s32 e1000e_phy_reset_dsp(struct e1000_hw *hw) + return e1e_wphy(hw, M88E1000_PHY_GEN_CONTROL, 0); + } + ++void e1000e_disable_phy_retry(struct e1000_hw *hw) ++{ ++ hw->phy.retry_enabled = false; ++} ++ ++void e1000e_enable_phy_retry(struct e1000_hw *hw) ++{ ++ hw->phy.retry_enabled = true; ++} ++ + /** + * e1000e_read_phy_reg_mdic - Read MDI control register + * @hw: pointer to the HW structure +@@ -118,57 +128,73 @@ s32 e1000e_phy_reset_dsp(struct e1000_hw *hw) + **/ + s32 e1000e_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data) + { ++ u32 i, mdic = 0, retry_counter, retry_max; + struct e1000_phy_info *phy = &hw->phy; +- u32 i, mdic = 0; ++ bool success; + + if (offset > MAX_PHY_REG_ADDRESS) { + e_dbg("PHY Address %d is out of range\n", offset); + return -E1000_ERR_PARAM; + } + ++ retry_max = phy->retry_enabled ? phy->retry_count : 0; ++ + /* Set up Op-code, Phy Address, and register offset in the MDI + * Control register. The MAC will take care of interfacing with the + * PHY to retrieve the desired data. + */ +- mdic = ((offset << E1000_MDIC_REG_SHIFT) | +- (phy->addr << E1000_MDIC_PHY_SHIFT) | +- (E1000_MDIC_OP_READ)); ++ for (retry_counter = 0; retry_counter <= retry_max; retry_counter++) { ++ success = true; + +- ew32(MDIC, mdic); ++ mdic = ((offset << E1000_MDIC_REG_SHIFT) | ++ (phy->addr << E1000_MDIC_PHY_SHIFT) | ++ (E1000_MDIC_OP_READ)); + +- /* Poll the ready bit to see if the MDI read completed +- * Increasing the time out as testing showed failures with +- * the lower time out +- */ +- for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) { +- udelay(50); +- mdic = er32(MDIC); +- if (mdic & E1000_MDIC_READY) +- break; +- } +- if (!(mdic & E1000_MDIC_READY)) { +- e_dbg("MDI Read PHY Reg Address %d did not complete\n", offset); +- return -E1000_ERR_PHY; +- } +- if (mdic & E1000_MDIC_ERROR) { +- e_dbg("MDI Read PHY Reg Address %d Error\n", offset); +- return -E1000_ERR_PHY; +- } +- if (((mdic & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT) != offset) { +- e_dbg("MDI Read offset error - requested %d, returned %d\n", +- offset, +- (mdic & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT); +- return -E1000_ERR_PHY; +- } +- *data = (u16)mdic; ++ ew32(MDIC, mdic); + +- /* Allow some time after each MDIC transaction to avoid +- * reading duplicate data in the next MDIC transaction. 
+- */ +- if (hw->mac.type == e1000_pch2lan) +- udelay(100); ++ /* Poll the ready bit to see if the MDI read completed ++ * Increasing the time out as testing showed failures with ++ * the lower time out ++ */ ++ for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) { ++ usleep_range(50, 60); ++ mdic = er32(MDIC); ++ if (mdic & E1000_MDIC_READY) ++ break; ++ } ++ if (!(mdic & E1000_MDIC_READY)) { ++ e_dbg("MDI Read PHY Reg Address %d did not complete\n", ++ offset); ++ success = false; ++ } ++ if (mdic & E1000_MDIC_ERROR) { ++ e_dbg("MDI Read PHY Reg Address %d Error\n", offset); ++ success = false; ++ } ++ if (FIELD_GET(E1000_MDIC_REG_MASK, mdic) != offset) { ++ e_dbg("MDI Read offset error - requested %d, returned %d\n", ++ offset, FIELD_GET(E1000_MDIC_REG_MASK, mdic)); ++ success = false; ++ } + +- return 0; ++ /* Allow some time after each MDIC transaction to avoid ++ * reading duplicate data in the next MDIC transaction. ++ */ ++ if (hw->mac.type == e1000_pch2lan) ++ usleep_range(100, 150); ++ ++ if (success) { ++ *data = (u16)mdic; ++ return 0; ++ } ++ ++ if (retry_counter != retry_max) { ++ e_dbg("Perform retry on PHY transaction...\n"); ++ mdelay(10); ++ } ++ } ++ ++ return -E1000_ERR_PHY; + } + + /** +@@ -181,57 +207,72 @@ s32 e1000e_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data) + **/ + s32 e1000e_write_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 data) + { ++ u32 i, mdic = 0, retry_counter, retry_max; + struct e1000_phy_info *phy = &hw->phy; +- u32 i, mdic = 0; ++ bool success; + + if (offset > MAX_PHY_REG_ADDRESS) { + e_dbg("PHY Address %d is out of range\n", offset); + return -E1000_ERR_PARAM; + } + ++ retry_max = phy->retry_enabled ? phy->retry_count : 0; ++ + /* Set up Op-code, Phy Address, and register offset in the MDI + * Control register. The MAC will take care of interfacing with the + * PHY to retrieve the desired data. + */ +- mdic = (((u32)data) | +- (offset << E1000_MDIC_REG_SHIFT) | +- (phy->addr << E1000_MDIC_PHY_SHIFT) | +- (E1000_MDIC_OP_WRITE)); ++ for (retry_counter = 0; retry_counter <= retry_max; retry_counter++) { ++ success = true; + +- ew32(MDIC, mdic); ++ mdic = (((u32)data) | ++ (offset << E1000_MDIC_REG_SHIFT) | ++ (phy->addr << E1000_MDIC_PHY_SHIFT) | ++ (E1000_MDIC_OP_WRITE)); + +- /* Poll the ready bit to see if the MDI read completed +- * Increasing the time out as testing showed failures with +- * the lower time out +- */ +- for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) { +- udelay(50); +- mdic = er32(MDIC); +- if (mdic & E1000_MDIC_READY) +- break; +- } +- if (!(mdic & E1000_MDIC_READY)) { +- e_dbg("MDI Write PHY Reg Address %d did not complete\n", offset); +- return -E1000_ERR_PHY; +- } +- if (mdic & E1000_MDIC_ERROR) { +- e_dbg("MDI Write PHY Red Address %d Error\n", offset); +- return -E1000_ERR_PHY; +- } +- if (((mdic & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT) != offset) { +- e_dbg("MDI Write offset error - requested %d, returned %d\n", +- offset, +- (mdic & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT); +- return -E1000_ERR_PHY; +- } ++ ew32(MDIC, mdic); + +- /* Allow some time after each MDIC transaction to avoid +- * reading duplicate data in the next MDIC transaction. 
+- */ +- if (hw->mac.type == e1000_pch2lan) +- udelay(100); ++ /* Poll the ready bit to see if the MDI read completed ++ * Increasing the time out as testing showed failures with ++ * the lower time out ++ */ ++ for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) { ++ usleep_range(50, 60); ++ mdic = er32(MDIC); ++ if (mdic & E1000_MDIC_READY) ++ break; ++ } ++ if (!(mdic & E1000_MDIC_READY)) { ++ e_dbg("MDI Write PHY Reg Address %d did not complete\n", ++ offset); ++ success = false; ++ } ++ if (mdic & E1000_MDIC_ERROR) { ++ e_dbg("MDI Write PHY Reg Address %d Error\n", offset); ++ success = false; ++ } ++ if (FIELD_GET(E1000_MDIC_REG_MASK, mdic) != offset) { ++ e_dbg("MDI Write offset error - requested %d, returned %d\n", ++ offset, FIELD_GET(E1000_MDIC_REG_MASK, mdic)); ++ success = false; ++ } + +- return 0; ++ /* Allow some time after each MDIC transaction to avoid ++ * reading duplicate data in the next MDIC transaction. ++ */ ++ if (hw->mac.type == e1000_pch2lan) ++ usleep_range(100, 150); ++ ++ if (success) ++ return 0; ++ ++ if (retry_counter != retry_max) { ++ e_dbg("Perform retry on PHY transaction...\n"); ++ mdelay(10); ++ } ++ } ++ ++ return -E1000_ERR_PHY; + } + + /** +@@ -1793,8 +1834,7 @@ s32 e1000e_get_cable_length_m88(struct e1000_hw *hw) + if (ret_val) + return ret_val; + +- index = ((phy_data & M88E1000_PSSR_CABLE_LENGTH) >> +- M88E1000_PSSR_CABLE_LENGTH_SHIFT); ++ index = FIELD_GET(M88E1000_PSSR_CABLE_LENGTH, phy_data); + + if (index >= M88E1000_CABLE_LENGTH_TABLE_SIZE - 1) + return -E1000_ERR_PHY; +@@ -3234,8 +3274,7 @@ s32 e1000_get_cable_length_82577(struct e1000_hw *hw) + if (ret_val) + return ret_val; + +- length = ((phy_data & I82577_DSTATUS_CABLE_LENGTH) >> +- I82577_DSTATUS_CABLE_LENGTH_SHIFT); ++ length = FIELD_GET(I82577_DSTATUS_CABLE_LENGTH, phy_data); + + if (length == E1000_CABLE_LENGTH_UNDEFINED) + return -E1000_ERR_PHY; +diff --git a/drivers/net/ethernet/intel/e1000e/phy.h b/drivers/net/ethernet/intel/e1000e/phy.h +index c48777d095235..049bb325b4b14 100644 +--- a/drivers/net/ethernet/intel/e1000e/phy.h ++++ b/drivers/net/ethernet/intel/e1000e/phy.h +@@ -51,6 +51,8 @@ s32 e1000e_read_phy_reg_bm2(struct e1000_hw *hw, u32 offset, u16 *data); + s32 e1000e_write_phy_reg_bm2(struct e1000_hw *hw, u32 offset, u16 data); + void e1000_power_up_phy_copper(struct e1000_hw *hw); + void e1000_power_down_phy_copper(struct e1000_hw *hw); ++void e1000e_disable_phy_retry(struct e1000_hw *hw); ++void e1000e_enable_phy_retry(struct e1000_hw *hw); + s32 e1000e_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data); + s32 e1000e_write_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 data); + s32 e1000_read_phy_reg_hv(struct e1000_hw *hw, u32 offset, u16 *data); +diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c +index af1b0cde36703..aed5e0bf6313e 100644 +--- a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c ++++ b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c +@@ -1,6 +1,7 @@ + // SPDX-License-Identifier: GPL-2.0 + /* Copyright(c) 2013 - 2019 Intel Corporation. 
*/ + ++#include <linux/bitfield.h> + #include "fm10k_pf.h" + #include "fm10k_vf.h" + +@@ -1575,8 +1576,7 @@ static s32 fm10k_get_fault_pf(struct fm10k_hw *hw, int type, + if (func & FM10K_FAULT_FUNC_PF) + fault->func = 0; + else +- fault->func = 1 + ((func & FM10K_FAULT_FUNC_VF_MASK) >> +- FM10K_FAULT_FUNC_VF_SHIFT); ++ fault->func = 1 + FIELD_GET(FM10K_FAULT_FUNC_VF_MASK, func); + + /* record fault type */ + fault->type = func & FM10K_FAULT_FUNC_TYPE_MASK; +diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_vf.c b/drivers/net/ethernet/intel/fm10k/fm10k_vf.c +index dc8ccd378ec92..7fb1961f29210 100644 +--- a/drivers/net/ethernet/intel/fm10k/fm10k_vf.c ++++ b/drivers/net/ethernet/intel/fm10k/fm10k_vf.c +@@ -1,6 +1,7 @@ + // SPDX-License-Identifier: GPL-2.0 + /* Copyright(c) 2013 - 2019 Intel Corporation. */ + ++#include <linux/bitfield.h> + #include "fm10k_vf.h" + + /** +@@ -126,15 +127,14 @@ static s32 fm10k_init_hw_vf(struct fm10k_hw *hw) + hw->mac.max_queues = i; + + /* fetch default VLAN and ITR scale */ +- hw->mac.default_vid = (fm10k_read_reg(hw, FM10K_TXQCTL(0)) & +- FM10K_TXQCTL_VID_MASK) >> FM10K_TXQCTL_VID_SHIFT; ++ hw->mac.default_vid = FIELD_GET(FM10K_TXQCTL_VID_MASK, ++ fm10k_read_reg(hw, FM10K_TXQCTL(0))); + /* Read the ITR scale from TDLEN. See the definition of + * FM10K_TDLEN_ITR_SCALE_SHIFT for more information about how TDLEN is + * used here. + */ +- hw->mac.itr_scale = (fm10k_read_reg(hw, FM10K_TDLEN(0)) & +- FM10K_TDLEN_ITR_SCALE_MASK) >> +- FM10K_TDLEN_ITR_SCALE_SHIFT; ++ hw->mac.itr_scale = FIELD_GET(FM10K_TDLEN_ITR_SCALE_MASK, ++ fm10k_read_reg(hw, FM10K_TDLEN(0))); + + return 0; + +diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h +index 55bb0b5310d5b..3e6839ac1f0f1 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e.h ++++ b/drivers/net/ethernet/intel/i40e/i40e.h +@@ -4,47 +4,20 @@ + #ifndef _I40E_H_ + #define _I40E_H_ + +-#include <net/tcp.h> +-#include <net/udp.h> +-#include <linux/types.h> +-#include <linux/errno.h> +-#include <linux/module.h> +-#include <linux/pci.h> +-#include <linux/netdevice.h> +-#include <linux/ioport.h> +-#include <linux/iommu.h> +-#include <linux/slab.h> +-#include <linux/list.h> +-#include <linux/hashtable.h> +-#include <linux/string.h> +-#include <linux/in.h> +-#include <linux/ip.h> +-#include <linux/sctp.h> +-#include <linux/pkt_sched.h> +-#include <linux/ipv6.h> +-#include <net/checksum.h> +-#include <net/ip6_checksum.h> + #include <linux/ethtool.h> +-#include <linux/if_vlan.h> +-#include <linux/if_macvlan.h> +-#include <linux/if_bridge.h> +-#include <linux/clocksource.h> +-#include <linux/net_tstamp.h> ++#include <linux/pci.h> + #include <linux/ptp_clock_kernel.h> ++#include <linux/types.h> ++#include <linux/avf/virtchnl.h> ++#include <linux/net/intel/i40e_client.h> + #include <net/pkt_cls.h> +-#include <net/pkt_sched.h> +-#include <net/tc_act/tc_gact.h> +-#include <net/tc_act/tc_mirred.h> + #include <net/udp_tunnel.h> +-#include <net/xdp_sock.h> +-#include <linux/bitfield.h> +-#include "i40e_type.h" ++#include "i40e_dcb.h" ++#include "i40e_debug.h" ++#include "i40e_io.h" + #include "i40e_prototype.h" +-#include <linux/net/intel/i40e_client.h> +-#include <linux/avf/virtchnl.h> +-#include "i40e_virtchnl_pf.h" ++#include "i40e_register.h" + #include "i40e_txrx.h" +-#include "i40e_dcb.h" + + /* Useful i40e defaults */ + #define I40E_MAX_VEB 16 +@@ -108,7 +81,7 @@ + #define I40E_MAX_BW_INACTIVE_ACCUM 4 /* accumulate 4 credits max */ + + /* driver state flags */ +-enum i40e_state_t { ++enum 
i40e_state { + __I40E_TESTING, + __I40E_CONFIG_BUSY, + __I40E_CONFIG_DONE, +@@ -156,7 +129,7 @@ enum i40e_state_t { + BIT_ULL(__I40E_PF_RESET_AND_REBUILD_REQUESTED) + + /* VSI state flags */ +-enum i40e_vsi_state_t { ++enum i40e_vsi_state { + __I40E_VSI_DOWN, + __I40E_VSI_NEEDS_RESTART, + __I40E_VSI_SYNCING_FILTERS, +@@ -992,6 +965,7 @@ struct i40e_q_vector { + struct rcu_head rcu; /* to avoid race with update stats on free */ + char name[I40E_INT_NAME_STR_LEN]; + bool arm_wb_state; ++ bool in_busy_poll; + int irq_num; /* IRQ assigned to this q_vector */ + } ____cacheline_internodealigned_in_smp; + +@@ -1321,4 +1295,15 @@ static inline u32 i40e_is_tc_mqprio_enabled(struct i40e_pf *pf) + return pf->flags & I40E_FLAG_TC_MQPRIO; + } + ++/** ++ * i40e_hw_to_pf - get pf pointer from the hardware structure ++ * @hw: pointer to the device HW structure ++ **/ ++static inline struct i40e_pf *i40e_hw_to_pf(struct i40e_hw *hw) ++{ ++ return container_of(hw, struct i40e_pf, hw); ++} ++ ++struct device *i40e_hw_to_dev(struct i40e_hw *hw); ++ + #endif /* _I40E_H_ */ +diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.c b/drivers/net/ethernet/intel/i40e/i40e_adminq.c +index 100eb77b8dfe6..9ce6e633cc2f0 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_adminq.c ++++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.c +@@ -1,9 +1,9 @@ + // SPDX-License-Identifier: GPL-2.0 + /* Copyright(c) 2013 - 2018 Intel Corporation. */ + +-#include "i40e_type.h" ++#include <linux/delay.h> ++#include "i40e_alloc.h" + #include "i40e_register.h" +-#include "i40e_adminq.h" + #include "i40e_prototype.h" + + static void i40e_resume_aq(struct i40e_hw *hw); +@@ -51,7 +51,6 @@ static int i40e_alloc_adminq_asq_ring(struct i40e_hw *hw) + int ret_code; + + ret_code = i40e_allocate_dma_mem(hw, &hw->aq.asq.desc_buf, +- i40e_mem_atq_ring, + (hw->aq.num_asq_entries * + sizeof(struct i40e_aq_desc)), + I40E_ADMINQ_DESC_ALIGNMENT); +@@ -78,7 +77,6 @@ static int i40e_alloc_adminq_arq_ring(struct i40e_hw *hw) + int ret_code; + + ret_code = i40e_allocate_dma_mem(hw, &hw->aq.arq.desc_buf, +- i40e_mem_arq_ring, + (hw->aq.num_arq_entries * + sizeof(struct i40e_aq_desc)), + I40E_ADMINQ_DESC_ALIGNMENT); +@@ -136,7 +134,6 @@ static int i40e_alloc_arq_bufs(struct i40e_hw *hw) + for (i = 0; i < hw->aq.num_arq_entries; i++) { + bi = &hw->aq.arq.r.arq_bi[i]; + ret_code = i40e_allocate_dma_mem(hw, bi, +- i40e_mem_arq_buf, + hw->aq.arq_buf_size, + I40E_ADMINQ_DESC_ALIGNMENT); + if (ret_code) +@@ -198,7 +195,6 @@ static int i40e_alloc_asq_bufs(struct i40e_hw *hw) + for (i = 0; i < hw->aq.num_asq_entries; i++) { + bi = &hw->aq.asq.r.asq_bi[i]; + ret_code = i40e_allocate_dma_mem(hw, bi, +- i40e_mem_asq_buf, + hw->aq.asq_buf_size, + I40E_ADMINQ_DESC_ALIGNMENT); + if (ret_code) +diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.h b/drivers/net/ethernet/intel/i40e/i40e_adminq.h +index 267f2e0a21ce8..80125bea80a2a 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_adminq.h ++++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.h +@@ -4,7 +4,8 @@ + #ifndef _I40E_ADMINQ_H_ + #define _I40E_ADMINQ_H_ + +-#include "i40e_osdep.h" ++#include <linux/mutex.h> ++#include "i40e_alloc.h" + #include "i40e_adminq_cmd.h" + + #define I40E_ADMINQ_DESC(R, i) \ +diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h +index 3357d65a906bf..18a1c3b6d72c5 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h ++++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h +@@ -4,6 +4,8 @@ + #ifndef 
_I40E_ADMINQ_CMD_H_ + #define _I40E_ADMINQ_CMD_H_ + ++#include <linux/bits.h> ++ + /* This header file defines the i40e Admin Queue commands and is shared between + * i40e Firmware and Software. + * +diff --git a/drivers/net/ethernet/intel/i40e/i40e_alloc.h b/drivers/net/ethernet/intel/i40e/i40e_alloc.h +index a6c9a9e343d11..e0dde326255d6 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_alloc.h ++++ b/drivers/net/ethernet/intel/i40e/i40e_alloc.h +@@ -4,25 +4,25 @@ + #ifndef _I40E_ALLOC_H_ + #define _I40E_ALLOC_H_ + ++#include <linux/types.h> ++ + struct i40e_hw; + +-/* Memory allocation types */ +-enum i40e_memory_type { +- i40e_mem_arq_buf = 0, /* ARQ indirect command buffer */ +- i40e_mem_asq_buf = 1, +- i40e_mem_atq_buf = 2, /* ATQ indirect command buffer */ +- i40e_mem_arq_ring = 3, /* ARQ descriptor ring */ +- i40e_mem_atq_ring = 4, /* ATQ descriptor ring */ +- i40e_mem_pd = 5, /* Page Descriptor */ +- i40e_mem_bp = 6, /* Backing Page - 4KB */ +- i40e_mem_bp_jumbo = 7, /* Backing Page - > 4KB */ +- i40e_mem_reserved ++/* memory allocation tracking */ ++struct i40e_dma_mem { ++ void *va; ++ dma_addr_t pa; ++ u32 size; ++}; ++ ++struct i40e_virt_mem { ++ void *va; ++ u32 size; + }; + + /* prototype for functions used for dynamic memory allocation */ + int i40e_allocate_dma_mem(struct i40e_hw *hw, + struct i40e_dma_mem *mem, +- enum i40e_memory_type type, + u64 size, u32 alignment); + int i40e_free_dma_mem(struct i40e_hw *hw, + struct i40e_dma_mem *mem); +diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.c b/drivers/net/ethernet/intel/i40e/i40e_client.c +index 639c5a1ca853b..306758428aefd 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_client.c ++++ b/drivers/net/ethernet/intel/i40e/i40e_client.c +@@ -6,7 +6,6 @@ + #include <linux/net/intel/i40e_client.h> + + #include "i40e.h" +-#include "i40e_prototype.h" + + static LIST_HEAD(i40e_devices); + static DEFINE_MUTEX(i40e_device_mutex); +diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c +index 1b493854f5229..4d7caa1199719 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_common.c ++++ b/drivers/net/ethernet/intel/i40e/i40e_common.c +@@ -1,11 +1,15 @@ + // SPDX-License-Identifier: GPL-2.0 + /* Copyright(c) 2013 - 2021 Intel Corporation. */ + +-#include "i40e.h" +-#include "i40e_type.h" +-#include "i40e_adminq.h" +-#include "i40e_prototype.h" + #include <linux/avf/virtchnl.h> ++#include <linux/bitfield.h> ++#include <linux/delay.h> ++#include <linux/etherdevice.h> ++#include <linux/pci.h> ++#include "i40e_adminq_cmd.h" ++#include "i40e_devids.h" ++#include "i40e_prototype.h" ++#include "i40e_register.h" + + /** + * i40e_set_mac_type - Sets MAC type +diff --git a/drivers/net/ethernet/intel/i40e/i40e_dcb.c b/drivers/net/ethernet/intel/i40e/i40e_dcb.c +index f81e744c0fb36..d57dd30b024fa 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_dcb.c ++++ b/drivers/net/ethernet/intel/i40e/i40e_dcb.c +@@ -1,9 +1,11 @@ + // SPDX-License-Identifier: GPL-2.0 + /* Copyright(c) 2013 - 2021 Intel Corporation. 
*/ + ++#include <linux/bitfield.h> + #include "i40e_adminq.h" +-#include "i40e_prototype.h" ++#include "i40e_alloc.h" + #include "i40e_dcb.h" ++#include "i40e_prototype.h" + + /** + * i40e_get_dcbx_status +diff --git a/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c b/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c +index 195421d863ab1..077a95dad32cf 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c ++++ b/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c +@@ -2,8 +2,8 @@ + /* Copyright(c) 2013 - 2021 Intel Corporation. */ + + #ifdef CONFIG_I40E_DCB +-#include "i40e.h" + #include <net/dcbnl.h> ++#include "i40e.h" + + #define I40E_DCBNL_STATUS_SUCCESS 0 + #define I40E_DCBNL_STATUS_ERROR 1 +diff --git a/drivers/net/ethernet/intel/i40e/i40e_ddp.c b/drivers/net/ethernet/intel/i40e/i40e_ddp.c +index 0e72abd178ae3..21b3518c40968 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_ddp.c ++++ b/drivers/net/ethernet/intel/i40e/i40e_ddp.c +@@ -1,9 +1,9 @@ + // SPDX-License-Identifier: GPL-2.0 + /* Copyright(c) 2013 - 2018 Intel Corporation. */ + ++#include <linux/firmware.h> + #include "i40e.h" + +-#include <linux/firmware.h> + + /** + * i40e_ddp_profiles_eq - checks if DDP profiles are the equivalent +diff --git a/drivers/net/ethernet/intel/i40e/i40e_debug.h b/drivers/net/ethernet/intel/i40e/i40e_debug.h +new file mode 100644 +index 0000000000000..27ebc72d8bfe5 +--- /dev/null ++++ b/drivers/net/ethernet/intel/i40e/i40e_debug.h +@@ -0,0 +1,47 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++/* Copyright(c) 2023 Intel Corporation. */ ++ ++#ifndef _I40E_DEBUG_H_ ++#define _I40E_DEBUG_H_ ++ ++#include <linux/dev_printk.h> ++ ++/* debug masks - set these bits in hw->debug_mask to control output */ ++enum i40e_debug_mask { ++ I40E_DEBUG_INIT = 0x00000001, ++ I40E_DEBUG_RELEASE = 0x00000002, ++ ++ I40E_DEBUG_LINK = 0x00000010, ++ I40E_DEBUG_PHY = 0x00000020, ++ I40E_DEBUG_HMC = 0x00000040, ++ I40E_DEBUG_NVM = 0x00000080, ++ I40E_DEBUG_LAN = 0x00000100, ++ I40E_DEBUG_FLOW = 0x00000200, ++ I40E_DEBUG_DCB = 0x00000400, ++ I40E_DEBUG_DIAG = 0x00000800, ++ I40E_DEBUG_FD = 0x00001000, ++ I40E_DEBUG_PACKAGE = 0x00002000, ++ I40E_DEBUG_IWARP = 0x00F00000, ++ I40E_DEBUG_AQ_MESSAGE = 0x01000000, ++ I40E_DEBUG_AQ_DESCRIPTOR = 0x02000000, ++ I40E_DEBUG_AQ_DESC_BUFFER = 0x04000000, ++ I40E_DEBUG_AQ_COMMAND = 0x06000000, ++ I40E_DEBUG_AQ = 0x0F000000, ++ ++ I40E_DEBUG_USER = 0xF0000000, ++ ++ I40E_DEBUG_ALL = 0xFFFFFFFF ++}; ++ ++struct i40e_hw; ++struct device *i40e_hw_to_dev(struct i40e_hw *hw); ++ ++#define hw_dbg(hw, S, A...) dev_dbg(i40e_hw_to_dev(hw), S, ##A) ++ ++#define i40e_debug(h, m, s, ...) 
\ ++do { \ ++ if (((m) & (h)->debug_mask)) \ ++ dev_info(i40e_hw_to_dev(hw), s, ##__VA_ARGS__); \ ++} while (0) ++ ++#endif /* _I40E_DEBUG_H_ */ +diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +index 1a497cb077100..999c9708def53 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c ++++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +@@ -5,8 +5,9 @@ + + #include <linux/fs.h> + #include <linux/debugfs.h> +- ++#include <linux/if_bridge.h> + #include "i40e.h" ++#include "i40e_virtchnl_pf.h" + + static struct dentry *i40e_dbg_root; + +diff --git a/drivers/net/ethernet/intel/i40e/i40e_diag.h b/drivers/net/ethernet/intel/i40e/i40e_diag.h +index c3ce5f35211f0..ece3a6b9a5c61 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_diag.h ++++ b/drivers/net/ethernet/intel/i40e/i40e_diag.h +@@ -4,7 +4,10 @@ + #ifndef _I40E_DIAG_H_ + #define _I40E_DIAG_H_ + +-#include "i40e_type.h" ++#include "i40e_adminq_cmd.h" ++ ++/* forward-declare the HW struct for the compiler */ ++struct i40e_hw; + + enum i40e_lb_mode { + I40E_LB_MODE_NONE = 0x0, +diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +index bd1321bf7e268..4e90570ba7803 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c ++++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +@@ -3,9 +3,10 @@ + + /* ethtool support for i40e */ + +-#include "i40e.h" ++#include "i40e_devids.h" + #include "i40e_diag.h" + #include "i40e_txrx_common.h" ++#include "i40e_virtchnl_pf.h" + + /* ethtool statistics helpers */ + +diff --git a/drivers/net/ethernet/intel/i40e/i40e_hmc.c b/drivers/net/ethernet/intel/i40e/i40e_hmc.c +index 96ee63aca7a10..1742624ca62ed 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_hmc.c ++++ b/drivers/net/ethernet/intel/i40e/i40e_hmc.c +@@ -1,10 +1,8 @@ + // SPDX-License-Identifier: GPL-2.0 + /* Copyright(c) 2013 - 2018 Intel Corporation. 
*/ + +-#include "i40e.h" +-#include "i40e_osdep.h" +-#include "i40e_register.h" + #include "i40e_alloc.h" ++#include "i40e_debug.h" + #include "i40e_hmc.h" + #include "i40e_type.h" + +@@ -22,7 +20,6 @@ int i40e_add_sd_table_entry(struct i40e_hw *hw, + enum i40e_sd_entry_type type, + u64 direct_mode_sz) + { +- enum i40e_memory_type mem_type __attribute__((unused)); + struct i40e_hmc_sd_entry *sd_entry; + bool dma_mem_alloc_done = false; + struct i40e_dma_mem mem; +@@ -43,16 +40,13 @@ int i40e_add_sd_table_entry(struct i40e_hw *hw, + + sd_entry = &hmc_info->sd_table.sd_entry[sd_index]; + if (!sd_entry->valid) { +- if (I40E_SD_TYPE_PAGED == type) { +- mem_type = i40e_mem_pd; ++ if (type == I40E_SD_TYPE_PAGED) + alloc_len = I40E_HMC_PAGED_BP_SIZE; +- } else { +- mem_type = i40e_mem_bp_jumbo; ++ else + alloc_len = direct_mode_sz; +- } + + /* allocate a 4K pd page or 2M backing page */ +- ret_code = i40e_allocate_dma_mem(hw, &mem, mem_type, alloc_len, ++ ret_code = i40e_allocate_dma_mem(hw, &mem, alloc_len, + I40E_HMC_PD_BP_BUF_ALIGNMENT); + if (ret_code) + goto exit; +@@ -140,7 +134,7 @@ int i40e_add_pd_table_entry(struct i40e_hw *hw, + page = rsrc_pg; + } else { + /* allocate a 4K backing page */ +- ret_code = i40e_allocate_dma_mem(hw, page, i40e_mem_bp, ++ ret_code = i40e_allocate_dma_mem(hw, page, + I40E_HMC_PAGED_BP_SIZE, + I40E_HMC_PD_BP_BUF_ALIGNMENT); + if (ret_code) +diff --git a/drivers/net/ethernet/intel/i40e/i40e_hmc.h b/drivers/net/ethernet/intel/i40e/i40e_hmc.h +index 9960da07a5732..480e3a883cc7a 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_hmc.h ++++ b/drivers/net/ethernet/intel/i40e/i40e_hmc.h +@@ -4,6 +4,10 @@ + #ifndef _I40E_HMC_H_ + #define _I40E_HMC_H_ + ++#include "i40e_alloc.h" ++#include "i40e_io.h" ++#include "i40e_register.h" ++ + #define I40E_HMC_MAX_BP_COUNT 512 + + /* forward-declare the HW struct for the compiler */ +diff --git a/drivers/net/ethernet/intel/i40e/i40e_io.h b/drivers/net/ethernet/intel/i40e/i40e_io.h +new file mode 100644 +index 0000000000000..2a2ed9a1d476b +--- /dev/null ++++ b/drivers/net/ethernet/intel/i40e/i40e_io.h +@@ -0,0 +1,16 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++/* Copyright(c) 2023 Intel Corporation. */ ++ ++#ifndef _I40E_IO_H_ ++#define _I40E_IO_H_ ++ ++/* get readq/writeq support for 32 bit kernels, use the low-first version */ ++#include <linux/io-64-nonatomic-lo-hi.h> ++ ++#define wr32(a, reg, value) writel((value), ((a)->hw_addr + (reg))) ++#define rd32(a, reg) readl((a)->hw_addr + (reg)) ++ ++#define rd64(a, reg) readq((a)->hw_addr + (reg)) ++#define i40e_flush(a) readl((a)->hw_addr + I40E_GLGEN_STAT) ++ ++#endif /* _I40E_IO_H_ */ +diff --git a/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c b/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c +index 474365bf06480..beaaf5c309d51 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c ++++ b/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c +@@ -1,13 +1,10 @@ + // SPDX-License-Identifier: GPL-2.0 + /* Copyright(c) 2013 - 2018 Intel Corporation. 
*/ + +-#include "i40e.h" +-#include "i40e_osdep.h" +-#include "i40e_register.h" +-#include "i40e_type.h" +-#include "i40e_hmc.h" ++#include "i40e_alloc.h" ++#include "i40e_debug.h" + #include "i40e_lan_hmc.h" +-#include "i40e_prototype.h" ++#include "i40e_type.h" + + /* lan specific interface functions */ + +diff --git a/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.h b/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.h +index 9f960404c2b37..305a276953b01 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.h ++++ b/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.h +@@ -4,6 +4,8 @@ + #ifndef _I40E_LAN_HMC_H_ + #define _I40E_LAN_HMC_H_ + ++#include "i40e_hmc.h" ++ + /* forward-declare the HW struct for the compiler */ + struct i40e_hw; + +diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c +index ae32e83a69902..a21fc92aa2725 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_main.c ++++ b/drivers/net/ethernet/intel/i40e/i40e_main.c +@@ -1,19 +1,22 @@ + // SPDX-License-Identifier: GPL-2.0 + /* Copyright(c) 2013 - 2021 Intel Corporation. */ + +-#include <linux/etherdevice.h> +-#include <linux/of_net.h> +-#include <linux/pci.h> +-#include <linux/bpf.h> + #include <generated/utsrelease.h> + #include <linux/crash_dump.h> ++#include <linux/if_bridge.h> ++#include <linux/if_macvlan.h> ++#include <linux/module.h> ++#include <net/pkt_cls.h> ++#include <net/xdp_sock_drv.h> + + /* Local includes */ + #include "i40e.h" ++#include "i40e_devids.h" + #include "i40e_diag.h" ++#include "i40e_lan_hmc.h" ++#include "i40e_virtchnl_pf.h" + #include "i40e_xsk.h" +-#include <net/udp_tunnel.h> +-#include <net/xdp_sock_drv.h> ++ + /* All i40e tracepoints are defined by the include below, which + * must be included exactly once across the whole kernel with + * CREATE_TRACE_POINTS defined +@@ -126,16 +129,27 @@ static void netdev_hw_addr_refcnt(struct i40e_mac_filter *f, + } + + /** +- * i40e_allocate_dma_mem_d - OS specific memory alloc for shared code ++ * i40e_hw_to_dev - get device pointer from the hardware structure ++ * @hw: pointer to the device HW structure ++ **/ ++struct device *i40e_hw_to_dev(struct i40e_hw *hw) ++{ ++ struct i40e_pf *pf = i40e_hw_to_pf(hw); ++ ++ return &pf->pdev->dev; ++} ++ ++/** ++ * i40e_allocate_dma_mem - OS specific memory alloc for shared code + * @hw: pointer to the HW structure + * @mem: ptr to mem struct to fill out + * @size: size of memory requested + * @alignment: what to align the allocation to + **/ +-int i40e_allocate_dma_mem_d(struct i40e_hw *hw, struct i40e_dma_mem *mem, +- u64 size, u32 alignment) ++int i40e_allocate_dma_mem(struct i40e_hw *hw, struct i40e_dma_mem *mem, ++ u64 size, u32 alignment) + { +- struct i40e_pf *pf = (struct i40e_pf *)hw->back; ++ struct i40e_pf *pf = i40e_hw_to_pf(hw); + + mem->size = ALIGN(size, alignment); + mem->va = dma_alloc_coherent(&pf->pdev->dev, mem->size, &mem->pa, +@@ -147,13 +161,13 @@ int i40e_allocate_dma_mem_d(struct i40e_hw *hw, struct i40e_dma_mem *mem, + } + + /** +- * i40e_free_dma_mem_d - OS specific memory free for shared code ++ * i40e_free_dma_mem - OS specific memory free for shared code + * @hw: pointer to the HW structure + * @mem: ptr to mem struct to free + **/ +-int i40e_free_dma_mem_d(struct i40e_hw *hw, struct i40e_dma_mem *mem) ++int i40e_free_dma_mem(struct i40e_hw *hw, struct i40e_dma_mem *mem) + { +- struct i40e_pf *pf = (struct i40e_pf *)hw->back; ++ struct i40e_pf *pf = i40e_hw_to_pf(hw); + + dma_free_coherent(&pf->pdev->dev, mem->size, mem->va, mem->pa); + 
mem->va = NULL; +@@ -164,13 +178,13 @@ int i40e_free_dma_mem_d(struct i40e_hw *hw, struct i40e_dma_mem *mem) + } + + /** +- * i40e_allocate_virt_mem_d - OS specific memory alloc for shared code ++ * i40e_allocate_virt_mem - OS specific memory alloc for shared code + * @hw: pointer to the HW structure + * @mem: ptr to mem struct to fill out + * @size: size of memory requested + **/ +-int i40e_allocate_virt_mem_d(struct i40e_hw *hw, struct i40e_virt_mem *mem, +- u32 size) ++int i40e_allocate_virt_mem(struct i40e_hw *hw, struct i40e_virt_mem *mem, ++ u32 size) + { + mem->size = size; + mem->va = kzalloc(size, GFP_KERNEL); +@@ -182,11 +196,11 @@ int i40e_allocate_virt_mem_d(struct i40e_hw *hw, struct i40e_virt_mem *mem, + } + + /** +- * i40e_free_virt_mem_d - OS specific memory free for shared code ++ * i40e_free_virt_mem - OS specific memory free for shared code + * @hw: pointer to the HW structure + * @mem: ptr to mem struct to free + **/ +-int i40e_free_virt_mem_d(struct i40e_hw *hw, struct i40e_virt_mem *mem) ++int i40e_free_virt_mem(struct i40e_hw *hw, struct i40e_virt_mem *mem) + { + /* it's ok to kfree a NULL pointer */ + kfree(mem->va); +@@ -1249,8 +1263,11 @@ int i40e_count_filters(struct i40e_vsi *vsi) + int bkt; + int cnt = 0; + +- hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) +- ++cnt; ++ hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) { ++ if (f->state == I40E_FILTER_NEW || ++ f->state == I40E_FILTER_ACTIVE) ++ ++cnt; ++ } + + return cnt; + } +@@ -3905,6 +3922,12 @@ static void i40e_vsi_configure_msix(struct i40e_vsi *vsi) + q_vector->tx.target_itr >> 1); + q_vector->tx.current_itr = q_vector->tx.target_itr; + ++ /* Set ITR for software interrupts triggered after exiting ++ * busy-loop polling. ++ */ ++ wr32(hw, I40E_PFINT_ITRN(I40E_SW_ITR, vector - 1), ++ I40E_ITR_20K); ++ + wr32(hw, I40E_PFINT_RATEN(vector - 1), + i40e_intrl_usec_to_reg(vsi->int_rate_limit)); + +@@ -15644,10 +15667,10 @@ static int i40e_init_recovery_mode(struct i40e_pf *pf, struct i40e_hw *hw) + **/ + static inline void i40e_set_subsystem_device_id(struct i40e_hw *hw) + { +- struct pci_dev *pdev = ((struct i40e_pf *)hw->back)->pdev; ++ struct i40e_pf *pf = i40e_hw_to_pf(hw); + +- hw->subsystem_device_id = pdev->subsystem_device ? +- pdev->subsystem_device : ++ hw->subsystem_device_id = pf->pdev->subsystem_device ? ++ pf->pdev->subsystem_device : + (ushort)(rd32(hw, I40E_PFPCI_SUBSYSID) & USHRT_MAX); + } + +@@ -15717,7 +15740,6 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) + set_bit(__I40E_DOWN, pf->state); + + hw = &pf->hw; +- hw->back = pf; + + pf->ioremap_len = min_t(int, pci_resource_len(pdev, 0), + I40E_MAX_CSR_SPACE); +diff --git a/drivers/net/ethernet/intel/i40e/i40e_nvm.c b/drivers/net/ethernet/intel/i40e/i40e_nvm.c +index 07a46adeab38e..e5aec09d58e27 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_nvm.c ++++ b/drivers/net/ethernet/intel/i40e/i40e_nvm.c +@@ -1,6 +1,9 @@ + // SPDX-License-Identifier: GPL-2.0 + /* Copyright(c) 2013 - 2018 Intel Corporation. */ + ++#include <linux/bitfield.h> ++#include <linux/delay.h> ++#include "i40e_alloc.h" + #include "i40e_prototype.h" + + /** +diff --git a/drivers/net/ethernet/intel/i40e/i40e_osdep.h b/drivers/net/ethernet/intel/i40e/i40e_osdep.h +deleted file mode 100644 +index 2bd4de03dafa2..0000000000000 +--- a/drivers/net/ethernet/intel/i40e/i40e_osdep.h ++++ /dev/null +@@ -1,59 +0,0 @@ +-/* SPDX-License-Identifier: GPL-2.0 */ +-/* Copyright(c) 2013 - 2018 Intel Corporation. 
*/ +- +-#ifndef _I40E_OSDEP_H_ +-#define _I40E_OSDEP_H_ +- +-#include <linux/types.h> +-#include <linux/if_ether.h> +-#include <linux/if_vlan.h> +-#include <linux/tcp.h> +-#include <linux/pci.h> +-#include <linux/highuid.h> +- +-/* get readq/writeq support for 32 bit kernels, use the low-first version */ +-#include <linux/io-64-nonatomic-lo-hi.h> +- +-/* File to be the magic between shared code and +- * actual OS primitives +- */ +- +-#define hw_dbg(hw, S, A...) \ +-do { \ +- dev_dbg(&((struct i40e_pf *)hw->back)->pdev->dev, S, ##A); \ +-} while (0) +- +-#define wr32(a, reg, value) writel((value), ((a)->hw_addr + (reg))) +-#define rd32(a, reg) readl((a)->hw_addr + (reg)) +- +-#define rd64(a, reg) readq((a)->hw_addr + (reg)) +-#define i40e_flush(a) readl((a)->hw_addr + I40E_GLGEN_STAT) +- +-/* memory allocation tracking */ +-struct i40e_dma_mem { +- void *va; +- dma_addr_t pa; +- u32 size; +-}; +- +-#define i40e_allocate_dma_mem(h, m, unused, s, a) \ +- i40e_allocate_dma_mem_d(h, m, s, a) +-#define i40e_free_dma_mem(h, m) i40e_free_dma_mem_d(h, m) +- +-struct i40e_virt_mem { +- void *va; +- u32 size; +-}; +- +-#define i40e_allocate_virt_mem(h, m, s) i40e_allocate_virt_mem_d(h, m, s) +-#define i40e_free_virt_mem(h, m) i40e_free_virt_mem_d(h, m) +- +-#define i40e_debug(h, m, s, ...) \ +-do { \ +- if (((m) & (h)->debug_mask)) \ +- pr_info("i40e %02x:%02x.%x " s, \ +- (h)->bus.bus_id, (h)->bus.device, \ +- (h)->bus.func, ##__VA_ARGS__); \ +-} while (0) +- +-#endif /* _I40E_OSDEP_H_ */ +diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h +index 3eeee224f1fb2..2001fefa0c52d 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h ++++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h +@@ -4,9 +4,9 @@ + #ifndef _I40E_PROTOTYPE_H_ + #define _I40E_PROTOTYPE_H_ + +-#include "i40e_type.h" +-#include "i40e_alloc.h" + #include <linux/avf/virtchnl.h> ++#include "i40e_debug.h" ++#include "i40e_type.h" + + /* Prototypes for shared code functions that are not in + * the standard function pointer structures. These are +diff --git a/drivers/net/ethernet/intel/i40e/i40e_ptp.c b/drivers/net/ethernet/intel/i40e/i40e_ptp.c +index 8a26811140b47..65c714d0bfffd 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_ptp.c ++++ b/drivers/net/ethernet/intel/i40e/i40e_ptp.c +@@ -1,9 +1,10 @@ + // SPDX-License-Identifier: GPL-2.0 + /* Copyright(c) 2013 - 2018 Intel Corporation. */ + +-#include "i40e.h" + #include <linux/ptp_classify.h> + #include <linux/posix-clock.h> ++#include "i40e.h" ++#include "i40e_devids.h" + + /* The XL710 timesync is very much like Intel's 82599 design when it comes to + * the fundamental clock design. However, the clock operations are much simpler +@@ -34,7 +35,7 @@ enum i40e_ptp_pin { + GPIO_4 + }; + +-enum i40e_can_set_pins_t { ++enum i40e_can_set_pins { + CANT_DO_PINS = -1, + CAN_SET_PINS, + CAN_DO_PINS +@@ -192,7 +193,7 @@ static bool i40e_is_ptp_pin_dev(struct i40e_hw *hw) + * return CAN_DO_PINS if pins can be manipulated within a NIC or + * return CANT_DO_PINS otherwise. 
+ **/ +-static enum i40e_can_set_pins_t i40e_can_set_pins(struct i40e_pf *pf) ++static enum i40e_can_set_pins i40e_can_set_pins(struct i40e_pf *pf) + { + if (!i40e_is_ptp_pin_dev(&pf->hw)) { + dev_warn(&pf->pdev->dev, +@@ -1070,7 +1071,7 @@ static void i40e_ptp_set_pins_hw(struct i40e_pf *pf) + static int i40e_ptp_set_pins(struct i40e_pf *pf, + struct i40e_ptp_pins_settings *pins) + { +- enum i40e_can_set_pins_t pin_caps = i40e_can_set_pins(pf); ++ enum i40e_can_set_pins pin_caps = i40e_can_set_pins(pf); + int i = 0; + + if (pin_caps == CANT_DO_PINS) +diff --git a/drivers/net/ethernet/intel/i40e/i40e_register.h b/drivers/net/ethernet/intel/i40e/i40e_register.h +index 7339003aa17cd..989c186824733 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_register.h ++++ b/drivers/net/ethernet/intel/i40e/i40e_register.h +@@ -202,7 +202,9 @@ + #define I40E_GLGEN_MSCA_DEVADD_SHIFT 16 + #define I40E_GLGEN_MSCA_PHYADD_SHIFT 21 + #define I40E_GLGEN_MSCA_OPCODE_SHIFT 26 ++#define I40E_GLGEN_MSCA_OPCODE_MASK(_i) I40E_MASK(_i, I40E_GLGEN_MSCA_OPCODE_SHIFT) + #define I40E_GLGEN_MSCA_STCODE_SHIFT 28 ++#define I40E_GLGEN_MSCA_STCODE_MASK I40E_MASK(0x1, I40E_GLGEN_MSCA_STCODE_SHIFT) + #define I40E_GLGEN_MSCA_MDICMD_SHIFT 30 + #define I40E_GLGEN_MSCA_MDICMD_MASK I40E_MASK(0x1, I40E_GLGEN_MSCA_MDICMD_SHIFT) + #define I40E_GLGEN_MSCA_MDIINPROGEN_SHIFT 31 +@@ -328,8 +330,11 @@ + #define I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT 3 + #define I40E_PFINT_DYN_CTLN_ITR_INDX_MASK I40E_MASK(0x3, I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) + #define I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT 5 ++#define I40E_PFINT_DYN_CTLN_INTERVAL_MASK I40E_MASK(0xFFF, I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT) + #define I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_SHIFT 24 + #define I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_MASK I40E_MASK(0x1, I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_SHIFT) ++#define I40E_PFINT_DYN_CTLN_SW_ITR_INDX_SHIFT 25 ++#define I40E_PFINT_DYN_CTLN_SW_ITR_INDX_MASK I40E_MASK(0x3, I40E_PFINT_DYN_CTLN_SW_ITR_INDX_SHIFT) + #define I40E_PFINT_ICR0 0x00038780 /* Reset: CORER */ + #define I40E_PFINT_ICR0_INTEVENT_SHIFT 0 + #define I40E_PFINT_ICR0_INTEVENT_MASK I40E_MASK(0x1, I40E_PFINT_ICR0_INTEVENT_SHIFT) +diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c +index 1df2f93388128..c962987d8b51b 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c ++++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c +@@ -1,14 +1,13 @@ + // SPDX-License-Identifier: GPL-2.0 + /* Copyright(c) 2013 - 2018 Intel Corporation. */ + +-#include <linux/prefetch.h> + #include <linux/bpf_trace.h> ++#include <linux/prefetch.h> ++#include <linux/sctp.h> + #include <net/mpls.h> + #include <net/xdp.h> +-#include "i40e.h" +-#include "i40e_trace.h" +-#include "i40e_prototype.h" + #include "i40e_txrx_common.h" ++#include "i40e_trace.h" + #include "i40e_xsk.h" + + #define I40E_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS) +@@ -2644,7 +2643,22 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget, + return failure ? 
budget : (int)total_rx_packets; + } + +-static inline u32 i40e_buildreg_itr(const int type, u16 itr) ++/** ++ * i40e_buildreg_itr - build a value for writing to I40E_PFINT_DYN_CTLN register ++ * @itr_idx: interrupt throttling index ++ * @interval: interrupt throttling interval value in usecs ++ * @force_swint: force software interrupt ++ * ++ * The function builds a value for I40E_PFINT_DYN_CTLN register that ++ * is used to update interrupt throttling interval for specified ITR index ++ * and optionally enforces a software interrupt. If the @itr_idx is equal ++ * to I40E_ITR_NONE then no interval change is applied and only @force_swint ++ * parameter is taken into account. If the interval change and enforced ++ * software interrupt are not requested then the built value just enables ++ * appropriate vector interrupt. ++ **/ ++static u32 i40e_buildreg_itr(enum i40e_dyn_idx itr_idx, u16 interval, ++ bool force_swint) + { + u32 val; + +@@ -2658,23 +2672,33 @@ static inline u32 i40e_buildreg_itr(const int type, u16 itr) + * an event in the PBA anyway so we need to rely on the automask + * to hold pending events for us until the interrupt is re-enabled + * +- * The itr value is reported in microseconds, and the register +- * value is recorded in 2 microsecond units. For this reason we +- * only need to shift by the interval shift - 1 instead of the +- * full value. ++ * We have to shift the given value as it is reported in microseconds ++ * and the register value is recorded in 2 microsecond units. + */ +- itr &= I40E_ITR_MASK; ++ interval >>= 1; + ++ /* 1. Enable vector interrupt ++ * 2. Update the interval for the specified ITR index ++ * (I40E_ITR_NONE in the register is used to indicate that ++ * no interval update is requested) ++ */ + val = I40E_PFINT_DYN_CTLN_INTENA_MASK | +- (type << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) | +- (itr << (I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT - 1)); ++ FIELD_PREP(I40E_PFINT_DYN_CTLN_ITR_INDX_MASK, itr_idx) | ++ FIELD_PREP(I40E_PFINT_DYN_CTLN_INTERVAL_MASK, interval); ++ ++ /* 3. Enforce software interrupt trigger if requested ++ * (These software interrupts rate is limited by ITR2 that is ++ * set to 20K interrupts per second) ++ */ ++ if (force_swint) ++ val |= I40E_PFINT_DYN_CTLN_SWINT_TRIG_MASK | ++ I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_MASK | ++ FIELD_PREP(I40E_PFINT_DYN_CTLN_SW_ITR_INDX_MASK, ++ I40E_SW_ITR); + + return val; + } + +-/* a small macro to shorten up some long lines */ +-#define INTREG I40E_PFINT_DYN_CTLN +- + /* The act of updating the ITR will cause it to immediately trigger. In order + * to prevent this from throwing off adaptive update statistics we defer the + * update so that it can only happen so often. 
So after either Tx or Rx are +@@ -2693,8 +2717,10 @@ static inline u32 i40e_buildreg_itr(const int type, u16 itr) + static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, + struct i40e_q_vector *q_vector) + { ++ enum i40e_dyn_idx itr_idx = I40E_ITR_NONE; + struct i40e_hw *hw = &vsi->back->hw; +- u32 intval; ++ u16 interval = 0; ++ u32 itr_val; + + /* If we don't have MSIX, then we only need to re-enable icr0 */ + if (!(vsi->back->flags & I40E_FLAG_MSIX_ENABLED)) { +@@ -2716,8 +2742,8 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, + */ + if (q_vector->rx.target_itr < q_vector->rx.current_itr) { + /* Rx ITR needs to be reduced, this is highest priority */ +- intval = i40e_buildreg_itr(I40E_RX_ITR, +- q_vector->rx.target_itr); ++ itr_idx = I40E_RX_ITR; ++ interval = q_vector->rx.target_itr; + q_vector->rx.current_itr = q_vector->rx.target_itr; + q_vector->itr_countdown = ITR_COUNTDOWN_START; + } else if ((q_vector->tx.target_itr < q_vector->tx.current_itr) || +@@ -2726,25 +2752,36 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, + /* Tx ITR needs to be reduced, this is second priority + * Tx ITR needs to be increased more than Rx, fourth priority + */ +- intval = i40e_buildreg_itr(I40E_TX_ITR, +- q_vector->tx.target_itr); ++ itr_idx = I40E_TX_ITR; ++ interval = q_vector->tx.target_itr; + q_vector->tx.current_itr = q_vector->tx.target_itr; + q_vector->itr_countdown = ITR_COUNTDOWN_START; + } else if (q_vector->rx.current_itr != q_vector->rx.target_itr) { + /* Rx ITR needs to be increased, third priority */ +- intval = i40e_buildreg_itr(I40E_RX_ITR, +- q_vector->rx.target_itr); ++ itr_idx = I40E_RX_ITR; ++ interval = q_vector->rx.target_itr; + q_vector->rx.current_itr = q_vector->rx.target_itr; + q_vector->itr_countdown = ITR_COUNTDOWN_START; + } else { + /* No ITR update, lowest priority */ +- intval = i40e_buildreg_itr(I40E_ITR_NONE, 0); + if (q_vector->itr_countdown) + q_vector->itr_countdown--; + } + +- if (!test_bit(__I40E_VSI_DOWN, vsi->state)) +- wr32(hw, INTREG(q_vector->reg_idx), intval); ++ /* Do not update interrupt control register if VSI is down */ ++ if (test_bit(__I40E_VSI_DOWN, vsi->state)) ++ return; ++ ++ /* Update ITR interval if necessary and enforce software interrupt ++ * if we are exiting busy poll. ++ */ ++ if (q_vector->in_busy_poll) { ++ itr_val = i40e_buildreg_itr(itr_idx, interval, true); ++ q_vector->in_busy_poll = false; ++ } else { ++ itr_val = i40e_buildreg_itr(itr_idx, interval, false); ++ } ++ wr32(hw, I40E_PFINT_DYN_CTLN(q_vector->reg_idx), itr_val); + } + + /** +@@ -2859,6 +2896,8 @@ int i40e_napi_poll(struct napi_struct *napi, int budget) + */ + if (likely(napi_complete_done(napi, work_done))) + i40e_update_enable_itr(vsi, q_vector); ++ else ++ q_vector->in_busy_poll = true; + + return min(work_done, budget - 1); + } +diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h +index 900b0d9ede9f5..2b1d50873a4d1 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h ++++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h +@@ -5,6 +5,7 @@ + #define _I40E_TXRX_H_ + + #include <net/xdp.h> ++#include "i40e_type.h" + + /* Interrupt Throttling and Rate Limiting Goodies */ + #define I40E_DEFAULT_IRQ_WORK 256 +@@ -57,7 +58,7 @@ static inline u16 i40e_intrl_usec_to_reg(int intrl) + * mentioning ITR_INDX, ITR_NONE cannot be used as an index 'n' into any + * register but instead is a special value meaning "don't update" ITR0/1/2. 
+ */ +-enum i40e_dyn_idx_t { ++enum i40e_dyn_idx { + I40E_IDX_ITR0 = 0, + I40E_IDX_ITR1 = 1, + I40E_IDX_ITR2 = 2, +@@ -67,6 +68,7 @@ enum i40e_dyn_idx_t { + /* these are indexes into ITRN registers */ + #define I40E_RX_ITR I40E_IDX_ITR0 + #define I40E_TX_ITR I40E_IDX_ITR1 ++#define I40E_SW_ITR I40E_IDX_ITR2 + + /* Supported RSS offloads */ + #define I40E_DEFAULT_RSS_HENA ( \ +@@ -305,7 +307,7 @@ struct i40e_rx_queue_stats { + u64 page_busy_count; + }; + +-enum i40e_ring_state_t { ++enum i40e_ring_state { + __I40E_TX_FDIR_INIT_DONE, + __I40E_TX_XPS_INIT_DONE, + __I40E_RING_STATE_NBITS /* must be last */ +diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx_common.h b/drivers/net/ethernet/intel/i40e/i40e_txrx_common.h +index 8c5118c8baafb..e26807fd21232 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_txrx_common.h ++++ b/drivers/net/ethernet/intel/i40e/i40e_txrx_common.h +@@ -4,6 +4,8 @@ + #ifndef I40E_TXRX_COMMON_ + #define I40E_TXRX_COMMON_ + ++#include "i40e.h" ++ + int i40e_xmit_xdp_tx_ring(struct xdp_buff *xdp, struct i40e_ring *xdp_ring); + void i40e_clean_programming_status(struct i40e_ring *rx_ring, u64 qword0_raw, + u64 qword1); +diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h +index 232131bedc3e7..4092f82bcfb12 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_type.h ++++ b/drivers/net/ethernet/intel/i40e/i40e_type.h +@@ -4,12 +4,9 @@ + #ifndef _I40E_TYPE_H_ + #define _I40E_TYPE_H_ + +-#include "i40e_osdep.h" +-#include "i40e_register.h" ++#include <uapi/linux/if_ether.h> + #include "i40e_adminq.h" + #include "i40e_hmc.h" +-#include "i40e_lan_hmc.h" +-#include "i40e_devids.h" + + /* I40E_MASK is a macro used on 32 bit registers */ + #define I40E_MASK(mask, shift) ((u32)(mask) << (shift)) +@@ -43,48 +40,14 @@ typedef void (*I40E_ADMINQ_CALLBACK)(struct i40e_hw *, struct i40e_aq_desc *); + #define I40E_QTX_CTL_VM_QUEUE 0x1 + #define I40E_QTX_CTL_PF_QUEUE 0x2 + +-/* debug masks - set these bits in hw->debug_mask to control output */ +-enum i40e_debug_mask { +- I40E_DEBUG_INIT = 0x00000001, +- I40E_DEBUG_RELEASE = 0x00000002, +- +- I40E_DEBUG_LINK = 0x00000010, +- I40E_DEBUG_PHY = 0x00000020, +- I40E_DEBUG_HMC = 0x00000040, +- I40E_DEBUG_NVM = 0x00000080, +- I40E_DEBUG_LAN = 0x00000100, +- I40E_DEBUG_FLOW = 0x00000200, +- I40E_DEBUG_DCB = 0x00000400, +- I40E_DEBUG_DIAG = 0x00000800, +- I40E_DEBUG_FD = 0x00001000, +- I40E_DEBUG_PACKAGE = 0x00002000, +- I40E_DEBUG_IWARP = 0x00F00000, +- I40E_DEBUG_AQ_MESSAGE = 0x01000000, +- I40E_DEBUG_AQ_DESCRIPTOR = 0x02000000, +- I40E_DEBUG_AQ_DESC_BUFFER = 0x04000000, +- I40E_DEBUG_AQ_COMMAND = 0x06000000, +- I40E_DEBUG_AQ = 0x0F000000, +- +- I40E_DEBUG_USER = 0xF0000000, +- +- I40E_DEBUG_ALL = 0xFFFFFFFF +-}; +- +-#define I40E_MDIO_CLAUSE22_STCODE_MASK I40E_MASK(1, \ +- I40E_GLGEN_MSCA_STCODE_SHIFT) +-#define I40E_MDIO_CLAUSE22_OPCODE_WRITE_MASK I40E_MASK(1, \ +- I40E_GLGEN_MSCA_OPCODE_SHIFT) +-#define I40E_MDIO_CLAUSE22_OPCODE_READ_MASK I40E_MASK(2, \ +- I40E_GLGEN_MSCA_OPCODE_SHIFT) +- +-#define I40E_MDIO_CLAUSE45_STCODE_MASK I40E_MASK(0, \ +- I40E_GLGEN_MSCA_STCODE_SHIFT) +-#define I40E_MDIO_CLAUSE45_OPCODE_ADDRESS_MASK I40E_MASK(0, \ +- I40E_GLGEN_MSCA_OPCODE_SHIFT) +-#define I40E_MDIO_CLAUSE45_OPCODE_WRITE_MASK I40E_MASK(1, \ +- I40E_GLGEN_MSCA_OPCODE_SHIFT) +-#define I40E_MDIO_CLAUSE45_OPCODE_READ_MASK I40E_MASK(3, \ +- I40E_GLGEN_MSCA_OPCODE_SHIFT) ++#define I40E_MDIO_CLAUSE22_STCODE_MASK I40E_GLGEN_MSCA_STCODE_MASK ++#define I40E_MDIO_CLAUSE22_OPCODE_WRITE_MASK 
I40E_GLGEN_MSCA_OPCODE_MASK(1) ++#define I40E_MDIO_CLAUSE22_OPCODE_READ_MASK I40E_GLGEN_MSCA_OPCODE_MASK(2) ++ ++#define I40E_MDIO_CLAUSE45_STCODE_MASK I40E_GLGEN_MSCA_STCODE_MASK ++#define I40E_MDIO_CLAUSE45_OPCODE_ADDRESS_MASK I40E_GLGEN_MSCA_OPCODE_MASK(0) ++#define I40E_MDIO_CLAUSE45_OPCODE_WRITE_MASK I40E_GLGEN_MSCA_OPCODE_MASK(1) ++#define I40E_MDIO_CLAUSE45_OPCODE_READ_MASK I40E_GLGEN_MSCA_OPCODE_MASK(3) + + #define I40E_PHY_COM_REG_PAGE 0x1E + #define I40E_PHY_LED_LINK_MODE_MASK 0xF0 +@@ -525,7 +488,6 @@ struct i40e_dcbx_config { + /* Port hardware description */ + struct i40e_hw { + u8 __iomem *hw_addr; +- void *back; + + /* subsystem structs */ + struct i40e_phy_info phy; +diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +index 082c099209995..7d47a05274548 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c ++++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +@@ -2,6 +2,8 @@ + /* Copyright(c) 2013 - 2018 Intel Corporation. */ + + #include "i40e.h" ++#include "i40e_lan_hmc.h" ++#include "i40e_virtchnl_pf.h" + + /*********************notification routines***********************/ + +@@ -1628,8 +1630,8 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) + { + struct i40e_hw *hw = &pf->hw; + struct i40e_vf *vf; +- int i, v; + u32 reg; ++ int i; + + /* If we don't have any VFs, then there is nothing to reset */ + if (!pf->num_alloc_vfs) +@@ -1640,11 +1642,10 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) + return false; + + /* Begin reset on all VFs at once */ +- for (v = 0; v < pf->num_alloc_vfs; v++) { +- vf = &pf->vf[v]; ++ for (vf = &pf->vf[0]; vf < &pf->vf[pf->num_alloc_vfs]; ++vf) { + /* If VF is being reset no need to trigger reset again */ + if (!test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states)) +- i40e_trigger_vf_reset(&pf->vf[v], flr); ++ i40e_trigger_vf_reset(vf, flr); + } + + /* HW requires some time to make sure it can flush the FIFO for a VF +@@ -1653,14 +1654,13 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) + * the VFs using a simple iterator that increments once that VF has + * finished resetting. + */ +- for (i = 0, v = 0; i < 10 && v < pf->num_alloc_vfs; i++) { ++ for (i = 0, vf = &pf->vf[0]; i < 10 && vf < &pf->vf[pf->num_alloc_vfs]; ++i) { + usleep_range(10000, 20000); + + /* Check each VF in sequence, beginning with the VF to fail + * the previous check. + */ +- while (v < pf->num_alloc_vfs) { +- vf = &pf->vf[v]; ++ while (vf < &pf->vf[pf->num_alloc_vfs]) { + if (!test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states)) { + reg = rd32(hw, I40E_VPGEN_VFRSTAT(vf->vf_id)); + if (!(reg & I40E_VPGEN_VFRSTAT_VFRD_MASK)) +@@ -1670,7 +1670,7 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) + /* If the current VF has finished resetting, move on + * to the next VF in sequence. + */ +- v++; ++ ++vf; + } + } + +@@ -1680,39 +1680,39 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) + /* Display a warning if at least one VF didn't manage to reset in + * time, but continue on with the operation. + */ +- if (v < pf->num_alloc_vfs) ++ if (vf < &pf->vf[pf->num_alloc_vfs]) + dev_err(&pf->pdev->dev, "VF reset check timeout on VF %d\n", +- pf->vf[v].vf_id); ++ vf->vf_id); + usleep_range(10000, 20000); + + /* Begin disabling all the rings associated with VFs, but do not wait + * between each VF. 
+ */ +- for (v = 0; v < pf->num_alloc_vfs; v++) { ++ for (vf = &pf->vf[0]; vf < &pf->vf[pf->num_alloc_vfs]; ++vf) { + /* On initial reset, we don't have any queues to disable */ +- if (pf->vf[v].lan_vsi_idx == 0) ++ if (vf->lan_vsi_idx == 0) + continue; + + /* If VF is reset in another thread just continue */ + if (test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states)) + continue; + +- i40e_vsi_stop_rings_no_wait(pf->vsi[pf->vf[v].lan_vsi_idx]); ++ i40e_vsi_stop_rings_no_wait(pf->vsi[vf->lan_vsi_idx]); + } + + /* Now that we've notified HW to disable all of the VF rings, wait + * until they finish. + */ +- for (v = 0; v < pf->num_alloc_vfs; v++) { ++ for (vf = &pf->vf[0]; vf < &pf->vf[pf->num_alloc_vfs]; ++vf) { + /* On initial reset, we don't have any queues to disable */ +- if (pf->vf[v].lan_vsi_idx == 0) ++ if (vf->lan_vsi_idx == 0) + continue; + + /* If VF is reset in another thread just continue */ + if (test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states)) + continue; + +- i40e_vsi_wait_queues_disabled(pf->vsi[pf->vf[v].lan_vsi_idx]); ++ i40e_vsi_wait_queues_disabled(pf->vsi[vf->lan_vsi_idx]); + } + + /* Hw may need up to 50ms to finish disabling the RX queues. We +@@ -1721,12 +1721,12 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) + mdelay(50); + + /* Finish the reset on each VF */ +- for (v = 0; v < pf->num_alloc_vfs; v++) { ++ for (vf = &pf->vf[0]; vf < &pf->vf[pf->num_alloc_vfs]; ++vf) { + /* If VF is reset in another thread just continue */ + if (test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states)) + continue; + +- i40e_cleanup_reset_vf(&pf->vf[v]); ++ i40e_cleanup_reset_vf(vf); + } + + i40e_flush(hw); +@@ -3143,11 +3143,12 @@ static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg) + /* Allow to delete VF primary MAC only if it was not set + * administratively by PF or if VF is trusted. + */ +- if (ether_addr_equal(addr, vf->default_lan_addr.addr) && +- i40e_can_vf_change_mac(vf)) +- was_unimac_deleted = true; +- else +- continue; ++ if (ether_addr_equal(addr, vf->default_lan_addr.addr)) { ++ if (i40e_can_vf_change_mac(vf)) ++ was_unimac_deleted = true; ++ else ++ continue; ++ } + + if (i40e_del_mac_filter(vsi, al->list[i].addr)) { + ret = -EINVAL; +diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h +index cf190762421cc..66f95e2f3146a 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h ++++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h +@@ -4,7 +4,9 @@ + #ifndef _I40E_VIRTCHNL_PF_H_ + #define _I40E_VIRTCHNL_PF_H_ + +-#include "i40e.h" ++#include <linux/avf/virtchnl.h> ++#include <linux/netdevice.h> ++#include "i40e_type.h" + + #define I40E_MAX_VLANID 4095 + +diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c +index 1f8ae6f5d9807..65f38a57b3dfe 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c ++++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c +@@ -2,11 +2,7 @@ + /* Copyright(c) 2018 Intel Corporation. 
*/ + + #include <linux/bpf_trace.h> +-#include <linux/stringify.h> + #include <net/xdp_sock_drv.h> +-#include <net/xdp.h> +- +-#include "i40e.h" + #include "i40e_txrx_common.h" + #include "i40e_xsk.h" + +diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.h b/drivers/net/ethernet/intel/i40e/i40e_xsk.h +index 821df248f8bee..ef156fad52f26 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.h ++++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.h +@@ -4,6 +4,8 @@ + #ifndef _I40E_XSK_H_ + #define _I40E_XSK_H_ + ++#include <linux/types.h> ++ + /* This value should match the pragma in the loop_unrolled_for + * macro. Why 4? It is strictly empirical. It seems to be a good + * compromise between the advantage of having simultaneous outstanding +@@ -20,7 +22,9 @@ + #define loop_unrolled_for for + #endif + ++struct i40e_ring; + struct i40e_vsi; ++struct net_device; + struct xsk_buff_pool; + + int i40e_queue_pair_disable(struct i40e_vsi *vsi, int queue_pair); +diff --git a/drivers/net/ethernet/intel/iavf/iavf_common.c b/drivers/net/ethernet/intel/iavf/iavf_common.c +index 1afd761d80520..f7988cf5efa58 100644 +--- a/drivers/net/ethernet/intel/iavf/iavf_common.c ++++ b/drivers/net/ethernet/intel/iavf/iavf_common.c +@@ -1,10 +1,11 @@ + // SPDX-License-Identifier: GPL-2.0 + /* Copyright(c) 2013 - 2018 Intel Corporation. */ + ++#include <linux/avf/virtchnl.h> ++#include <linux/bitfield.h> + #include "iavf_type.h" + #include "iavf_adminq.h" + #include "iavf_prototype.h" +-#include <linux/avf/virtchnl.h> + + /** + * iavf_set_mac_type - Sets MAC type +diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c +index 892c6a4f03bb8..1ac97bd606e38 100644 +--- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c ++++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c +@@ -1,11 +1,12 @@ + // SPDX-License-Identifier: GPL-2.0 + /* Copyright(c) 2013 - 2018 Intel Corporation. */ + ++#include <linux/bitfield.h> ++#include <linux/uaccess.h> ++ + /* ethtool support for iavf */ + #include "iavf.h" + +-#include <linux/uaccess.h> +- + /* ethtool statistics helpers */ + + /** +diff --git a/drivers/net/ethernet/intel/iavf/iavf_fdir.c b/drivers/net/ethernet/intel/iavf/iavf_fdir.c +index 03e774bd2a5b4..65ddcd81c993e 100644 +--- a/drivers/net/ethernet/intel/iavf/iavf_fdir.c ++++ b/drivers/net/ethernet/intel/iavf/iavf_fdir.c +@@ -3,6 +3,7 @@ + + /* flow director ethtool support for iavf */ + ++#include <linux/bitfield.h> + #include "iavf.h" + + #define GTPU_PORT 2152 +diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.c b/drivers/net/ethernet/intel/iavf/iavf_txrx.c +index 8c5f6096b0022..f998ecf743c46 100644 +--- a/drivers/net/ethernet/intel/iavf/iavf_txrx.c ++++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.c +@@ -1,6 +1,7 @@ + // SPDX-License-Identifier: GPL-2.0 + /* Copyright(c) 2013 - 2018 Intel Corporation. 
*/ + ++#include <linux/bitfield.h> + #include <linux/prefetch.h> + + #include "iavf.h" +diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +index 45f3e351653db..72ca2199c9572 100644 +--- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h ++++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +@@ -592,8 +592,9 @@ struct ice_aqc_recipe_data_elem { + struct ice_aqc_recipe_to_profile { + __le16 profile_id; + u8 rsvd[6]; +- DECLARE_BITMAP(recipe_assoc, ICE_MAX_NUM_RECIPES); ++ __le64 recipe_assoc; + }; ++static_assert(sizeof(struct ice_aqc_recipe_to_profile) == 16); + + /* Add/Update/Remove/Get switch rules (indirect 0x02A0, 0x02A1, 0x02A2, 0x02A3) + */ +diff --git a/drivers/net/ethernet/intel/ice/ice_lag.c b/drivers/net/ethernet/intel/ice/ice_lag.c +index 23e197c3d02a7..4e675c7c199fa 100644 +--- a/drivers/net/ethernet/intel/ice/ice_lag.c ++++ b/drivers/net/ethernet/intel/ice/ice_lag.c +@@ -2000,14 +2000,14 @@ int ice_init_lag(struct ice_pf *pf) + /* associate recipes to profiles */ + for (n = 0; n < ICE_PROFID_IPV6_GTPU_IPV6_TCP_INNER; n++) { + err = ice_aq_get_recipe_to_profile(&pf->hw, n, +- (u8 *)&recipe_bits, NULL); ++ &recipe_bits, NULL); + if (err) + continue; + + if (recipe_bits & BIT(ICE_SW_LKUP_DFLT)) { + recipe_bits |= BIT(lag->pf_recipe); + ice_aq_map_recipe_to_profile(&pf->hw, n, +- (u8 *)&recipe_bits, NULL); ++ recipe_bits, NULL); + } + } + +diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c +index 7f4bc110ead44..2004120a58acd 100644 +--- a/drivers/net/ethernet/intel/ice/ice_lib.c ++++ b/drivers/net/ethernet/intel/ice/ice_lib.c +@@ -3084,27 +3084,26 @@ ice_vsi_rebuild_set_coalesce(struct ice_vsi *vsi, + } + + /** +- * ice_vsi_realloc_stat_arrays - Frees unused stat structures ++ * ice_vsi_realloc_stat_arrays - Frees unused stat structures or alloc new ones + * @vsi: VSI pointer +- * @prev_txq: Number of Tx rings before ring reallocation +- * @prev_rxq: Number of Rx rings before ring reallocation + */ +-static void +-ice_vsi_realloc_stat_arrays(struct ice_vsi *vsi, int prev_txq, int prev_rxq) ++static int ++ice_vsi_realloc_stat_arrays(struct ice_vsi *vsi) + { ++ u16 req_txq = vsi->req_txq ? vsi->req_txq : vsi->alloc_txq; ++ u16 req_rxq = vsi->req_rxq ? 
vsi->req_rxq : vsi->alloc_rxq;
++ struct ice_ring_stats **tx_ring_stats;
++ struct ice_ring_stats **rx_ring_stats;
+ struct ice_vsi_stats *vsi_stat;
+ struct ice_pf *pf = vsi->back;
++ u16 prev_txq = vsi->alloc_txq;
++ u16 prev_rxq = vsi->alloc_rxq;
+ int i;
+
+- if (!prev_txq || !prev_rxq)
+- return;
+- if (vsi->type == ICE_VSI_CHNL)
+- return;
+-
+ vsi_stat = pf->vsi_stats[vsi->idx];
+
+- if (vsi->num_txq < prev_txq) {
+- for (i = vsi->num_txq; i < prev_txq; i++) {
++ if (req_txq < prev_txq) {
++ for (i = req_txq; i < prev_txq; i++) {
+ if (vsi_stat->tx_ring_stats[i]) {
+ kfree_rcu(vsi_stat->tx_ring_stats[i], rcu);
+ WRITE_ONCE(vsi_stat->tx_ring_stats[i], NULL);
+@@ -3112,14 +3111,36 @@ ice_vsi_realloc_stat_arrays(struct ice_vsi *vsi, int prev_txq, int prev_rxq)
+ }
+ }
+
+- if (vsi->num_rxq < prev_rxq) {
+- for (i = vsi->num_rxq; i < prev_rxq; i++) {
++ tx_ring_stats = vsi_stat->tx_ring_stats;
++ vsi_stat->tx_ring_stats =
++ krealloc_array(vsi_stat->tx_ring_stats, req_txq,
++ sizeof(*vsi_stat->tx_ring_stats),
++ GFP_KERNEL | __GFP_ZERO);
++ if (!vsi_stat->tx_ring_stats) {
++ vsi_stat->tx_ring_stats = tx_ring_stats;
++ return -ENOMEM;
++ }
++
++ if (req_rxq < prev_rxq) {
++ for (i = req_rxq; i < prev_rxq; i++) {
+ if (vsi_stat->rx_ring_stats[i]) {
+ kfree_rcu(vsi_stat->rx_ring_stats[i], rcu);
+ WRITE_ONCE(vsi_stat->rx_ring_stats[i], NULL);
+ }
+ }
+ }
++
++ rx_ring_stats = vsi_stat->rx_ring_stats;
++ vsi_stat->rx_ring_stats =
++ krealloc_array(vsi_stat->rx_ring_stats, req_rxq,
++ sizeof(*vsi_stat->rx_ring_stats),
++ GFP_KERNEL | __GFP_ZERO);
++ if (!vsi_stat->rx_ring_stats) {
++ vsi_stat->rx_ring_stats = rx_ring_stats;
++ return -ENOMEM;
++ }
++
++ return 0;
+ }
+
+ /**
+@@ -3136,9 +3157,9 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags)
+ {
+ struct ice_vsi_cfg_params params = {};
+ struct ice_coalesce_stored *coalesce;
+- int ret, prev_txq, prev_rxq;
+- int prev_num_q_vectors = 0;
++ int prev_num_q_vectors;
+ struct ice_pf *pf;
++ int ret;
+
+ if (!vsi)
+ return -EINVAL;
+@@ -3150,6 +3171,15 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags)
+ if (WARN_ON(vsi->type == ICE_VSI_VF && !vsi->vf))
+ return -EINVAL;
+
++ ret = ice_vsi_realloc_stat_arrays(vsi);
++ if (ret)
++ goto err_vsi_cfg;
++
++ ice_vsi_decfg(vsi);
++ ret = ice_vsi_cfg_def(vsi, &params);
++ if (ret)
++ goto err_vsi_cfg;
++
+ coalesce = kcalloc(vsi->num_q_vectors,
+ sizeof(struct ice_coalesce_stored), GFP_KERNEL);
+ if (!coalesce)
+@@ -3157,14 +3187,6 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags)
+
+ prev_num_q_vectors = ice_vsi_rebuild_get_coalesce(vsi, coalesce);
+
+- prev_txq = vsi->num_txq;
+- prev_rxq = vsi->num_rxq;
+-
+- ice_vsi_decfg(vsi);
+- ret = ice_vsi_cfg_def(vsi, &params);
+- if (ret)
+- goto err_vsi_cfg;
+-
+ ret = ice_vsi_cfg_tc_lan(pf, vsi);
+ if (ret) {
+ if (vsi_flags & ICE_VSI_FLAG_INIT) {
+@@ -3176,8 +3198,6 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags)
+ return ice_schedule_reset(pf, ICE_RESET_PFR);
+ }
+
+- ice_vsi_realloc_stat_arrays(vsi, prev_txq, prev_rxq);
+-
+ ice_vsi_rebuild_set_coalesce(vsi, coalesce, prev_num_q_vectors);
+ kfree(coalesce);
+
+@@ -3185,8 +3205,8 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags)
+
+ err_vsi_cfg_tc_lan:
+ ice_vsi_decfg(vsi);
+-err_vsi_cfg:
+ kfree(coalesce);
++err_vsi_cfg:
+ return ret;
+ }
+
+diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c
+index 2f77b684ff765..4c6d58bb2690d 100644
+--- a/drivers/net/ethernet/intel/ice/ice_switch.c
++++ 
b/drivers/net/ethernet/intel/ice/ice_switch.c +@@ -2032,12 +2032,12 @@ ice_update_recipe_lkup_idx(struct ice_hw *hw, + * ice_aq_map_recipe_to_profile - Map recipe to packet profile + * @hw: pointer to the HW struct + * @profile_id: package profile ID to associate the recipe with +- * @r_bitmap: Recipe bitmap filled in and need to be returned as response ++ * @r_assoc: Recipe bitmap filled in and need to be returned as response + * @cd: pointer to command details structure or NULL + * Recipe to profile association (0x0291) + */ + int +-ice_aq_map_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap, ++ice_aq_map_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u64 r_assoc, + struct ice_sq_cd *cd) + { + struct ice_aqc_recipe_to_profile *cmd; +@@ -2049,7 +2049,7 @@ ice_aq_map_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap, + /* Set the recipe ID bit in the bitmask to let the device know which + * profile we are associating the recipe to + */ +- memcpy(cmd->recipe_assoc, r_bitmap, sizeof(cmd->recipe_assoc)); ++ cmd->recipe_assoc = cpu_to_le64(r_assoc); + + return ice_aq_send_cmd(hw, &desc, NULL, 0, cd); + } +@@ -2058,12 +2058,12 @@ ice_aq_map_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap, + * ice_aq_get_recipe_to_profile - Map recipe to packet profile + * @hw: pointer to the HW struct + * @profile_id: package profile ID to associate the recipe with +- * @r_bitmap: Recipe bitmap filled in and need to be returned as response ++ * @r_assoc: Recipe bitmap filled in and need to be returned as response + * @cd: pointer to command details structure or NULL + * Associate profile ID with given recipe (0x0293) + */ + int +-ice_aq_get_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap, ++ice_aq_get_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u64 *r_assoc, + struct ice_sq_cd *cd) + { + struct ice_aqc_recipe_to_profile *cmd; +@@ -2076,7 +2076,7 @@ ice_aq_get_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap, + + status = ice_aq_send_cmd(hw, &desc, NULL, 0, cd); + if (!status) +- memcpy(r_bitmap, cmd->recipe_assoc, sizeof(cmd->recipe_assoc)); ++ *r_assoc = le64_to_cpu(cmd->recipe_assoc); + + return status; + } +@@ -2121,6 +2121,7 @@ int ice_alloc_recipe(struct ice_hw *hw, u16 *rid) + static void ice_get_recp_to_prof_map(struct ice_hw *hw) + { + DECLARE_BITMAP(r_bitmap, ICE_MAX_NUM_RECIPES); ++ u64 recp_assoc; + u16 i; + + for (i = 0; i < hw->switch_info->max_used_prof_index + 1; i++) { +@@ -2128,8 +2129,9 @@ static void ice_get_recp_to_prof_map(struct ice_hw *hw) + + bitmap_zero(profile_to_recipe[i], ICE_MAX_NUM_RECIPES); + bitmap_zero(r_bitmap, ICE_MAX_NUM_RECIPES); +- if (ice_aq_get_recipe_to_profile(hw, i, (u8 *)r_bitmap, NULL)) ++ if (ice_aq_get_recipe_to_profile(hw, i, &recp_assoc, NULL)) + continue; ++ bitmap_from_arr64(r_bitmap, &recp_assoc, ICE_MAX_NUM_RECIPES); + bitmap_copy(profile_to_recipe[i], r_bitmap, + ICE_MAX_NUM_RECIPES); + for_each_set_bit(j, r_bitmap, ICE_MAX_NUM_RECIPES) +@@ -5431,22 +5433,24 @@ ice_add_adv_recipe(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups, + */ + list_for_each_entry(fvit, &rm->fv_list, list_entry) { + DECLARE_BITMAP(r_bitmap, ICE_MAX_NUM_RECIPES); ++ u64 recp_assoc; + u16 j; + + status = ice_aq_get_recipe_to_profile(hw, fvit->profile_id, +- (u8 *)r_bitmap, NULL); ++ &recp_assoc, NULL); + if (status) + goto err_unroll; + ++ bitmap_from_arr64(r_bitmap, &recp_assoc, ICE_MAX_NUM_RECIPES); + bitmap_or(r_bitmap, r_bitmap, rm->r_bitmap, + ICE_MAX_NUM_RECIPES); + status = 
ice_acquire_change_lock(hw, ICE_RES_WRITE); + if (status) + goto err_unroll; + ++ bitmap_to_arr64(&recp_assoc, r_bitmap, ICE_MAX_NUM_RECIPES); + status = ice_aq_map_recipe_to_profile(hw, fvit->profile_id, +- (u8 *)r_bitmap, +- NULL); ++ recp_assoc, NULL); + ice_release_change_lock(hw); + + if (status) +diff --git a/drivers/net/ethernet/intel/ice/ice_switch.h b/drivers/net/ethernet/intel/ice/ice_switch.h +index db7e501b7e0a4..89ffa1b51b5ad 100644 +--- a/drivers/net/ethernet/intel/ice/ice_switch.h ++++ b/drivers/net/ethernet/intel/ice/ice_switch.h +@@ -424,10 +424,10 @@ int ice_aq_add_recipe(struct ice_hw *hw, + struct ice_aqc_recipe_data_elem *s_recipe_list, + u16 num_recipes, struct ice_sq_cd *cd); + int +-ice_aq_get_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap, ++ice_aq_get_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u64 *r_assoc, + struct ice_sq_cd *cd); + int +-ice_aq_map_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap, ++ice_aq_map_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u64 r_assoc, + struct ice_sq_cd *cd); + + #endif /* _ICE_SWITCH_H_ */ +diff --git a/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c b/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c +index 80dc4bcdd3a41..b3e1bdcb80f84 100644 +--- a/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c ++++ b/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c +@@ -26,24 +26,22 @@ static void ice_port_vlan_on(struct ice_vsi *vsi) + struct ice_vsi_vlan_ops *vlan_ops; + struct ice_pf *pf = vsi->back; + +- if (ice_is_dvm_ena(&pf->hw)) { +- vlan_ops = &vsi->outer_vlan_ops; +- +- /* setup outer VLAN ops */ +- vlan_ops->set_port_vlan = ice_vsi_set_outer_port_vlan; +- vlan_ops->clear_port_vlan = ice_vsi_clear_outer_port_vlan; ++ /* setup inner VLAN ops */ ++ vlan_ops = &vsi->inner_vlan_ops; + +- /* setup inner VLAN ops */ +- vlan_ops = &vsi->inner_vlan_ops; ++ if (ice_is_dvm_ena(&pf->hw)) { + vlan_ops->add_vlan = noop_vlan_arg; + vlan_ops->del_vlan = noop_vlan_arg; + vlan_ops->ena_stripping = ice_vsi_ena_inner_stripping; + vlan_ops->dis_stripping = ice_vsi_dis_inner_stripping; + vlan_ops->ena_insertion = ice_vsi_ena_inner_insertion; + vlan_ops->dis_insertion = ice_vsi_dis_inner_insertion; +- } else { +- vlan_ops = &vsi->inner_vlan_ops; + ++ /* setup outer VLAN ops */ ++ vlan_ops = &vsi->outer_vlan_ops; ++ vlan_ops->set_port_vlan = ice_vsi_set_outer_port_vlan; ++ vlan_ops->clear_port_vlan = ice_vsi_clear_outer_port_vlan; ++ } else { + vlan_ops->set_port_vlan = ice_vsi_set_inner_port_vlan; + vlan_ops->clear_port_vlan = ice_vsi_clear_inner_port_vlan; + } +diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.c b/drivers/net/ethernet/intel/igb/e1000_82575.c +index 8d6e44ee1895a..64dfc362d1dc4 100644 +--- a/drivers/net/ethernet/intel/igb/e1000_82575.c ++++ b/drivers/net/ethernet/intel/igb/e1000_82575.c +@@ -222,8 +222,7 @@ static s32 igb_init_phy_params_82575(struct e1000_hw *hw) + } + + /* set lan id */ +- hw->bus.func = (rd32(E1000_STATUS) & E1000_STATUS_FUNC_MASK) >> +- E1000_STATUS_FUNC_SHIFT; ++ hw->bus.func = FIELD_GET(E1000_STATUS_FUNC_MASK, rd32(E1000_STATUS)); + + /* Set phy->phy_addr and phy->id. 
*/ + ret_val = igb_get_phy_id_82575(hw); +@@ -262,8 +261,8 @@ static s32 igb_init_phy_params_82575(struct e1000_hw *hw) + if (ret_val) + goto out; + +- data = (data & E1000_M88E1112_MAC_CTRL_1_MODE_MASK) >> +- E1000_M88E1112_MAC_CTRL_1_MODE_SHIFT; ++ data = FIELD_GET(E1000_M88E1112_MAC_CTRL_1_MODE_MASK, ++ data); + if (data == E1000_M88E1112_AUTO_COPPER_SGMII || + data == E1000_M88E1112_AUTO_COPPER_BASEX) + hw->mac.ops.check_for_link = +@@ -330,8 +329,7 @@ static s32 igb_init_nvm_params_82575(struct e1000_hw *hw) + u32 eecd = rd32(E1000_EECD); + u16 size; + +- size = (u16)((eecd & E1000_EECD_SIZE_EX_MASK) >> +- E1000_EECD_SIZE_EX_SHIFT); ++ size = FIELD_GET(E1000_EECD_SIZE_EX_MASK, eecd); + + /* Added to a constant, "size" becomes the left-shift value + * for setting word_size. +@@ -2798,7 +2796,7 @@ static s32 igb_get_thermal_sensor_data_generic(struct e1000_hw *hw) + return 0; + + hw->nvm.ops.read(hw, ets_offset, 1, &ets_cfg); +- if (((ets_cfg & NVM_ETS_TYPE_MASK) >> NVM_ETS_TYPE_SHIFT) ++ if (FIELD_GET(NVM_ETS_TYPE_MASK, ets_cfg) + != NVM_ETS_TYPE_EMC) + return E1000_NOT_IMPLEMENTED; + +@@ -2808,10 +2806,8 @@ static s32 igb_get_thermal_sensor_data_generic(struct e1000_hw *hw) + + for (i = 1; i < num_sensors; i++) { + hw->nvm.ops.read(hw, (ets_offset + i), 1, &ets_sensor); +- sensor_index = ((ets_sensor & NVM_ETS_DATA_INDEX_MASK) >> +- NVM_ETS_DATA_INDEX_SHIFT); +- sensor_location = ((ets_sensor & NVM_ETS_DATA_LOC_MASK) >> +- NVM_ETS_DATA_LOC_SHIFT); ++ sensor_index = FIELD_GET(NVM_ETS_DATA_INDEX_MASK, ets_sensor); ++ sensor_location = FIELD_GET(NVM_ETS_DATA_LOC_MASK, ets_sensor); + + if (sensor_location != 0) + hw->phy.ops.read_i2c_byte(hw, +@@ -2859,20 +2855,17 @@ static s32 igb_init_thermal_sensor_thresh_generic(struct e1000_hw *hw) + return 0; + + hw->nvm.ops.read(hw, ets_offset, 1, &ets_cfg); +- if (((ets_cfg & NVM_ETS_TYPE_MASK) >> NVM_ETS_TYPE_SHIFT) ++ if (FIELD_GET(NVM_ETS_TYPE_MASK, ets_cfg) + != NVM_ETS_TYPE_EMC) + return E1000_NOT_IMPLEMENTED; + +- low_thresh_delta = ((ets_cfg & NVM_ETS_LTHRES_DELTA_MASK) >> +- NVM_ETS_LTHRES_DELTA_SHIFT); ++ low_thresh_delta = FIELD_GET(NVM_ETS_LTHRES_DELTA_MASK, ets_cfg); + num_sensors = (ets_cfg & NVM_ETS_NUM_SENSORS_MASK); + + for (i = 1; i <= num_sensors; i++) { + hw->nvm.ops.read(hw, (ets_offset + i), 1, &ets_sensor); +- sensor_index = ((ets_sensor & NVM_ETS_DATA_INDEX_MASK) >> +- NVM_ETS_DATA_INDEX_SHIFT); +- sensor_location = ((ets_sensor & NVM_ETS_DATA_LOC_MASK) >> +- NVM_ETS_DATA_LOC_SHIFT); ++ sensor_index = FIELD_GET(NVM_ETS_DATA_INDEX_MASK, ets_sensor); ++ sensor_location = FIELD_GET(NVM_ETS_DATA_LOC_MASK, ets_sensor); + therm_limit = ets_sensor & NVM_ETS_DATA_HTHRESH_MASK; + + hw->phy.ops.write_i2c_byte(hw, +diff --git a/drivers/net/ethernet/intel/igb/e1000_i210.c b/drivers/net/ethernet/intel/igb/e1000_i210.c +index b9b9d35494d27..503b239868e8e 100644 +--- a/drivers/net/ethernet/intel/igb/e1000_i210.c ++++ b/drivers/net/ethernet/intel/igb/e1000_i210.c +@@ -5,9 +5,9 @@ + * e1000_i211 + */ + +-#include <linux/types.h> ++#include <linux/bitfield.h> + #include <linux/if_ether.h> +- ++#include <linux/types.h> + #include "e1000_hw.h" + #include "e1000_i210.h" + +@@ -473,7 +473,7 @@ s32 igb_read_invm_version(struct e1000_hw *hw, + /* Check if we have second version location used */ + else if ((i == 1) && + ((*record & E1000_INVM_VER_FIELD_TWO) == 0)) { +- version = (*record & E1000_INVM_VER_FIELD_ONE) >> 3; ++ version = FIELD_GET(E1000_INVM_VER_FIELD_ONE, *record); + status = 0; + break; + } +@@ -483,8 +483,8 @@ s32 
igb_read_invm_version(struct e1000_hw *hw, + else if ((((*record & E1000_INVM_VER_FIELD_ONE) == 0) && + ((*record & 0x3) == 0)) || (((*record & 0x3) != 0) && + (i != 1))) { +- version = (*next_record & E1000_INVM_VER_FIELD_TWO) +- >> 13; ++ version = FIELD_GET(E1000_INVM_VER_FIELD_TWO, ++ *next_record); + status = 0; + break; + } +@@ -493,15 +493,15 @@ s32 igb_read_invm_version(struct e1000_hw *hw, + */ + else if (((*record & E1000_INVM_VER_FIELD_TWO) == 0) && + ((*record & 0x3) == 0)) { +- version = (*record & E1000_INVM_VER_FIELD_ONE) >> 3; ++ version = FIELD_GET(E1000_INVM_VER_FIELD_ONE, *record); + status = 0; + break; + } + } + + if (!status) { +- invm_ver->invm_major = (version & E1000_INVM_MAJOR_MASK) +- >> E1000_INVM_MAJOR_SHIFT; ++ invm_ver->invm_major = FIELD_GET(E1000_INVM_MAJOR_MASK, ++ version); + invm_ver->invm_minor = version & E1000_INVM_MINOR_MASK; + } + /* Read Image Type */ +@@ -520,7 +520,8 @@ s32 igb_read_invm_version(struct e1000_hw *hw, + ((*record & E1000_INVM_IMGTYPE_FIELD) == 0)) || + ((((*record & 0x3) != 0) && (i != 1)))) { + invm_ver->invm_img_type = +- (*next_record & E1000_INVM_IMGTYPE_FIELD) >> 23; ++ FIELD_GET(E1000_INVM_IMGTYPE_FIELD, ++ *next_record); + status = 0; + break; + } +diff --git a/drivers/net/ethernet/intel/igb/e1000_mac.c b/drivers/net/ethernet/intel/igb/e1000_mac.c +index caf91c6f52b4d..ceaec2cf08a43 100644 +--- a/drivers/net/ethernet/intel/igb/e1000_mac.c ++++ b/drivers/net/ethernet/intel/igb/e1000_mac.c +@@ -56,7 +56,7 @@ s32 igb_get_bus_info_pcie(struct e1000_hw *hw) + } + + reg = rd32(E1000_STATUS); +- bus->func = (reg & E1000_STATUS_FUNC_MASK) >> E1000_STATUS_FUNC_SHIFT; ++ bus->func = FIELD_GET(E1000_STATUS_FUNC_MASK, reg); + + return 0; + } +diff --git a/drivers/net/ethernet/intel/igb/e1000_nvm.c b/drivers/net/ethernet/intel/igb/e1000_nvm.c +index fa136e6e93285..2dcd64d6dec31 100644 +--- a/drivers/net/ethernet/intel/igb/e1000_nvm.c ++++ b/drivers/net/ethernet/intel/igb/e1000_nvm.c +@@ -1,9 +1,9 @@ + // SPDX-License-Identifier: GPL-2.0 + /* Copyright(c) 2007 - 2018 Intel Corporation. 
*/ + +-#include <linux/if_ether.h> ++#include <linux/bitfield.h> + #include <linux/delay.h> +- ++#include <linux/if_ether.h> + #include "e1000_mac.h" + #include "e1000_nvm.h" + +@@ -708,10 +708,10 @@ void igb_get_fw_version(struct e1000_hw *hw, struct e1000_fw_version *fw_vers) + */ + if ((etrack_test & NVM_MAJOR_MASK) != NVM_ETRACK_VALID) { + hw->nvm.ops.read(hw, NVM_VERSION, 1, &fw_version); +- fw_vers->eep_major = (fw_version & NVM_MAJOR_MASK) +- >> NVM_MAJOR_SHIFT; +- fw_vers->eep_minor = (fw_version & NVM_MINOR_MASK) +- >> NVM_MINOR_SHIFT; ++ fw_vers->eep_major = FIELD_GET(NVM_MAJOR_MASK, ++ fw_version); ++ fw_vers->eep_minor = FIELD_GET(NVM_MINOR_MASK, ++ fw_version); + fw_vers->eep_build = (fw_version & NVM_IMAGE_ID_MASK); + goto etrack_id; + } +@@ -753,15 +753,13 @@ void igb_get_fw_version(struct e1000_hw *hw, struct e1000_fw_version *fw_vers) + return; + } + hw->nvm.ops.read(hw, NVM_VERSION, 1, &fw_version); +- fw_vers->eep_major = (fw_version & NVM_MAJOR_MASK) +- >> NVM_MAJOR_SHIFT; ++ fw_vers->eep_major = FIELD_GET(NVM_MAJOR_MASK, fw_version); + + /* check for old style version format in newer images*/ + if ((fw_version & NVM_NEW_DEC_MASK) == 0x0) { + eeprom_verl = (fw_version & NVM_COMB_VER_MASK); + } else { +- eeprom_verl = (fw_version & NVM_MINOR_MASK) +- >> NVM_MINOR_SHIFT; ++ eeprom_verl = FIELD_GET(NVM_MINOR_MASK, fw_version); + } + /* Convert minor value to hex before assigning to output struct + * Val to be converted will not be higher than 99, per tool output +diff --git a/drivers/net/ethernet/intel/igb/e1000_phy.c b/drivers/net/ethernet/intel/igb/e1000_phy.c +index a018000f7db92..bed94e50a6693 100644 +--- a/drivers/net/ethernet/intel/igb/e1000_phy.c ++++ b/drivers/net/ethernet/intel/igb/e1000_phy.c +@@ -1,9 +1,9 @@ + // SPDX-License-Identifier: GPL-2.0 + /* Copyright(c) 2007 - 2018 Intel Corporation. 
*/ + +-#include <linux/if_ether.h> ++#include <linux/bitfield.h> + #include <linux/delay.h> +- ++#include <linux/if_ether.h> + #include "e1000_mac.h" + #include "e1000_phy.h" + +@@ -1682,8 +1682,7 @@ s32 igb_get_cable_length_m88(struct e1000_hw *hw) + if (ret_val) + goto out; + +- index = (phy_data & M88E1000_PSSR_CABLE_LENGTH) >> +- M88E1000_PSSR_CABLE_LENGTH_SHIFT; ++ index = FIELD_GET(M88E1000_PSSR_CABLE_LENGTH, phy_data); + if (index >= ARRAY_SIZE(e1000_m88_cable_length_table) - 1) { + ret_val = -E1000_ERR_PHY; + goto out; +@@ -1796,8 +1795,7 @@ s32 igb_get_cable_length_m88_gen2(struct e1000_hw *hw) + if (ret_val) + goto out; + +- index = (phy_data & M88E1000_PSSR_CABLE_LENGTH) >> +- M88E1000_PSSR_CABLE_LENGTH_SHIFT; ++ index = FIELD_GET(M88E1000_PSSR_CABLE_LENGTH, phy_data); + if (index >= ARRAY_SIZE(e1000_m88_cable_length_table) - 1) { + ret_val = -E1000_ERR_PHY; + goto out; +@@ -2578,8 +2576,7 @@ s32 igb_get_cable_length_82580(struct e1000_hw *hw) + if (ret_val) + goto out; + +- length = (phy_data & I82580_DSTATUS_CABLE_LENGTH) >> +- I82580_DSTATUS_CABLE_LENGTH_SHIFT; ++ length = FIELD_GET(I82580_DSTATUS_CABLE_LENGTH, phy_data); + + if (length == E1000_CABLE_LENGTH_UNDEFINED) + ret_val = -E1000_ERR_PHY; +diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c +index 4ee849985e2b8..92b2be06a6e93 100644 +--- a/drivers/net/ethernet/intel/igb/igb_ethtool.c ++++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c +@@ -2434,7 +2434,7 @@ static int igb_get_ts_info(struct net_device *dev, + } + } + +-#define ETHER_TYPE_FULL_MASK ((__force __be16)~0) ++#define ETHER_TYPE_FULL_MASK cpu_to_be16(FIELD_MAX(U16_MAX)) + static int igb_get_ethtool_nfc_entry(struct igb_adapter *adapter, + struct ethtool_rxnfc *cmd) + { +@@ -2733,8 +2733,8 @@ static int igb_rxnfc_write_vlan_prio_filter(struct igb_adapter *adapter, + u32 vlapqf; + + vlapqf = rd32(E1000_VLAPQF); +- vlan_priority = (ntohs(input->filter.vlan_tci) & VLAN_PRIO_MASK) +- >> VLAN_PRIO_SHIFT; ++ vlan_priority = FIELD_GET(VLAN_PRIO_MASK, ++ ntohs(input->filter.vlan_tci)); + queue_index = (vlapqf >> (vlan_priority * 4)) & E1000_VLAPQF_QUEUE_MASK; + + /* check whether this vlan prio is already set */ +@@ -2817,7 +2817,7 @@ static void igb_clear_vlan_prio_filter(struct igb_adapter *adapter, + u8 vlan_priority; + u32 vlapqf; + +- vlan_priority = (vlan_tci & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; ++ vlan_priority = FIELD_GET(VLAN_PRIO_MASK, vlan_tci); + + vlapqf = rd32(E1000_VLAPQF); + vlapqf &= ~E1000_VLAPQF_P_VALID(vlan_priority); +diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c +index 11921141b6079..4431e7693d45f 100644 +--- a/drivers/net/ethernet/intel/igb/igb_main.c ++++ b/drivers/net/ethernet/intel/igb/igb_main.c +@@ -7283,7 +7283,7 @@ static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf) + static int igb_set_vf_multicasts(struct igb_adapter *adapter, + u32 *msgbuf, u32 vf) + { +- int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT; ++ int n = FIELD_GET(E1000_VT_MSGINFO_MASK, msgbuf[0]); + u16 *hash_list = (u16 *)&msgbuf[1]; + struct vf_data_storage *vf_data = &adapter->vf_data[vf]; + int i; +@@ -7543,7 +7543,7 @@ static int igb_ndo_set_vf_vlan(struct net_device *netdev, int vf, + + static int igb_set_vf_vlan_msg(struct igb_adapter *adapter, u32 *msgbuf, u32 vf) + { +- int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT; ++ int add = FIELD_GET(E1000_VT_MSGINFO_MASK, msgbuf[0]); + int vid = 
(msgbuf[1] & E1000_VLVF_VLANID_MASK); + int ret; + +diff --git a/drivers/net/ethernet/intel/igbvf/mbx.c b/drivers/net/ethernet/intel/igbvf/mbx.c +index a3cd7ac48d4b6..d15282ee5ea8f 100644 +--- a/drivers/net/ethernet/intel/igbvf/mbx.c ++++ b/drivers/net/ethernet/intel/igbvf/mbx.c +@@ -1,6 +1,7 @@ + // SPDX-License-Identifier: GPL-2.0 + /* Copyright(c) 2009 - 2018 Intel Corporation. */ + ++#include <linux/bitfield.h> + #include "mbx.h" + + /** +diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c +index 7ff2752dd763a..c5012fa36af2f 100644 +--- a/drivers/net/ethernet/intel/igbvf/netdev.c ++++ b/drivers/net/ethernet/intel/igbvf/netdev.c +@@ -3,25 +3,25 @@ + + #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +-#include <linux/module.h> +-#include <linux/types.h> +-#include <linux/init.h> +-#include <linux/pci.h> +-#include <linux/vmalloc.h> +-#include <linux/pagemap.h> ++#include <linux/bitfield.h> + #include <linux/delay.h> +-#include <linux/netdevice.h> +-#include <linux/tcp.h> +-#include <linux/ipv6.h> +-#include <linux/slab.h> +-#include <net/checksum.h> +-#include <net/ip6_checksum.h> +-#include <linux/mii.h> + #include <linux/ethtool.h> + #include <linux/if_vlan.h> ++#include <linux/init.h> ++#include <linux/ipv6.h> ++#include <linux/mii.h> ++#include <linux/module.h> ++#include <linux/netdevice.h> ++#include <linux/pagemap.h> ++#include <linux/pci.h> + #include <linux/prefetch.h> + #include <linux/sctp.h> +- ++#include <linux/slab.h> ++#include <linux/tcp.h> ++#include <linux/types.h> ++#include <linux/vmalloc.h> ++#include <net/checksum.h> ++#include <net/ip6_checksum.h> + #include "igbvf.h" + + char igbvf_driver_name[] = "igbvf"; +@@ -273,9 +273,8 @@ static bool igbvf_clean_rx_irq(struct igbvf_adapter *adapter, + * that case, it fills the header buffer and spills the rest + * into the page. + */ +- hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hs_rss.hdr_info) +- & E1000_RXDADV_HDRBUFLEN_MASK) >> +- E1000_RXDADV_HDRBUFLEN_SHIFT; ++ hlen = le16_get_bits(rx_desc->wb.lower.lo_dword.hs_rss.hdr_info, ++ E1000_RXDADV_HDRBUFLEN_MASK); + if (hlen > adapter->rx_ps_hdr_size) + hlen = adapter->rx_ps_hdr_size; + +diff --git a/drivers/net/ethernet/intel/igc/igc_i225.c b/drivers/net/ethernet/intel/igc/igc_i225.c +index 17546a035ab19..d2562c8e8015e 100644 +--- a/drivers/net/ethernet/intel/igc/igc_i225.c ++++ b/drivers/net/ethernet/intel/igc/igc_i225.c +@@ -1,6 +1,7 @@ + // SPDX-License-Identifier: GPL-2.0 + /* Copyright (c) 2018 Intel Corporation */ + ++#include <linux/bitfield.h> + #include <linux/delay.h> + + #include "igc_hw.h" +diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c +index fc1de116d5548..e83700ad7e622 100644 +--- a/drivers/net/ethernet/intel/igc/igc_main.c ++++ b/drivers/net/ethernet/intel/igc/igc_main.c +@@ -1640,10 +1640,6 @@ static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb, + + if (unlikely(test_bit(IGC_RING_FLAG_TX_HWTSTAMP, &tx_ring->flags) && + skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { +- /* FIXME: add support for retrieving timestamps from +- * the other timer registers before skipping the +- * timestamping request. 
+- */ + unsigned long flags; + u32 tstamp_flags; + +diff --git a/drivers/net/ethernet/intel/igc/igc_phy.c b/drivers/net/ethernet/intel/igc/igc_phy.c +index 53b77c969c857..d0d9e7170154c 100644 +--- a/drivers/net/ethernet/intel/igc/igc_phy.c ++++ b/drivers/net/ethernet/intel/igc/igc_phy.c +@@ -1,6 +1,7 @@ + // SPDX-License-Identifier: GPL-2.0 + /* Copyright (c) 2018 Intel Corporation */ + ++#include <linux/bitfield.h> + #include "igc_phy.h" + + /** +diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c +index b2a0f2aaa05be..2e6e0365154a1 100644 +--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c ++++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c +@@ -684,7 +684,7 @@ void ixgbe_set_lan_id_multi_port_pcie(struct ixgbe_hw *hw) + u32 reg; + + reg = IXGBE_READ_REG(hw, IXGBE_STATUS); +- bus->func = (reg & IXGBE_STATUS_LAN_ID) >> IXGBE_STATUS_LAN_ID_SHIFT; ++ bus->func = FIELD_GET(IXGBE_STATUS_LAN_ID, reg); + bus->lan_id = bus->func; + + /* check for a port swap */ +@@ -695,8 +695,8 @@ void ixgbe_set_lan_id_multi_port_pcie(struct ixgbe_hw *hw) + /* Get MAC instance from EEPROM for configuring CS4227 */ + if (hw->device_id == IXGBE_DEV_ID_X550EM_A_SFP) { + hw->eeprom.ops.read(hw, IXGBE_EEPROM_CTRL_4, &ee_ctrl_4); +- bus->instance_id = (ee_ctrl_4 & IXGBE_EE_CTRL_4_INST_ID) >> +- IXGBE_EE_CTRL_4_INST_ID_SHIFT; ++ bus->instance_id = FIELD_GET(IXGBE_EE_CTRL_4_INST_ID, ++ ee_ctrl_4); + } + } + +@@ -870,10 +870,9 @@ s32 ixgbe_init_eeprom_params_generic(struct ixgbe_hw *hw) + * SPI EEPROM is assumed here. This code would need to + * change if a future EEPROM is not SPI. + */ +- eeprom_size = (u16)((eec & IXGBE_EEC_SIZE) >> +- IXGBE_EEC_SIZE_SHIFT); ++ eeprom_size = FIELD_GET(IXGBE_EEC_SIZE, eec); + eeprom->word_size = BIT(eeprom_size + +- IXGBE_EEPROM_WORD_SIZE_SHIFT); ++ IXGBE_EEPROM_WORD_SIZE_SHIFT); + } + + if (eec & IXGBE_EEC_ADDR_SIZE) +@@ -3935,10 +3934,10 @@ s32 ixgbe_get_thermal_sensor_data_generic(struct ixgbe_hw *hw) + if (status) + return status; + +- sensor_index = ((ets_sensor & IXGBE_ETS_DATA_INDEX_MASK) >> +- IXGBE_ETS_DATA_INDEX_SHIFT); +- sensor_location = ((ets_sensor & IXGBE_ETS_DATA_LOC_MASK) >> +- IXGBE_ETS_DATA_LOC_SHIFT); ++ sensor_index = FIELD_GET(IXGBE_ETS_DATA_INDEX_MASK, ++ ets_sensor); ++ sensor_location = FIELD_GET(IXGBE_ETS_DATA_LOC_MASK, ++ ets_sensor); + + if (sensor_location != 0) { + status = hw->phy.ops.read_i2c_byte(hw, +@@ -3982,8 +3981,7 @@ s32 ixgbe_init_thermal_sensor_thresh_generic(struct ixgbe_hw *hw) + if (status) + return status; + +- low_thresh_delta = ((ets_cfg & IXGBE_ETS_LTHRES_DELTA_MASK) >> +- IXGBE_ETS_LTHRES_DELTA_SHIFT); ++ low_thresh_delta = FIELD_GET(IXGBE_ETS_LTHRES_DELTA_MASK, ets_cfg); + num_sensors = (ets_cfg & IXGBE_ETS_NUM_SENSORS_MASK); + if (num_sensors > IXGBE_MAX_SENSORS) + num_sensors = IXGBE_MAX_SENSORS; +@@ -3997,10 +3995,10 @@ s32 ixgbe_init_thermal_sensor_thresh_generic(struct ixgbe_hw *hw) + ets_offset + 1 + i); + continue; + } +- sensor_index = ((ets_sensor & IXGBE_ETS_DATA_INDEX_MASK) >> +- IXGBE_ETS_DATA_INDEX_SHIFT); +- sensor_location = ((ets_sensor & IXGBE_ETS_DATA_LOC_MASK) >> +- IXGBE_ETS_DATA_LOC_SHIFT); ++ sensor_index = FIELD_GET(IXGBE_ETS_DATA_INDEX_MASK, ++ ets_sensor); ++ sensor_location = FIELD_GET(IXGBE_ETS_DATA_LOC_MASK, ++ ets_sensor); + therm_limit = ets_sensor & IXGBE_ETS_DATA_HTHRESH_MASK; + + hw->phy.ops.write_i2c_byte(hw, +diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c +index 
13a6fca31004a..866024f2b9eeb 100644 +--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c ++++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c +@@ -914,7 +914,13 @@ int ixgbe_ipsec_vf_add_sa(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf) + goto err_out; + } + +- xs = kzalloc(sizeof(*xs), GFP_KERNEL); ++ algo = xfrm_aead_get_byname(aes_gcm_name, IXGBE_IPSEC_AUTH_BITS, 1); ++ if (unlikely(!algo)) { ++ err = -ENOENT; ++ goto err_out; ++ } ++ ++ xs = kzalloc(sizeof(*xs), GFP_ATOMIC); + if (unlikely(!xs)) { + err = -ENOMEM; + goto err_out; +@@ -930,14 +936,8 @@ int ixgbe_ipsec_vf_add_sa(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf) + memcpy(&xs->id.daddr.a4, sam->addr, sizeof(xs->id.daddr.a4)); + xs->xso.dev = adapter->netdev; + +- algo = xfrm_aead_get_byname(aes_gcm_name, IXGBE_IPSEC_AUTH_BITS, 1); +- if (unlikely(!algo)) { +- err = -ENOENT; +- goto err_xs; +- } +- + aead_len = sizeof(*xs->aead) + IXGBE_IPSEC_KEY_BITS / 8; +- xs->aead = kzalloc(aead_len, GFP_KERNEL); ++ xs->aead = kzalloc(aead_len, GFP_ATOMIC); + if (unlikely(!xs->aead)) { + err = -ENOMEM; + goto err_xs; +diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +index cb23aad5953b0..f245f3df40fca 100644 +--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c ++++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +@@ -11409,7 +11409,7 @@ static pci_ers_result_t ixgbe_io_error_detected(struct pci_dev *pdev, + if ((pf_func & 1) == (pdev->devfn & 1)) { + unsigned int device_id; + +- vf = (req_id & 0x7F) >> 1; ++ vf = FIELD_GET(0x7F, req_id); + e_dev_err("VF %d has caused a PCIe error\n", vf); + e_dev_err("TLP: dw0: %8.8x\tdw1: %8.8x\tdw2: " + "%8.8x\tdw3: %8.8x\n", +diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c +index 930dc50719364..f28140a05f091 100644 +--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c ++++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c +@@ -276,9 +276,8 @@ s32 ixgbe_identify_phy_generic(struct ixgbe_hw *hw) + return 0; + + if (hw->phy.nw_mng_if_sel) { +- phy_addr = (hw->phy.nw_mng_if_sel & +- IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD) >> +- IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD_SHIFT; ++ phy_addr = FIELD_GET(IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD, ++ hw->phy.nw_mng_if_sel); + if (ixgbe_probe_phy(hw, phy_addr)) + return 0; + else +@@ -1447,8 +1446,7 @@ s32 ixgbe_reset_phy_nl(struct ixgbe_hw *hw) + ret_val = hw->eeprom.ops.read(hw, data_offset, &eword); + if (ret_val) + goto err_eeprom; +- control = (eword & IXGBE_CONTROL_MASK_NL) >> +- IXGBE_CONTROL_SHIFT_NL; ++ control = FIELD_GET(IXGBE_CONTROL_MASK_NL, eword); + edata = eword & IXGBE_DATA_MASK_NL; + switch (control) { + case IXGBE_DELAY_NL: +diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +index 198ab9d97618c..d0a6c220a12ac 100644 +--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c ++++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +@@ -363,8 +363,7 @@ int ixgbe_pci_sriov_configure(struct pci_dev *dev, int num_vfs) + static int ixgbe_set_vf_multicasts(struct ixgbe_adapter *adapter, + u32 *msgbuf, u32 vf) + { +- int entries = (msgbuf[0] & IXGBE_VT_MSGINFO_MASK) +- >> IXGBE_VT_MSGINFO_SHIFT; ++ int entries = FIELD_GET(IXGBE_VT_MSGINFO_MASK, msgbuf[0]); + u16 *hash_list = (u16 *)&msgbuf[1]; + struct vf_data_storage *vfinfo = &adapter->vfinfo[vf]; + struct ixgbe_hw *hw = &adapter->hw; +@@ -971,7 +970,7 @@ static int ixgbe_set_vf_mac_addr(struct ixgbe_adapter *adapter, + static int 
ixgbe_set_vf_vlan_msg(struct ixgbe_adapter *adapter, + u32 *msgbuf, u32 vf) + { +- u32 add = (msgbuf[0] & IXGBE_VT_MSGINFO_MASK) >> IXGBE_VT_MSGINFO_SHIFT; ++ u32 add = FIELD_GET(IXGBE_VT_MSGINFO_MASK, msgbuf[0]); + u32 vid = (msgbuf[1] & IXGBE_VLVF_VLANID_MASK); + u8 tcs = adapter->hw_tcs; + +@@ -994,8 +993,7 @@ static int ixgbe_set_vf_macvlan_msg(struct ixgbe_adapter *adapter, + u32 *msgbuf, u32 vf) + { + u8 *new_mac = ((u8 *)(&msgbuf[1])); +- int index = (msgbuf[0] & IXGBE_VT_MSGINFO_MASK) >> +- IXGBE_VT_MSGINFO_SHIFT; ++ int index = FIELD_GET(IXGBE_VT_MSGINFO_MASK, msgbuf[0]); + int err; + + if (adapter->vfinfo[vf].pf_set_mac && !adapter->vfinfo[vf].trusted && +diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c +index 15325c549d9b5..57a912e4653fc 100644 +--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c ++++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c +@@ -187,16 +187,16 @@ s32 ixgbe_start_hw_X540(struct ixgbe_hw *hw) + s32 ixgbe_init_eeprom_params_X540(struct ixgbe_hw *hw) + { + struct ixgbe_eeprom_info *eeprom = &hw->eeprom; +- u32 eec; +- u16 eeprom_size; + + if (eeprom->type == ixgbe_eeprom_uninitialized) { ++ u16 eeprom_size; ++ u32 eec; ++ + eeprom->semaphore_delay = 10; + eeprom->type = ixgbe_flash; + + eec = IXGBE_READ_REG(hw, IXGBE_EEC(hw)); +- eeprom_size = (u16)((eec & IXGBE_EEC_SIZE) >> +- IXGBE_EEC_SIZE_SHIFT); ++ eeprom_size = FIELD_GET(IXGBE_EEC_SIZE, eec); + eeprom->word_size = BIT(eeprom_size + + IXGBE_EEPROM_WORD_SIZE_SHIFT); + +diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c +index cdc912bba8089..c1adc94a5a657 100644 +--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c ++++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c +@@ -630,16 +630,16 @@ static s32 ixgbe_fc_autoneg_fw(struct ixgbe_hw *hw) + static s32 ixgbe_init_eeprom_params_X550(struct ixgbe_hw *hw) + { + struct ixgbe_eeprom_info *eeprom = &hw->eeprom; +- u32 eec; +- u16 eeprom_size; + + if (eeprom->type == ixgbe_eeprom_uninitialized) { ++ u16 eeprom_size; ++ u32 eec; ++ + eeprom->semaphore_delay = 10; + eeprom->type = ixgbe_flash; + + eec = IXGBE_READ_REG(hw, IXGBE_EEC(hw)); +- eeprom_size = (u16)((eec & IXGBE_EEC_SIZE) >> +- IXGBE_EEC_SIZE_SHIFT); ++ eeprom_size = FIELD_GET(IXGBE_EEC_SIZE, eec); + eeprom->word_size = BIT(eeprom_size + + IXGBE_EEPROM_WORD_SIZE_SHIFT); + +@@ -714,8 +714,7 @@ static s32 ixgbe_read_iosf_sb_reg_x550(struct ixgbe_hw *hw, u32 reg_addr, + ret = ixgbe_iosf_wait(hw, &command); + + if ((command & IXGBE_SB_IOSF_CTRL_RESP_STAT_MASK) != 0) { +- error = (command & IXGBE_SB_IOSF_CTRL_CMPL_ERR_MASK) >> +- IXGBE_SB_IOSF_CTRL_CMPL_ERR_SHIFT; ++ error = FIELD_GET(IXGBE_SB_IOSF_CTRL_CMPL_ERR_MASK, command); + hw_dbg(hw, "Failed to read, error %x\n", error); + ret = -EIO; + goto out; +@@ -1415,8 +1414,7 @@ static s32 ixgbe_write_iosf_sb_reg_x550(struct ixgbe_hw *hw, u32 reg_addr, + ret = ixgbe_iosf_wait(hw, &command); + + if ((command & IXGBE_SB_IOSF_CTRL_RESP_STAT_MASK) != 0) { +- error = (command & IXGBE_SB_IOSF_CTRL_CMPL_ERR_MASK) >> +- IXGBE_SB_IOSF_CTRL_CMPL_ERR_SHIFT; ++ error = FIELD_GET(IXGBE_SB_IOSF_CTRL_CMPL_ERR_MASK, command); + hw_dbg(hw, "Failed to write, error %x\n", error); + return -EIO; + } +@@ -3229,9 +3227,8 @@ static void ixgbe_read_mng_if_sel_x550em(struct ixgbe_hw *hw) + */ + if (hw->mac.type == ixgbe_mac_x550em_a && + hw->phy.nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_MDIO_ACT) { +- hw->phy.mdio.prtad = (hw->phy.nw_mng_if_sel & +- 
IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD) >> +- IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD_SHIFT; ++ hw->phy.mdio.prtad = FIELD_GET(IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD, ++ hw->phy.nw_mng_if_sel); + } + } + +diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c +index 6c18d3d2442eb..2539c985f695a 100644 +--- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c ++++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c +@@ -808,6 +808,11 @@ static int cgx_lmac_enadis_pause_frm(void *cgxd, int lmac_id, + if (!is_lmac_valid(cgx, lmac_id)) + return -ENODEV; + ++ cfg = cgx_read(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL); ++ cfg &= ~CGX_GMP_GMI_RXX_FRM_CTL_CTL_BCK; ++ cfg |= rx_pause ? CGX_GMP_GMI_RXX_FRM_CTL_CTL_BCK : 0x0; ++ cgx_write(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL, cfg); ++ + cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL); + cfg &= ~CGX_SMUX_RX_FRM_CTL_CTL_BCK; + cfg |= rx_pause ? CGX_SMUX_RX_FRM_CTL_CTL_BCK : 0x0; +diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c +index 9181ac5de912a..19075f217d00c 100644 +--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c ++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c +@@ -160,6 +160,8 @@ static int rvu_map_cgx_lmac_pf(struct rvu *rvu) + continue; + lmac_bmap = cgx_get_lmac_bmap(rvu_cgx_pdata(cgx, rvu)); + for_each_set_bit(iter, &lmac_bmap, rvu->hw->lmac_per_cgx) { ++ if (iter >= MAX_LMAC_COUNT) ++ continue; + lmac = cgx_get_lmacid(rvu_cgx_pdata(cgx, rvu), + iter); + rvu->pf2cgxlmac_map[pf] = cgxlmac_id_to_bmap(cgx, lmac); +diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +index 55639c133dd02..91a4ea529d077 100644 +--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c ++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +@@ -1669,7 +1669,7 @@ static int npc_fwdb_detect_load_prfl_img(struct rvu *rvu, uint64_t prfl_sz, + struct npc_coalesced_kpu_prfl *img_data = NULL; + int i = 0, rc = -EINVAL; + void __iomem *kpu_prfl_addr; +- u16 offset; ++ u32 offset; + + img_data = (struct npc_coalesced_kpu_prfl __force *)rvu->kpu_prfl_addr; + if (le64_to_cpu(img_data->signature) == KPU_SIGN && +diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +index b40bd0e467514..3f46d5e0fb2ec 100644 +--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c ++++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +@@ -1933,7 +1933,7 @@ int otx2_open(struct net_device *netdev) + * mcam entries are enabled to receive the packets. Hence disable the + * packet I/O. 
+ */ +- if (err == EIO) ++ if (err == -EIO) + goto err_disable_rxtx; + else if (err) + goto err_tx_stop_queues; +diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c +index aaf1faed4133e..7bb92e2dacda6 100644 +--- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c ++++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c +@@ -14,6 +14,7 @@ + #include <linux/module.h> + #include <linux/phy.h> + #include <linux/platform_device.h> ++#include <linux/rtnetlink.h> + #include <linux/skbuff.h> + + #include "mlxbf_gige.h" +@@ -139,13 +140,10 @@ static int mlxbf_gige_open(struct net_device *netdev) + control |= MLXBF_GIGE_CONTROL_PORT_EN; + writeq(control, priv->base + MLXBF_GIGE_CONTROL); + +- err = mlxbf_gige_request_irqs(priv); +- if (err) +- return err; + mlxbf_gige_cache_stats(priv); + err = mlxbf_gige_clean_port(priv); + if (err) +- goto free_irqs; ++ return err; + + /* Clear driver's valid_polarity to match hardware, + * since the above call to clean_port() resets the +@@ -157,7 +155,7 @@ static int mlxbf_gige_open(struct net_device *netdev) + + err = mlxbf_gige_tx_init(priv); + if (err) +- goto free_irqs; ++ goto phy_deinit; + err = mlxbf_gige_rx_init(priv); + if (err) + goto tx_deinit; +@@ -166,6 +164,10 @@ static int mlxbf_gige_open(struct net_device *netdev) + napi_enable(&priv->napi); + netif_start_queue(netdev); + ++ err = mlxbf_gige_request_irqs(priv); ++ if (err) ++ goto napi_deinit; ++ + /* Set bits in INT_EN that we care about */ + int_en = MLXBF_GIGE_INT_EN_HW_ACCESS_ERROR | + MLXBF_GIGE_INT_EN_TX_CHECKSUM_INPUTS | +@@ -182,11 +184,17 @@ static int mlxbf_gige_open(struct net_device *netdev) + + return 0; + ++napi_deinit: ++ netif_stop_queue(netdev); ++ napi_disable(&priv->napi); ++ netif_napi_del(&priv->napi); ++ mlxbf_gige_rx_deinit(priv); ++ + tx_deinit: + mlxbf_gige_tx_deinit(priv); + +-free_irqs: +- mlxbf_gige_free_irqs(priv); ++phy_deinit: ++ phy_stop(phydev); + return err; + } + +@@ -487,8 +495,13 @@ static void mlxbf_gige_shutdown(struct platform_device *pdev) + { + struct mlxbf_gige *priv = platform_get_drvdata(pdev); + +- writeq(0, priv->base + MLXBF_GIGE_INT_EN); +- mlxbf_gige_clean_port(priv); ++ rtnl_lock(); ++ netif_device_detach(priv->netdev); ++ ++ if (netif_running(priv->netdev)) ++ dev_close(priv->netdev); ++ ++ rtnl_unlock(); + } + + static const struct acpi_device_id __maybe_unused mlxbf_gige_acpi_match[] = { +diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c +index c81cdeb4d4e7e..0b6174748d2b4 100644 +--- a/drivers/net/ethernet/microchip/lan743x_main.c ++++ b/drivers/net/ethernet/microchip/lan743x_main.c +@@ -25,6 +25,8 @@ + #define PCS_POWER_STATE_DOWN 0x6 + #define PCS_POWER_STATE_UP 0x4 + ++#define RFE_RD_FIFO_TH_3_DWORDS 0x3 ++ + static void pci11x1x_strap_get_status(struct lan743x_adapter *adapter) + { + u32 chip_rev; +@@ -3223,6 +3225,21 @@ static void lan743x_full_cleanup(struct lan743x_adapter *adapter) + lan743x_pci_cleanup(adapter); + } + ++static void pci11x1x_set_rfe_rd_fifo_threshold(struct lan743x_adapter *adapter) ++{ ++ u16 rev = adapter->csr.id_rev & ID_REV_CHIP_REV_MASK_; ++ ++ if (rev == ID_REV_CHIP_REV_PCI11X1X_B0_) { ++ u32 misc_ctl; ++ ++ misc_ctl = lan743x_csr_read(adapter, MISC_CTL_0); ++ misc_ctl &= ~MISC_CTL_0_RFE_READ_FIFO_MASK_; ++ misc_ctl |= FIELD_PREP(MISC_CTL_0_RFE_READ_FIFO_MASK_, ++ RFE_RD_FIFO_TH_3_DWORDS); ++ lan743x_csr_write(adapter, MISC_CTL_0, misc_ctl); ++ } ++} ++ + 
static int lan743x_hardware_init(struct lan743x_adapter *adapter, + struct pci_dev *pdev) + { +@@ -3238,6 +3255,7 @@ static int lan743x_hardware_init(struct lan743x_adapter *adapter, + pci11x1x_strap_get_status(adapter); + spin_lock_init(&adapter->eth_syslock_spinlock); + mutex_init(&adapter->sgmii_rw_lock); ++ pci11x1x_set_rfe_rd_fifo_threshold(adapter); + } else { + adapter->max_tx_channels = LAN743X_MAX_TX_CHANNELS; + adapter->used_tx_channels = LAN743X_USED_TX_CHANNELS; +diff --git a/drivers/net/ethernet/microchip/lan743x_main.h b/drivers/net/ethernet/microchip/lan743x_main.h +index 52609fc13ad95..f0b486f85450e 100644 +--- a/drivers/net/ethernet/microchip/lan743x_main.h ++++ b/drivers/net/ethernet/microchip/lan743x_main.h +@@ -26,6 +26,7 @@ + #define ID_REV_CHIP_REV_MASK_ (0x0000FFFF) + #define ID_REV_CHIP_REV_A0_ (0x00000000) + #define ID_REV_CHIP_REV_B0_ (0x00000010) ++#define ID_REV_CHIP_REV_PCI11X1X_B0_ (0x000000B0) + + #define FPGA_REV (0x04) + #define FPGA_REV_GET_MINOR_(fpga_rev) (((fpga_rev) >> 8) & 0x000000FF) +@@ -311,6 +312,9 @@ + #define SGMII_CTL_LINK_STATUS_SOURCE_ BIT(8) + #define SGMII_CTL_SGMII_POWER_DN_ BIT(1) + ++#define MISC_CTL_0 (0x920) ++#define MISC_CTL_0_RFE_READ_FIFO_MASK_ GENMASK(6, 4) ++ + /* Vendor Specific SGMII MMD details */ + #define SR_VSMMD_PCS_ID1 0x0004 + #define SR_VSMMD_PCS_ID2 0x0005 +diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c +index 48ea4aeeea5d4..e443d69e39511 100644 +--- a/drivers/net/ethernet/microsoft/mana/mana_en.c ++++ b/drivers/net/ethernet/microsoft/mana/mana_en.c +@@ -601,7 +601,7 @@ static void mana_get_rxbuf_cfg(int mtu, u32 *datasize, u32 *alloc_size, + + *alloc_size = mtu + MANA_RXBUF_PAD + *headroom; + +- *datasize = ALIGN(mtu + ETH_HLEN, MANA_RX_DATA_ALIGN); ++ *datasize = mtu + ETH_HLEN; + } + + static int mana_pre_alloc_rxbufs(struct mana_port_context *mpc, int new_mtu) +diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c +index 81fd31f6fac46..e6f1da66c4500 100644 +--- a/drivers/net/ethernet/realtek/r8169_main.c ++++ b/drivers/net/ethernet/realtek/r8169_main.c +@@ -1201,17 +1201,40 @@ static void rtl8168ep_stop_cmac(struct rtl8169_private *tp) + RTL_W8(tp, IBCR0, RTL_R8(tp, IBCR0) & ~0x01); + } + ++static void rtl_dash_loop_wait(struct rtl8169_private *tp, ++ const struct rtl_cond *c, ++ unsigned long usecs, int n, bool high) ++{ ++ if (!tp->dash_enabled) ++ return; ++ rtl_loop_wait(tp, c, usecs, n, high); ++} ++ ++static void rtl_dash_loop_wait_high(struct rtl8169_private *tp, ++ const struct rtl_cond *c, ++ unsigned long d, int n) ++{ ++ rtl_dash_loop_wait(tp, c, d, n, true); ++} ++ ++static void rtl_dash_loop_wait_low(struct rtl8169_private *tp, ++ const struct rtl_cond *c, ++ unsigned long d, int n) ++{ ++ rtl_dash_loop_wait(tp, c, d, n, false); ++} ++ + static void rtl8168dp_driver_start(struct rtl8169_private *tp) + { + r8168dp_oob_notify(tp, OOB_CMD_DRIVER_START); +- rtl_loop_wait_high(tp, &rtl_dp_ocp_read_cond, 10000, 10); ++ rtl_dash_loop_wait_high(tp, &rtl_dp_ocp_read_cond, 10000, 10); + } + + static void rtl8168ep_driver_start(struct rtl8169_private *tp) + { + r8168ep_ocp_write(tp, 0x01, 0x180, OOB_CMD_DRIVER_START); + r8168ep_ocp_write(tp, 0x01, 0x30, r8168ep_ocp_read(tp, 0x30) | 0x01); +- rtl_loop_wait_high(tp, &rtl_ep_ocp_read_cond, 10000, 30); ++ rtl_dash_loop_wait_high(tp, &rtl_ep_ocp_read_cond, 10000, 30); + } + + static void rtl8168_driver_start(struct rtl8169_private *tp) +@@ -1225,7 +1248,7 @@ static 
void rtl8168_driver_start(struct rtl8169_private *tp) + static void rtl8168dp_driver_stop(struct rtl8169_private *tp) + { + r8168dp_oob_notify(tp, OOB_CMD_DRIVER_STOP); +- rtl_loop_wait_low(tp, &rtl_dp_ocp_read_cond, 10000, 10); ++ rtl_dash_loop_wait_low(tp, &rtl_dp_ocp_read_cond, 10000, 10); + } + + static void rtl8168ep_driver_stop(struct rtl8169_private *tp) +@@ -1233,7 +1256,7 @@ static void rtl8168ep_driver_stop(struct rtl8169_private *tp) + rtl8168ep_stop_cmac(tp); + r8168ep_ocp_write(tp, 0x01, 0x180, OOB_CMD_DRIVER_STOP); + r8168ep_ocp_write(tp, 0x01, 0x30, r8168ep_ocp_read(tp, 0x30) | 0x01); +- rtl_loop_wait_low(tp, &rtl_ep_ocp_read_cond, 10000, 10); ++ rtl_dash_loop_wait_low(tp, &rtl_ep_ocp_read_cond, 10000, 10); + } + + static void rtl8168_driver_stop(struct rtl8169_private *tp) +@@ -5055,6 +5078,15 @@ static int r8169_mdio_register(struct rtl8169_private *tp) + struct mii_bus *new_bus; + int ret; + ++ /* On some boards with this chip version the BIOS is buggy and misses ++ * to reset the PHY page selector. This results in the PHY ID read ++ * accessing registers on a different page, returning a more or ++ * less random value. Fix this by resetting the page selector first. ++ */ ++ if (tp->mac_version == RTL_GIGA_MAC_VER_25 || ++ tp->mac_version == RTL_GIGA_MAC_VER_26) ++ r8169_mdio_write(tp, 0x1f, 0); ++ + new_bus = devm_mdiobus_alloc(&pdev->dev); + if (!new_bus) + return -ENOMEM; +diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c +index 8fec0dbbbe7bb..c6897e6ea362d 100644 +--- a/drivers/net/ethernet/renesas/ravb_main.c ++++ b/drivers/net/ethernet/renesas/ravb_main.c +@@ -1288,25 +1288,16 @@ static int ravb_poll(struct napi_struct *napi, int budget) + struct net_device *ndev = napi->dev; + struct ravb_private *priv = netdev_priv(ndev); + const struct ravb_hw_info *info = priv->info; +- bool gptp = info->gptp || info->ccc_gac; +- struct ravb_rx_desc *desc; + unsigned long flags; + int q = napi - priv->napi; + int mask = BIT(q); + int quota = budget; +- unsigned int entry; ++ bool unmask; + +- if (!gptp) { +- entry = priv->cur_rx[q] % priv->num_rx_ring[q]; +- desc = &priv->gbeth_rx_ring[entry]; +- } + /* Processing RX Descriptor Ring */ + /* Clear RX interrupt */ + ravb_write(ndev, ~(mask | RIS0_RESERVED), RIS0); +- if (gptp || desc->die_dt != DT_FEMPTY) { +- if (ravb_rx(ndev, &quota, q)) +- goto out; +- } ++ unmask = !ravb_rx(ndev, &quota, q); + + /* Processing TX Descriptor Ring */ + spin_lock_irqsave(&priv->lock, flags); +@@ -1316,6 +1307,18 @@ static int ravb_poll(struct napi_struct *napi, int budget) + netif_wake_subqueue(ndev, q); + spin_unlock_irqrestore(&priv->lock, flags); + ++ /* Receive error message handling */ ++ priv->rx_over_errors = priv->stats[RAVB_BE].rx_over_errors; ++ if (info->nc_queues) ++ priv->rx_over_errors += priv->stats[RAVB_NC].rx_over_errors; ++ if (priv->rx_over_errors != ndev->stats.rx_over_errors) ++ ndev->stats.rx_over_errors = priv->rx_over_errors; ++ if (priv->rx_fifo_errors != ndev->stats.rx_fifo_errors) ++ ndev->stats.rx_fifo_errors = priv->rx_fifo_errors; ++ ++ if (!unmask) ++ goto out; ++ + napi_complete(napi); + + /* Re-enable RX/TX interrupts */ +@@ -1329,14 +1332,6 @@ static int ravb_poll(struct napi_struct *napi, int budget) + } + spin_unlock_irqrestore(&priv->lock, flags); + +- /* Receive error message handling */ +- priv->rx_over_errors = priv->stats[RAVB_BE].rx_over_errors; +- if (info->nc_queues) +- priv->rx_over_errors += priv->stats[RAVB_NC].rx_over_errors; +- if (priv->rx_over_errors !=
ndev->stats.rx_over_errors) +- ndev->stats.rx_over_errors = priv->rx_over_errors; +- if (priv->rx_fifo_errors != ndev->stats.rx_fifo_errors) +- ndev->stats.rx_fifo_errors = priv->rx_fifo_errors; + out: + return budget - quota; + } +diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +index c6ff1fa0e04d8..683c34e609638 100644 +--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c ++++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +@@ -92,19 +92,41 @@ static void dwmac4_rx_queue_priority(struct mac_device_info *hw, + u32 prio, u32 queue) + { + void __iomem *ioaddr = hw->pcsr; +- u32 base_register; +- u32 value; ++ u32 clear_mask = 0; ++ u32 ctrl2, ctrl3; ++ int i; + +- base_register = (queue < 4) ? GMAC_RXQ_CTRL2 : GMAC_RXQ_CTRL3; +- if (queue >= 4) +- queue -= 4; ++ ctrl2 = readl(ioaddr + GMAC_RXQ_CTRL2); ++ ctrl3 = readl(ioaddr + GMAC_RXQ_CTRL3); + +- value = readl(ioaddr + base_register); ++ /* The software must ensure that the same priority ++ * is not mapped to multiple Rx queues ++ */ ++ for (i = 0; i < 4; i++) ++ clear_mask |= ((prio << GMAC_RXQCTRL_PSRQX_SHIFT(i)) & ++ GMAC_RXQCTRL_PSRQX_MASK(i)); ++ ++ ctrl2 &= ~clear_mask; ++ ctrl3 &= ~clear_mask; ++ ++ /* First assign new priorities to a queue, then ++ * clear them from others queues ++ */ ++ if (queue < 4) { ++ ctrl2 |= (prio << GMAC_RXQCTRL_PSRQX_SHIFT(queue)) & ++ GMAC_RXQCTRL_PSRQX_MASK(queue); + +- value &= ~GMAC_RXQCTRL_PSRQX_MASK(queue); +- value |= (prio << GMAC_RXQCTRL_PSRQX_SHIFT(queue)) & ++ writel(ctrl2, ioaddr + GMAC_RXQ_CTRL2); ++ writel(ctrl3, ioaddr + GMAC_RXQ_CTRL3); ++ } else { ++ queue -= 4; ++ ++ ctrl3 |= (prio << GMAC_RXQCTRL_PSRQX_SHIFT(queue)) & + GMAC_RXQCTRL_PSRQX_MASK(queue); +- writel(value, ioaddr + base_register); ++ ++ writel(ctrl3, ioaddr + GMAC_RXQ_CTRL3); ++ writel(ctrl2, ioaddr + GMAC_RXQ_CTRL2); ++ } + } + + static void dwmac4_tx_queue_priority(struct mac_device_info *hw, +diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c +index b5509f244ecd1..24c53b7255a2e 100644 +--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c ++++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c +@@ -105,17 +105,41 @@ static void dwxgmac2_rx_queue_prio(struct mac_device_info *hw, u32 prio, + u32 queue) + { + void __iomem *ioaddr = hw->pcsr; +- u32 value, reg; ++ u32 clear_mask = 0; ++ u32 ctrl2, ctrl3; ++ int i; + +- reg = (queue < 4) ? 
XGMAC_RXQ_CTRL2 : XGMAC_RXQ_CTRL3; +- if (queue >= 4) ++ ctrl2 = readl(ioaddr + XGMAC_RXQ_CTRL2); ++ ctrl3 = readl(ioaddr + XGMAC_RXQ_CTRL3); ++ ++ /* The software must ensure that the same priority ++ * is not mapped to multiple Rx queues ++ */ ++ for (i = 0; i < 4; i++) ++ clear_mask |= ((prio << XGMAC_PSRQ_SHIFT(i)) & ++ XGMAC_PSRQ(i)); ++ ++ ctrl2 &= ~clear_mask; ++ ctrl3 &= ~clear_mask; ++ ++ /* First assign new priorities to a queue, then ++ * clear them from others queues ++ */ ++ if (queue < 4) { ++ ctrl2 |= (prio << XGMAC_PSRQ_SHIFT(queue)) & ++ XGMAC_PSRQ(queue); ++ ++ writel(ctrl2, ioaddr + XGMAC_RXQ_CTRL2); ++ writel(ctrl3, ioaddr + XGMAC_RXQ_CTRL3); ++ } else { + queue -= 4; + +- value = readl(ioaddr + reg); +- value &= ~XGMAC_PSRQ(queue); +- value |= (prio << XGMAC_PSRQ_SHIFT(queue)) & XGMAC_PSRQ(queue); ++ ctrl3 |= (prio << XGMAC_PSRQ_SHIFT(queue)) & ++ XGMAC_PSRQ(queue); + +- writel(value, ioaddr + reg); ++ writel(ctrl3, ioaddr + XGMAC_RXQ_CTRL3); ++ writel(ctrl2, ioaddr + XGMAC_RXQ_CTRL2); ++ } + } + + static void dwxgmac2_tx_queue_prio(struct mac_device_info *hw, u32 prio, +diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c +index e457ac9ae6d88..ad5c213dac077 100644 +--- a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c ++++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c +@@ -20,6 +20,8 @@ + #include "txgbe_phy.h" + #include "txgbe_hw.h" + ++#define TXGBE_I2C_CLK_DEV_NAME "i2c_dw" ++ + static int txgbe_swnodes_register(struct txgbe *txgbe) + { + struct txgbe_nodes *nodes = &txgbe->nodes; +@@ -551,8 +553,8 @@ static int txgbe_clock_register(struct txgbe *txgbe) + char clk_name[32]; + struct clk *clk; + +- snprintf(clk_name, sizeof(clk_name), "i2c_dw.%d", +- pci_dev_id(pdev)); ++ snprintf(clk_name, sizeof(clk_name), "%s.%d", ++ TXGBE_I2C_CLK_DEV_NAME, pci_dev_id(pdev)); + + clk = clk_register_fixed_rate(NULL, clk_name, NULL, 0, 156250000); + if (IS_ERR(clk)) +@@ -614,7 +616,7 @@ static int txgbe_i2c_register(struct txgbe *txgbe) + + info.parent = &pdev->dev; + info.fwnode = software_node_fwnode(txgbe->nodes.group[SWNODE_I2C]); +- info.name = "i2c_designware"; ++ info.name = TXGBE_I2C_CLK_DEV_NAME; + info.id = pci_dev_id(pdev); + + info.res = &DEFINE_RES_IRQ(pdev->irq); +diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c +index f81c4bcd85a2a..cbd98ea4a84af 100644 +--- a/drivers/net/phy/micrel.c ++++ b/drivers/net/phy/micrel.c +@@ -2388,6 +2388,7 @@ static int lan8814_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr) + struct hwtstamp_config config; + int txcfg = 0, rxcfg = 0; + int pkt_ts_enable; ++ int tx_mod; + + if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) + return -EFAULT; +@@ -2437,9 +2438,14 @@ static int lan8814_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr) + lanphy_write_page_reg(ptp_priv->phydev, 5, PTP_RX_TIMESTAMP_EN, pkt_ts_enable); + lanphy_write_page_reg(ptp_priv->phydev, 5, PTP_TX_TIMESTAMP_EN, pkt_ts_enable); + +- if (ptp_priv->hwts_tx_type == HWTSTAMP_TX_ONESTEP_SYNC) ++ tx_mod = lanphy_read_page_reg(ptp_priv->phydev, 5, PTP_TX_MOD); ++ if (ptp_priv->hwts_tx_type == HWTSTAMP_TX_ONESTEP_SYNC) { + lanphy_write_page_reg(ptp_priv->phydev, 5, PTP_TX_MOD, +- PTP_TX_MOD_TX_PTP_SYNC_TS_INSERT_); ++ tx_mod | PTP_TX_MOD_TX_PTP_SYNC_TS_INSERT_); ++ } else if (ptp_priv->hwts_tx_type == HWTSTAMP_TX_ON) { ++ lanphy_write_page_reg(ptp_priv->phydev, 5, PTP_TX_MOD, ++ tx_mod & ~PTP_TX_MOD_TX_PTP_SYNC_TS_INSERT_); ++ } + + if (config.rx_filter != 
HWTSTAMP_FILTER_NONE) + lan8814_config_ts_intr(ptp_priv->phydev, true); +@@ -2497,7 +2503,7 @@ static void lan8814_txtstamp(struct mii_timestamper *mii_ts, + } + } + +-static void lan8814_get_sig_rx(struct sk_buff *skb, u16 *sig) ++static bool lan8814_get_sig_rx(struct sk_buff *skb, u16 *sig) + { + struct ptp_header *ptp_header; + u32 type; +@@ -2507,7 +2513,11 @@ static void lan8814_get_sig_rx(struct sk_buff *skb, u16 *sig) + ptp_header = ptp_parse_header(skb, type); + skb_pull_inline(skb, ETH_HLEN); + ++ if (!ptp_header) ++ return false; ++ + *sig = (__force u16)(ntohs(ptp_header->sequence_id)); ++ return true; + } + + static bool lan8814_match_rx_skb(struct kszphy_ptp_priv *ptp_priv, +@@ -2519,7 +2529,8 @@ static bool lan8814_match_rx_skb(struct kszphy_ptp_priv *ptp_priv, + bool ret = false; + u16 skb_sig; + +- lan8814_get_sig_rx(skb, &skb_sig); ++ if (!lan8814_get_sig_rx(skb, &skb_sig)) ++ return ret; + + /* Iterate over all RX timestamps and match it with the received skbs */ + spin_lock_irqsave(&ptp_priv->rx_ts_lock, flags); +@@ -2799,7 +2810,7 @@ static int lan8814_ptpci_adjfine(struct ptp_clock_info *ptpci, long scaled_ppm) + return 0; + } + +-static void lan8814_get_sig_tx(struct sk_buff *skb, u16 *sig) ++static bool lan8814_get_sig_tx(struct sk_buff *skb, u16 *sig) + { + struct ptp_header *ptp_header; + u32 type; +@@ -2807,7 +2818,11 @@ static void lan8814_get_sig_tx(struct sk_buff *skb, u16 *sig) + type = ptp_classify_raw(skb); + ptp_header = ptp_parse_header(skb, type); + ++ if (!ptp_header) ++ return false; ++ + *sig = (__force u16)(ntohs(ptp_header->sequence_id)); ++ return true; + } + + static void lan8814_match_tx_skb(struct kszphy_ptp_priv *ptp_priv, +@@ -2821,7 +2836,8 @@ static void lan8814_match_tx_skb(struct kszphy_ptp_priv *ptp_priv, + + spin_lock_irqsave(&ptp_priv->tx_queue.lock, flags); + skb_queue_walk_safe(&ptp_priv->tx_queue, skb, skb_tmp) { +- lan8814_get_sig_tx(skb, &skb_sig); ++ if (!lan8814_get_sig_tx(skb, &skb_sig)) ++ continue; + + if (memcmp(&skb_sig, &seq_id, sizeof(seq_id))) + continue; +@@ -2875,7 +2891,8 @@ static bool lan8814_match_skb(struct kszphy_ptp_priv *ptp_priv, + + spin_lock_irqsave(&ptp_priv->rx_queue.lock, flags); + skb_queue_walk_safe(&ptp_priv->rx_queue, skb, skb_tmp) { +- lan8814_get_sig_rx(skb, &skb_sig); ++ if (!lan8814_get_sig_rx(skb, &skb_sig)) ++ continue; + + if (memcmp(&skb_sig, &rx_ts->seq_id, sizeof(rx_ts->seq_id))) + continue; +diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c +index d837c18874161..e0e9b4c53cb02 100644 +--- a/drivers/net/usb/ax88179_178a.c ++++ b/drivers/net/usb/ax88179_178a.c +@@ -1273,6 +1273,8 @@ static void ax88179_get_mac_addr(struct usbnet *dev) + + if (is_valid_ether_addr(mac)) { + eth_hw_addr_set(dev->net, mac); ++ if (!is_local_ether_addr(mac)) ++ dev->net->addr_assign_type = NET_ADDR_PERM; + } else { + netdev_info(dev->net, "invalid MAC address, using random\n"); + eth_hw_addr_random(dev->net); +diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h +index 168eda2132fb8..9dcc1506bd0b0 100644 +--- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h ++++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h +@@ -278,7 +278,7 @@ static inline void iwl_free_rxb(struct iwl_rx_cmd_buffer *r) + #define IWL_MGMT_TID 15 + #define IWL_FRAME_LIMIT 64 + #define IWL_MAX_RX_HW_QUEUES 16 +-#define IWL_9000_MAX_RX_HW_QUEUES 6 ++#define IWL_9000_MAX_RX_HW_QUEUES 1 + + /** + * enum iwl_wowlan_status - WoWLAN image/device status +diff --git 
a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +index aaa9840d0d4c5..ee9d14250a261 100644 +--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c ++++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +@@ -352,7 +352,9 @@ int iwl_mvm_mac_setup_register(struct iwl_mvm *mvm) + ieee80211_hw_set(hw, HAS_RATE_CONTROL); + } + +- if (iwl_mvm_has_new_rx_api(mvm)) ++ /* We want to use the mac80211's reorder buffer for 9000 */ ++ if (iwl_mvm_has_new_rx_api(mvm) && ++ mvm->trans->trans_cfg->device_family > IWL_DEVICE_FAMILY_9000) + ieee80211_hw_set(hw, SUPPORTS_REORDERING_BUFFER); + + if (fw_has_capa(&mvm->fw->ucode_capa, +diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rfi.c b/drivers/net/wireless/intel/iwlwifi/mvm/rfi.c +index 2ecd32bed752f..045c862a8fc4f 100644 +--- a/drivers/net/wireless/intel/iwlwifi/mvm/rfi.c ++++ b/drivers/net/wireless/intel/iwlwifi/mvm/rfi.c +@@ -132,14 +132,18 @@ struct iwl_rfi_freq_table_resp_cmd *iwl_rfi_get_freq_table(struct iwl_mvm *mvm) + if (ret) + return ERR_PTR(ret); + +- if (WARN_ON_ONCE(iwl_rx_packet_payload_len(cmd.resp_pkt) != resp_size)) ++ if (WARN_ON_ONCE(iwl_rx_packet_payload_len(cmd.resp_pkt) != ++ resp_size)) { ++ iwl_free_resp(&cmd); + return ERR_PTR(-EIO); ++ } + + resp = kmemdup(cmd.resp_pkt->data, resp_size, GFP_KERNEL); ++ iwl_free_resp(&cmd); ++ + if (!resp) + return ERR_PTR(-ENOMEM); + +- iwl_free_resp(&cmd); + return resp; + } + +diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +index bac0228b8c866..e9360b555ac93 100644 +--- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c ++++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +@@ -236,21 +236,13 @@ static void iwl_mvm_add_rtap_sniffer_config(struct iwl_mvm *mvm, + static void iwl_mvm_pass_packet_to_mac80211(struct iwl_mvm *mvm, + struct napi_struct *napi, + struct sk_buff *skb, int queue, +- struct ieee80211_sta *sta, +- struct ieee80211_link_sta *link_sta) ++ struct ieee80211_sta *sta) + { + if (unlikely(iwl_mvm_check_pn(mvm, skb, queue, sta))) { + kfree_skb(skb); + return; + } + +- if (sta && sta->valid_links && link_sta) { +- struct ieee80211_rx_status *rx_status = IEEE80211_SKB_RXCB(skb); +- +- rx_status->link_valid = 1; +- rx_status->link_id = link_sta->link_id; +- } +- + ieee80211_rx_napi(mvm->hw, sta, skb, napi); + } + +@@ -636,7 +628,7 @@ static void iwl_mvm_release_frames(struct iwl_mvm *mvm, + while ((skb = __skb_dequeue(skb_list))) { + iwl_mvm_pass_packet_to_mac80211(mvm, napi, skb, + reorder_buf->queue, +- sta, NULL /* FIXME */); ++ sta); + reorder_buf->num_stored--; + } + } +@@ -963,6 +955,9 @@ static bool iwl_mvm_reorder(struct iwl_mvm *mvm, + baid = (reorder & IWL_RX_MPDU_REORDER_BAID_MASK) >> + IWL_RX_MPDU_REORDER_BAID_SHIFT; + ++ if (mvm->trans->trans_cfg->device_family == IWL_DEVICE_FAMILY_9000) ++ return false; ++ + /* + * This also covers the case of receiving a Block Ack Request + * outside a BA session; we'll pass it to mac80211 and that +@@ -2486,6 +2481,11 @@ void iwl_mvm_rx_mpdu_mq(struct iwl_mvm *mvm, struct napi_struct *napi, + if (IS_ERR(sta)) + sta = NULL; + link_sta = rcu_dereference(mvm->fw_id_to_link_sta[id]); ++ ++ if (sta && sta->valid_links && link_sta) { ++ rx_status->link_valid = 1; ++ rx_status->link_id = link_sta->link_id; ++ } + } + } else if (!is_multicast_ether_addr(hdr->addr2)) { + /* +@@ -2621,9 +2621,14 @@ void iwl_mvm_rx_mpdu_mq(struct iwl_mvm *mvm, struct napi_struct *napi, + + if (!iwl_mvm_reorder(mvm, napi, queue, sta, skb, desc) && 
+ likely(!iwl_mvm_time_sync_frame(mvm, skb, hdr->addr2)) && +- likely(!iwl_mvm_mei_filter_scan(mvm, skb))) +- iwl_mvm_pass_packet_to_mac80211(mvm, napi, skb, queue, sta, +- link_sta); ++ likely(!iwl_mvm_mei_filter_scan(mvm, skb))) { ++ if (mvm->trans->trans_cfg->device_family == IWL_DEVICE_FAMILY_9000 && ++ (desc->mac_flags2 & IWL_RX_MPDU_MFLG2_AMSDU) && ++ !(desc->amsdu_info & IWL_RX_MPDU_AMSDU_LAST_SUBFRAME)) ++ rx_status->flag |= RX_FLAG_AMSDU_MORE; ++ ++ iwl_mvm_pass_packet_to_mac80211(mvm, napi, skb, queue, sta); ++ } + out: + rcu_read_unlock(); + } +diff --git a/drivers/net/wwan/t7xx/t7xx_cldma.c b/drivers/net/wwan/t7xx/t7xx_cldma.c +index 9f43f256db1d0..f0a4783baf1f3 100644 +--- a/drivers/net/wwan/t7xx/t7xx_cldma.c ++++ b/drivers/net/wwan/t7xx/t7xx_cldma.c +@@ -106,7 +106,7 @@ bool t7xx_cldma_tx_addr_is_set(struct t7xx_cldma_hw *hw_info, unsigned int qno) + { + u32 offset = REG_CLDMA_UL_START_ADDRL_0 + qno * ADDR_SIZE; + +- return ioread64(hw_info->ap_pdn_base + offset); ++ return ioread64_lo_hi(hw_info->ap_pdn_base + offset); + } + + void t7xx_cldma_hw_set_start_addr(struct t7xx_cldma_hw *hw_info, unsigned int qno, u64 address, +@@ -117,7 +117,7 @@ void t7xx_cldma_hw_set_start_addr(struct t7xx_cldma_hw *hw_info, unsigned int qn + + reg = tx_rx == MTK_RX ? hw_info->ap_ao_base + REG_CLDMA_DL_START_ADDRL_0 : + hw_info->ap_pdn_base + REG_CLDMA_UL_START_ADDRL_0; +- iowrite64(address, reg + offset); ++ iowrite64_lo_hi(address, reg + offset); + } + + void t7xx_cldma_hw_resume_queue(struct t7xx_cldma_hw *hw_info, unsigned int qno, +diff --git a/drivers/net/wwan/t7xx/t7xx_hif_cldma.c b/drivers/net/wwan/t7xx/t7xx_hif_cldma.c +index cc70360364b7d..554ba4669cc8d 100644 +--- a/drivers/net/wwan/t7xx/t7xx_hif_cldma.c ++++ b/drivers/net/wwan/t7xx/t7xx_hif_cldma.c +@@ -139,8 +139,9 @@ static int t7xx_cldma_gpd_rx_from_q(struct cldma_queue *queue, int budget, bool + return -ENODEV; + } + +- gpd_addr = ioread64(hw_info->ap_pdn_base + REG_CLDMA_DL_CURRENT_ADDRL_0 + +- queue->index * sizeof(u64)); ++ gpd_addr = ioread64_lo_hi(hw_info->ap_pdn_base + ++ REG_CLDMA_DL_CURRENT_ADDRL_0 + ++ queue->index * sizeof(u64)); + if (req->gpd_addr == gpd_addr || hwo_polling_count++ >= 100) + return 0; + +@@ -318,8 +319,8 @@ static void t7xx_cldma_txq_empty_hndl(struct cldma_queue *queue) + struct t7xx_cldma_hw *hw_info = &md_ctrl->hw_info; + + /* Check current processing TGPD, 64-bit address is in a table by Q index */ +- ul_curr_addr = ioread64(hw_info->ap_pdn_base + REG_CLDMA_UL_CURRENT_ADDRL_0 + +- queue->index * sizeof(u64)); ++ ul_curr_addr = ioread64_lo_hi(hw_info->ap_pdn_base + REG_CLDMA_UL_CURRENT_ADDRL_0 + ++ queue->index * sizeof(u64)); + if (req->gpd_addr != ul_curr_addr) { + spin_unlock_irqrestore(&md_ctrl->cldma_lock, flags); + dev_err(md_ctrl->dev, "CLDMA%d queue %d is not empty\n", +diff --git a/drivers/net/wwan/t7xx/t7xx_pcie_mac.c b/drivers/net/wwan/t7xx/t7xx_pcie_mac.c +index 76da4c15e3de1..f071ec7ff23d5 100644 +--- a/drivers/net/wwan/t7xx/t7xx_pcie_mac.c ++++ b/drivers/net/wwan/t7xx/t7xx_pcie_mac.c +@@ -75,7 +75,7 @@ static void t7xx_pcie_mac_atr_tables_dis(void __iomem *pbase, enum t7xx_atr_src_ + for (i = 0; i < ATR_TABLE_NUM_PER_ATR; i++) { + offset = ATR_PORT_OFFSET * port + ATR_TABLE_OFFSET * i; + reg = pbase + ATR_PCIE_WIN0_T0_ATR_PARAM_SRC_ADDR + offset; +- iowrite64(0, reg); ++ iowrite64_lo_hi(0, reg); + } + } + +@@ -112,17 +112,17 @@ static int t7xx_pcie_mac_atr_cfg(struct t7xx_pci_dev *t7xx_dev, struct t7xx_atr_ + + reg = pbase + ATR_PCIE_WIN0_T0_TRSL_ADDR + offset; + value = 
cfg->trsl_addr & ATR_PCIE_WIN0_ADDR_ALGMT; +- iowrite64(value, reg); ++ iowrite64_lo_hi(value, reg); + + reg = pbase + ATR_PCIE_WIN0_T0_TRSL_PARAM + offset; + iowrite32(cfg->trsl_id, reg); + + reg = pbase + ATR_PCIE_WIN0_T0_ATR_PARAM_SRC_ADDR + offset; + value = (cfg->src_addr & ATR_PCIE_WIN0_ADDR_ALGMT) | (atr_size << 1) | BIT(0); +- iowrite64(value, reg); ++ iowrite64_lo_hi(value, reg); + + /* Ensure ATR is set */ +- ioread64(reg); ++ ioread64_lo_hi(reg); + return 0; + } + +diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c +index ad29f370034e4..8d2aee88526c6 100644 +--- a/drivers/net/xen-netfront.c ++++ b/drivers/net/xen-netfront.c +@@ -285,6 +285,7 @@ static struct sk_buff *xennet_alloc_one_rx_buffer(struct netfront_queue *queue) + return NULL; + } + skb_add_rx_frag(skb, 0, page, 0, 0, PAGE_SIZE); ++ skb_mark_for_recycle(skb); + + /* Align ip header to a 16 bytes boundary */ + skb_reserve(skb, NET_IP_ALIGN); +diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c +index 3bf27052832f3..4d57a4e341054 100644 +--- a/drivers/of/dynamic.c ++++ b/drivers/of/dynamic.c +@@ -9,6 +9,7 @@ + + #define pr_fmt(fmt) "OF: " fmt + ++#include <linux/device.h> + #include <linux/of.h> + #include <linux/spinlock.h> + #include <linux/slab.h> +@@ -667,6 +668,17 @@ void of_changeset_destroy(struct of_changeset *ocs) + { + struct of_changeset_entry *ce, *cen; + ++ /* ++ * When a device is deleted, the device links to/from it are also queued ++ * for deletion. Until these device links are freed, the devices ++ * themselves aren't freed. If the device being deleted is due to an ++ * overlay change, this device might be holding a reference to a device ++ * node that will be freed. So, wait until all already pending device ++ * links are deleted before freeing a device node. This ensures we don't ++ * free any device node that has a non-zero reference count. ++ */ ++ device_link_wait_removal(); ++ + list_for_each_entry_safe_reverse(ce, cen, &ocs->entries, node) + __of_changeset_entry_destroy(ce); + } +diff --git a/drivers/of/module.c b/drivers/of/module.c +index 0e8aa974f0f2b..f58e624953a20 100644 +--- a/drivers/of/module.c ++++ b/drivers/of/module.c +@@ -16,6 +16,14 @@ ssize_t of_modalias(const struct device_node *np, char *str, ssize_t len) + ssize_t csize; + ssize_t tsize; + ++ /* ++ * Prevent a kernel oops in vsnprintf() -- it only allows passing a ++ * NULL ptr when the length is also 0. Also filter out the negative ++ * lengths... 
++ */ ++ if ((len > 0 && !str) || len < 0) ++ return -EINVAL; ++ + /* Name & Type */ + /* %p eats all alphanum characters, so %c must be used here */ + csize = snprintf(str, len, "of:N%pOFn%c%s", np, 'T', +diff --git a/drivers/perf/riscv_pmu.c b/drivers/perf/riscv_pmu.c +index c78a6fd6c57f6..b4efdddb2ad91 100644 +--- a/drivers/perf/riscv_pmu.c ++++ b/drivers/perf/riscv_pmu.c +@@ -313,6 +313,10 @@ static int riscv_pmu_event_init(struct perf_event *event) + u64 event_config = 0; + uint64_t cmask; + ++ /* driver does not support branch stack sampling */ ++ if (has_branch_stack(event)) ++ return -EOPNOTSUPP; ++ + hwc->flags = 0; + mapped_event = rvpmu->event_map(event, &event_config); + if (mapped_event < 0) { +diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c +index cd783290bde5e..1148b4ecabdde 100644 +--- a/drivers/s390/net/qeth_core_main.c ++++ b/drivers/s390/net/qeth_core_main.c +@@ -1179,6 +1179,20 @@ static int qeth_check_irb_error(struct qeth_card *card, struct ccw_device *cdev, + } + } + ++/** ++ * qeth_irq() - qeth interrupt handler ++ * @cdev: ccw device ++ * @intparm: expect pointer to iob ++ * @irb: Interruption Response Block ++ * ++ * In the good path: ++ * corresponding qeth channel is locked with last used iob as active_cmd. ++ * But this function is also called for error interrupts. ++ * ++ * Caller ensures that: ++ * Interrupts are disabled; ccw device lock is held; ++ * ++ */ + static void qeth_irq(struct ccw_device *cdev, unsigned long intparm, + struct irb *irb) + { +@@ -1220,11 +1234,10 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm, + iob = (struct qeth_cmd_buffer *) (addr_t)intparm; + } + +- qeth_unlock_channel(card, channel); +- + rc = qeth_check_irb_error(card, cdev, irb); + if (rc) { + /* IO was terminated, free its resources. */ ++ qeth_unlock_channel(card, channel); + if (iob) + qeth_cancel_cmd(iob, rc); + return; +@@ -1268,6 +1281,7 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm, + rc = qeth_get_problem(card, cdev, irb); + if (rc) { + card->read_or_write_problem = 1; ++ qeth_unlock_channel(card, channel); + if (iob) + qeth_cancel_cmd(iob, rc); + qeth_clear_ipacmd_list(card); +@@ -1276,6 +1290,26 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm, + } + } + ++ if (scsw_cmd_is_valid_cc(&irb->scsw) && irb->scsw.cmd.cc == 1 && iob) { ++ /* channel command hasn't started: retry. 
++ * active_cmd is still set to last iob ++ */ ++ QETH_CARD_TEXT(card, 2, "irqcc1"); ++ rc = ccw_device_start_timeout(cdev, __ccw_from_cmd(iob), ++ (addr_t)iob, 0, 0, iob->timeout); ++ if (rc) { ++ QETH_DBF_MESSAGE(2, ++ "ccw retry on %x failed, rc = %i\n", ++ CARD_DEVID(card), rc); ++ QETH_CARD_TEXT_(card, 2, " err%d", rc); ++ qeth_unlock_channel(card, channel); ++ qeth_cancel_cmd(iob, rc); ++ } ++ return; ++ } ++ ++ qeth_unlock_channel(card, channel); ++ + if (iob) { + /* sanity check: */ + if (irb->scsw.cmd.count > iob->length) { +diff --git a/drivers/scsi/myrb.c b/drivers/scsi/myrb.c +index ca2e932dd9b70..f684eb5e04898 100644 +--- a/drivers/scsi/myrb.c ++++ b/drivers/scsi/myrb.c +@@ -1775,9 +1775,9 @@ static ssize_t raid_state_show(struct device *dev, + + name = myrb_devstate_name(ldev_info->state); + if (name) +- ret = snprintf(buf, 32, "%s\n", name); ++ ret = snprintf(buf, 64, "%s\n", name); + else +- ret = snprintf(buf, 32, "Invalid (%02X)\n", ++ ret = snprintf(buf, 64, "Invalid (%02X)\n", + ldev_info->state); + } else { + struct myrb_pdev_state *pdev_info = sdev->hostdata; +@@ -1796,9 +1796,9 @@ static ssize_t raid_state_show(struct device *dev, + else + name = myrb_devstate_name(pdev_info->state); + if (name) +- ret = snprintf(buf, 32, "%s\n", name); ++ ret = snprintf(buf, 64, "%s\n", name); + else +- ret = snprintf(buf, 32, "Invalid (%02X)\n", ++ ret = snprintf(buf, 64, "Invalid (%02X)\n", + pdev_info->state); + } + return ret; +@@ -1886,11 +1886,11 @@ static ssize_t raid_level_show(struct device *dev, + + name = myrb_raidlevel_name(ldev_info->raid_level); + if (!name) +- return snprintf(buf, 32, "Invalid (%02X)\n", ++ return snprintf(buf, 64, "Invalid (%02X)\n", + ldev_info->state); +- return snprintf(buf, 32, "%s\n", name); ++ return snprintf(buf, 64, "%s\n", name); + } +- return snprintf(buf, 32, "Physical Drive\n"); ++ return snprintf(buf, 64, "Physical Drive\n"); + } + static DEVICE_ATTR_RO(raid_level); + +@@ -1903,15 +1903,15 @@ static ssize_t rebuild_show(struct device *dev, + unsigned char status; + + if (sdev->channel < myrb_logical_channel(sdev->host)) +- return snprintf(buf, 32, "physical device - not rebuilding\n"); ++ return snprintf(buf, 64, "physical device - not rebuilding\n"); + + status = myrb_get_rbld_progress(cb, &rbld_buf); + + if (rbld_buf.ldev_num != sdev->id || + status != MYRB_STATUS_SUCCESS) +- return snprintf(buf, 32, "not rebuilding\n"); ++ return snprintf(buf, 64, "not rebuilding\n"); + +- return snprintf(buf, 32, "rebuilding block %u of %u\n", ++ return snprintf(buf, 64, "rebuilding block %u of %u\n", + rbld_buf.ldev_size - rbld_buf.blocks_left, + rbld_buf.ldev_size); + } +diff --git a/drivers/scsi/myrs.c b/drivers/scsi/myrs.c +index a1eec65a9713f..e824be9d9bbb9 100644 +--- a/drivers/scsi/myrs.c ++++ b/drivers/scsi/myrs.c +@@ -947,9 +947,9 @@ static ssize_t raid_state_show(struct device *dev, + + name = myrs_devstate_name(ldev_info->dev_state); + if (name) +- ret = snprintf(buf, 32, "%s\n", name); ++ ret = snprintf(buf, 64, "%s\n", name); + else +- ret = snprintf(buf, 32, "Invalid (%02X)\n", ++ ret = snprintf(buf, 64, "Invalid (%02X)\n", + ldev_info->dev_state); + } else { + struct myrs_pdev_info *pdev_info; +@@ -958,9 +958,9 @@ static ssize_t raid_state_show(struct device *dev, + pdev_info = sdev->hostdata; + name = myrs_devstate_name(pdev_info->dev_state); + if (name) +- ret = snprintf(buf, 32, "%s\n", name); ++ ret = snprintf(buf, 64, "%s\n", name); + else +- ret = snprintf(buf, 32, "Invalid (%02X)\n", ++ ret = snprintf(buf, 64, "Invalid 
(%02X)\n", + pdev_info->dev_state); + } + return ret; +@@ -1066,13 +1066,13 @@ static ssize_t raid_level_show(struct device *dev, + ldev_info = sdev->hostdata; + name = myrs_raid_level_name(ldev_info->raid_level); + if (!name) +- return snprintf(buf, 32, "Invalid (%02X)\n", ++ return snprintf(buf, 64, "Invalid (%02X)\n", + ldev_info->dev_state); + + } else + name = myrs_raid_level_name(MYRS_RAID_PHYSICAL); + +- return snprintf(buf, 32, "%s\n", name); ++ return snprintf(buf, 64, "%s\n", name); + } + static DEVICE_ATTR_RO(raid_level); + +@@ -1086,7 +1086,7 @@ static ssize_t rebuild_show(struct device *dev, + unsigned char status; + + if (sdev->channel < cs->ctlr_info->physchan_present) +- return snprintf(buf, 32, "physical device - not rebuilding\n"); ++ return snprintf(buf, 64, "physical device - not rebuilding\n"); + + ldev_info = sdev->hostdata; + ldev_num = ldev_info->ldev_num; +@@ -1098,11 +1098,11 @@ static ssize_t rebuild_show(struct device *dev, + return -EIO; + } + if (ldev_info->rbld_active) { +- return snprintf(buf, 32, "rebuilding block %zu of %zu\n", ++ return snprintf(buf, 64, "rebuilding block %zu of %zu\n", + (size_t)ldev_info->rbld_lba, + (size_t)ldev_info->cfg_devsize); + } else +- return snprintf(buf, 32, "not rebuilding\n"); ++ return snprintf(buf, 64, "not rebuilding\n"); + } + + static ssize_t rebuild_store(struct device *dev, +@@ -1190,7 +1190,7 @@ static ssize_t consistency_check_show(struct device *dev, + unsigned short ldev_num; + + if (sdev->channel < cs->ctlr_info->physchan_present) +- return snprintf(buf, 32, "physical device - not checking\n"); ++ return snprintf(buf, 64, "physical device - not checking\n"); + + ldev_info = sdev->hostdata; + if (!ldev_info) +@@ -1198,11 +1198,11 @@ static ssize_t consistency_check_show(struct device *dev, + ldev_num = ldev_info->ldev_num; + myrs_get_ldev_info(cs, ldev_num, ldev_info); + if (ldev_info->cc_active) +- return snprintf(buf, 32, "checking block %zu of %zu\n", ++ return snprintf(buf, 64, "checking block %zu of %zu\n", + (size_t)ldev_info->cc_lba, + (size_t)ldev_info->cfg_devsize); + else +- return snprintf(buf, 32, "not checking\n"); ++ return snprintf(buf, 64, "not checking\n"); + } + + static ssize_t consistency_check_store(struct device *dev, +diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c +index e80c33cdad2b9..c62f677084b4c 100644 +--- a/drivers/scsi/sd.c ++++ b/drivers/scsi/sd.c +@@ -3754,7 +3754,7 @@ static int sd_probe(struct device *dev) + + error = device_add_disk(dev, gd, NULL); + if (error) { +- put_device(&sdkp->disk_dev); ++ device_unregister(&sdkp->disk_dev); + put_disk(gd); + goto out; + } +diff --git a/drivers/spi/spi-pci1xxxx.c b/drivers/spi/spi-pci1xxxx.c +index 3638e974f5d49..06bf58b7e5d72 100644 +--- a/drivers/spi/spi-pci1xxxx.c ++++ b/drivers/spi/spi-pci1xxxx.c +@@ -275,6 +275,8 @@ static int pci1xxxx_spi_probe(struct pci_dev *pdev, const struct pci_device_id * + spi_bus->spi_int[iter] = devm_kzalloc(&pdev->dev, + sizeof(struct pci1xxxx_spi_internal), + GFP_KERNEL); ++ if (!spi_bus->spi_int[iter]) ++ return -ENOMEM; + spi_sub_ptr = spi_bus->spi_int[iter]; + spi_sub_ptr->spi_host = devm_spi_alloc_host(dev, sizeof(struct spi_controller)); + if (!spi_sub_ptr->spi_host) +diff --git a/drivers/spi/spi-s3c64xx.c b/drivers/spi/spi-s3c64xx.c +index 0e48ffd499b9f..652eadbefe24c 100644 +--- a/drivers/spi/spi-s3c64xx.c ++++ b/drivers/spi/spi-s3c64xx.c +@@ -3,19 +3,20 @@ + // Copyright (c) 2009 Samsung Electronics Co., Ltd. 
+ // Jaswinder Singh <jassi.brar@samsung.com> + +-#include <linux/init.h> +-#include <linux/module.h> +-#include <linux/interrupt.h> +-#include <linux/delay.h> ++#include <linux/bitops.h> ++#include <linux/bits.h> + #include <linux/clk.h> ++#include <linux/delay.h> + #include <linux/dma-mapping.h> + #include <linux/dmaengine.h> ++#include <linux/init.h> ++#include <linux/interrupt.h> ++#include <linux/module.h> ++#include <linux/of.h> ++#include <linux/platform_data/spi-s3c64xx.h> + #include <linux/platform_device.h> + #include <linux/pm_runtime.h> + #include <linux/spi/spi.h> +-#include <linux/of.h> +- +-#include <linux/platform_data/spi-s3c64xx.h> + + #define MAX_SPI_PORTS 12 + #define S3C64XX_SPI_QUIRK_CS_AUTO (1 << 1) +@@ -76,6 +77,7 @@ + #define S3C64XX_SPI_INT_RX_FIFORDY_EN (1<<1) + #define S3C64XX_SPI_INT_TX_FIFORDY_EN (1<<0) + ++#define S3C64XX_SPI_ST_TX_FIFO_LVL_SHIFT 6 + #define S3C64XX_SPI_ST_RX_OVERRUN_ERR (1<<5) + #define S3C64XX_SPI_ST_RX_UNDERRUN_ERR (1<<4) + #define S3C64XX_SPI_ST_TX_OVERRUN_ERR (1<<3) +@@ -106,9 +108,11 @@ + #define FIFO_LVL_MASK(i) ((i)->port_conf->fifo_lvl_mask[i->port_id]) + #define S3C64XX_SPI_ST_TX_DONE(v, i) (((v) & \ + (1 << (i)->port_conf->tx_st_done)) ? 1 : 0) +-#define TX_FIFO_LVL(v, i) (((v) >> 6) & FIFO_LVL_MASK(i)) +-#define RX_FIFO_LVL(v, i) (((v) >> (i)->port_conf->rx_lvl_offset) & \ +- FIFO_LVL_MASK(i)) ++#define TX_FIFO_LVL(v, sdd) (((v) & (sdd)->tx_fifomask) >> \ ++ __ffs((sdd)->tx_fifomask)) ++#define RX_FIFO_LVL(v, sdd) (((v) & (sdd)->rx_fifomask) >> \ ++ __ffs((sdd)->rx_fifomask)) ++#define FIFO_DEPTH(i) ((FIFO_LVL_MASK(i) >> 1) + 1) + + #define S3C64XX_SPI_MAX_TRAILCNT 0x3ff + #define S3C64XX_SPI_TRAILCNT_OFF 19 +@@ -133,6 +137,10 @@ struct s3c64xx_spi_dma_data { + * struct s3c64xx_spi_port_config - SPI Controller hardware info + * @fifo_lvl_mask: Bit-mask for {TX|RX}_FIFO_LVL bits in SPI_STATUS register. + * @rx_lvl_offset: Bit offset of RX_FIFO_LVL bits in SPI_STATUS regiter. ++ * @rx_fifomask: SPI_STATUS.RX_FIFO_LVL mask. Shifted mask defining the field's ++ * length and position. ++ * @tx_fifomask: SPI_STATUS.TX_FIFO_LVL mask. Shifted mask defining the field's ++ * length and position. + * @tx_st_done: Bit offset of TX_DONE bit in SPI_STATUS regiter. + * @clk_div: Internal clock divider + * @quirks: Bitmask of known quirks +@@ -150,6 +158,8 @@ struct s3c64xx_spi_dma_data { + struct s3c64xx_spi_port_config { + int fifo_lvl_mask[MAX_SPI_PORTS]; + int rx_lvl_offset; ++ u32 rx_fifomask; ++ u32 tx_fifomask; + int tx_st_done; + int quirks; + int clk_div; +@@ -179,6 +189,11 @@ struct s3c64xx_spi_port_config { + * @tx_dma: Local transmit DMA data (e.g. chan and direction) + * @port_conf: Local SPI port configuartion data + * @port_id: Port identification number ++ * @fifo_depth: depth of the FIFO. ++ * @rx_fifomask: SPI_STATUS.RX_FIFO_LVL mask. Shifted mask defining the field's ++ * length and position. ++ * @tx_fifomask: SPI_STATUS.TX_FIFO_LVL mask. Shifted mask defining the field's ++ * length and position. 
+ */ + struct s3c64xx_spi_driver_data { + void __iomem *regs; +@@ -198,6 +213,9 @@ struct s3c64xx_spi_driver_data { + struct s3c64xx_spi_dma_data tx_dma; + const struct s3c64xx_spi_port_config *port_conf; + unsigned int port_id; ++ unsigned int fifo_depth; ++ u32 rx_fifomask; ++ u32 tx_fifomask; + }; + + static void s3c64xx_flush_fifo(struct s3c64xx_spi_driver_data *sdd) +@@ -405,12 +423,10 @@ static bool s3c64xx_spi_can_dma(struct spi_controller *host, + { + struct s3c64xx_spi_driver_data *sdd = spi_controller_get_devdata(host); + +- if (sdd->rx_dma.ch && sdd->tx_dma.ch) { +- return xfer->len > (FIFO_LVL_MASK(sdd) >> 1) + 1; +- } else { +- return false; +- } ++ if (sdd->rx_dma.ch && sdd->tx_dma.ch) ++ return xfer->len >= sdd->fifo_depth; + ++ return false; + } + + static int s3c64xx_enable_datapath(struct s3c64xx_spi_driver_data *sdd, +@@ -495,9 +511,7 @@ static u32 s3c64xx_spi_wait_for_timeout(struct s3c64xx_spi_driver_data *sdd, + void __iomem *regs = sdd->regs; + unsigned long val = 1; + u32 status; +- +- /* max fifo depth available */ +- u32 max_fifo = (FIFO_LVL_MASK(sdd) >> 1) + 1; ++ u32 max_fifo = sdd->fifo_depth; + + if (timeout_ms) + val = msecs_to_loops(timeout_ms); +@@ -604,7 +618,7 @@ static int s3c64xx_wait_for_pio(struct s3c64xx_spi_driver_data *sdd, + * For any size less than the fifo size the below code is + * executed atleast once. + */ +- loops = xfer->len / ((FIFO_LVL_MASK(sdd) >> 1) + 1); ++ loops = xfer->len / sdd->fifo_depth; + buf = xfer->rx_buf; + do { + /* wait for data to be received in the fifo */ +@@ -741,7 +755,7 @@ static int s3c64xx_spi_transfer_one(struct spi_controller *host, + struct spi_transfer *xfer) + { + struct s3c64xx_spi_driver_data *sdd = spi_controller_get_devdata(host); +- const unsigned int fifo_len = (FIFO_LVL_MASK(sdd) >> 1) + 1; ++ const unsigned int fifo_len = sdd->fifo_depth; + const void *tx_buf = NULL; + void *rx_buf = NULL; + int target_len = 0, origin_len = 0; +@@ -769,10 +783,9 @@ static int s3c64xx_spi_transfer_one(struct spi_controller *host, + return status; + } + +- if (!is_polling(sdd) && (xfer->len > fifo_len) && ++ if (!is_polling(sdd) && xfer->len >= fifo_len && + sdd->rx_dma.ch && sdd->tx_dma.ch) { + use_dma = 1; +- + } else if (xfer->len >= fifo_len) { + tx_buf = xfer->tx_buf; + rx_buf = xfer->rx_buf; +@@ -1146,6 +1159,23 @@ static inline const struct s3c64xx_spi_port_config *s3c64xx_spi_get_port_config( + return (const struct s3c64xx_spi_port_config *)platform_get_device_id(pdev)->driver_data; + } + ++static void s3c64xx_spi_set_fifomask(struct s3c64xx_spi_driver_data *sdd) ++{ ++ const struct s3c64xx_spi_port_config *port_conf = sdd->port_conf; ++ ++ if (port_conf->rx_fifomask) ++ sdd->rx_fifomask = port_conf->rx_fifomask; ++ else ++ sdd->rx_fifomask = FIFO_LVL_MASK(sdd) << ++ port_conf->rx_lvl_offset; ++ ++ if (port_conf->tx_fifomask) ++ sdd->tx_fifomask = port_conf->tx_fifomask; ++ else ++ sdd->tx_fifomask = FIFO_LVL_MASK(sdd) << ++ S3C64XX_SPI_ST_TX_FIFO_LVL_SHIFT; ++} ++ + static int s3c64xx_spi_probe(struct platform_device *pdev) + { + struct resource *mem_res; +@@ -1191,6 +1221,10 @@ static int s3c64xx_spi_probe(struct platform_device *pdev) + sdd->port_id = pdev->id; + } + ++ sdd->fifo_depth = FIFO_DEPTH(sdd); ++ ++ s3c64xx_spi_set_fifomask(sdd); ++ + sdd->cur_bpw = 8; + + sdd->tx_dma.direction = DMA_MEM_TO_DEV; +@@ -1280,7 +1314,7 @@ static int s3c64xx_spi_probe(struct platform_device *pdev) + dev_dbg(&pdev->dev, "Samsung SoC SPI Driver loaded for Bus SPI-%d with %d Targets attached\n", + sdd->port_id, 
host->num_chipselect); + dev_dbg(&pdev->dev, "\tIOmem=[%pR]\tFIFO %dbytes\n", +- mem_res, (FIFO_LVL_MASK(sdd) >> 1) + 1); ++ mem_res, sdd->fifo_depth); + + pm_runtime_mark_last_busy(&pdev->dev); + pm_runtime_put_autosuspend(&pdev->dev); +diff --git a/drivers/usb/typec/ucsi/ucsi_glink.c b/drivers/usb/typec/ucsi/ucsi_glink.c +index 4853141cd10c8..894622b6556a6 100644 +--- a/drivers/usb/typec/ucsi/ucsi_glink.c ++++ b/drivers/usb/typec/ucsi/ucsi_glink.c +@@ -254,6 +254,20 @@ static void pmic_glink_ucsi_notify(struct work_struct *work) + static void pmic_glink_ucsi_register(struct work_struct *work) + { + struct pmic_glink_ucsi *ucsi = container_of(work, struct pmic_glink_ucsi, register_work); ++ int orientation; ++ int i; ++ ++ for (i = 0; i < PMIC_GLINK_MAX_PORTS; i++) { ++ if (!ucsi->port_orientation[i]) ++ continue; ++ orientation = gpiod_get_value(ucsi->port_orientation[i]); ++ ++ if (orientation >= 0) { ++ typec_switch_set(ucsi->port_switch[i], ++ orientation ? TYPEC_ORIENTATION_REVERSE ++ : TYPEC_ORIENTATION_NORMAL); ++ } ++ } + + ucsi_register(ucsi->ucsi); + } +diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c +index fc8eb8d86ca25..5acb2cb79d4bf 100644 +--- a/fs/btrfs/extent_io.c ++++ b/fs/btrfs/extent_io.c +@@ -2410,12 +2410,65 @@ int try_release_extent_mapping(struct page *page, gfp_t mask) + return try_release_extent_state(tree, page, mask); + } + ++struct btrfs_fiemap_entry { ++ u64 offset; ++ u64 phys; ++ u64 len; ++ u32 flags; ++}; ++ ++/* ++ * Indicate the caller of emit_fiemap_extent() that it needs to unlock the file ++ * range from the inode's io tree, unlock the subvolume tree search path, flush ++ * the fiemap cache and relock the file range and research the subvolume tree. ++ * The value here is something negative that can't be confused with a valid ++ * errno value and different from 1 because that's also a return value from ++ * fiemap_fill_next_extent() and also it's often used to mean some btree search ++ * did not find a key, so make it some distinct negative value. ++ */ ++#define BTRFS_FIEMAP_FLUSH_CACHE (-(MAX_ERRNO + 1)) ++ + /* +- * To cache previous fiemap extent ++ * Used to: + * +- * Will be used for merging fiemap extent ++ * - Cache the next entry to be emitted to the fiemap buffer, so that we can ++ * merge extents that are contiguous and can be grouped as a single one; ++ * ++ * - Store extents ready to be written to the fiemap buffer in an intermediary ++ * buffer. This intermediary buffer is to ensure that in case the fiemap ++ * buffer is memory mapped to the fiemap target file, we don't deadlock ++ * during btrfs_page_mkwrite(). This is because during fiemap we are locking ++ * an extent range in order to prevent races with delalloc flushing and ++ * ordered extent completion, which is needed in order to reliably detect ++ * delalloc in holes and prealloc extents. And this can lead to a deadlock ++ * if the fiemap buffer is memory mapped to the file we are running fiemap ++ * against (a silly, useless in practice scenario, but possible) because ++ * btrfs_page_mkwrite() will try to lock the same extent range. + */ + struct fiemap_cache { ++ /* An array of ready fiemap entries. */ ++ struct btrfs_fiemap_entry *entries; ++ /* Number of entries in the entries array. */ ++ int entries_size; ++ /* Index of the next entry in the entries array to write to. 
*/ ++ int entries_pos; ++ /* ++ * Once the entries array is full, this indicates what's the offset for ++ * the next file extent item we must search for in the inode's subvolume ++ * tree after unlocking the extent range in the inode's io tree and ++ * releasing the search path. ++ */ ++ u64 next_search_offset; ++ /* ++ * This matches struct fiemap_extent_info::fi_mapped_extents, we use it ++ * to count ourselves emitted extents and stop instead of relying on ++ * fiemap_fill_next_extent() because we buffer ready fiemap entries at ++ * the @entries array, and we want to stop as soon as we hit the max ++ * amount of extents to map, not just to save time but also to make the ++ * logic at extent_fiemap() simpler. ++ */ ++ unsigned int extents_mapped; ++ /* Fields for the cached extent (unsubmitted, not ready, extent). */ + u64 offset; + u64 phys; + u64 len; +@@ -2423,6 +2476,28 @@ struct fiemap_cache { + bool cached; + }; + ++static int flush_fiemap_cache(struct fiemap_extent_info *fieinfo, ++ struct fiemap_cache *cache) ++{ ++ for (int i = 0; i < cache->entries_pos; i++) { ++ struct btrfs_fiemap_entry *entry = &cache->entries[i]; ++ int ret; ++ ++ ret = fiemap_fill_next_extent(fieinfo, entry->offset, ++ entry->phys, entry->len, ++ entry->flags); ++ /* ++ * Ignore 1 (reached max entries) because we keep track of that ++ * ourselves in emit_fiemap_extent(). ++ */ ++ if (ret < 0) ++ return ret; ++ } ++ cache->entries_pos = 0; ++ ++ return 0; ++} ++ + /* + * Helper to submit fiemap extent. + * +@@ -2437,8 +2512,8 @@ static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo, + struct fiemap_cache *cache, + u64 offset, u64 phys, u64 len, u32 flags) + { ++ struct btrfs_fiemap_entry *entry; + u64 cache_end; +- int ret = 0; + + /* Set at the end of extent_fiemap(). */ + ASSERT((flags & FIEMAP_EXTENT_LAST) == 0); +@@ -2451,7 +2526,9 @@ static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo, + * find an extent that starts at an offset behind the end offset of the + * previous extent we processed. This happens if fiemap is called + * without FIEMAP_FLAG_SYNC and there are ordered extents completing +- * while we call btrfs_next_leaf() (through fiemap_next_leaf_item()). ++ * after we had to unlock the file range, release the search path, emit ++ * the fiemap extents stored in the buffer (cache->entries array) and ++ * the lock the remainder of the range and re-search the btree. + * + * For example we are in leaf X processing its last item, which is the + * file extent item for file range [512K, 1M[, and after +@@ -2564,11 +2641,35 @@ static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo, + + emit: + /* Not mergeable, need to submit cached one */ +- ret = fiemap_fill_next_extent(fieinfo, cache->offset, cache->phys, +- cache->len, cache->flags); +- cache->cached = false; +- if (ret) +- return ret; ++ ++ if (cache->entries_pos == cache->entries_size) { ++ /* ++ * We will need to research for the end offset of the last ++ * stored extent and not from the current offset, because after ++ * unlocking the range and releasing the path, if there's a hole ++ * between that end offset and this current offset, a new extent ++ * may have been inserted due to a new write, so we don't want ++ * to miss it. 
++ */ ++ entry = &cache->entries[cache->entries_size - 1]; ++ cache->next_search_offset = entry->offset + entry->len; ++ cache->cached = false; ++ ++ return BTRFS_FIEMAP_FLUSH_CACHE; ++ } ++ ++ entry = &cache->entries[cache->entries_pos]; ++ entry->offset = cache->offset; ++ entry->phys = cache->phys; ++ entry->len = cache->len; ++ entry->flags = cache->flags; ++ cache->entries_pos++; ++ cache->extents_mapped++; ++ ++ if (cache->extents_mapped == fieinfo->fi_extents_max) { ++ cache->cached = false; ++ return 1; ++ } + assign: + cache->cached = true; + cache->offset = offset; +@@ -2694,8 +2795,8 @@ static int fiemap_search_slot(struct btrfs_inode *inode, struct btrfs_path *path + * neighbour leaf). + * We also need the private clone because holding a read lock on an + * extent buffer of the subvolume's b+tree will make lockdep unhappy +- * when we call fiemap_fill_next_extent(), because that may cause a page +- * fault when filling the user space buffer with fiemap data. ++ * when we check if extents are shared, as backref walking may need to ++ * lock the same leaf we are processing. + */ + clone = btrfs_clone_extent_buffer(path->nodes[0]); + if (!clone) +@@ -2735,34 +2836,16 @@ static int fiemap_process_hole(struct btrfs_inode *inode, + * it beyond i_size. + */ + while (cur_offset < end && cur_offset < i_size) { +- struct extent_state *cached_state = NULL; + u64 delalloc_start; + u64 delalloc_end; + u64 prealloc_start; +- u64 lockstart; +- u64 lockend; + u64 prealloc_len = 0; + bool delalloc; + +- lockstart = round_down(cur_offset, inode->root->fs_info->sectorsize); +- lockend = round_up(end, inode->root->fs_info->sectorsize); +- +- /* +- * We are only locking for the delalloc range because that's the +- * only thing that can change here. With fiemap we have a lock +- * on the inode, so no buffered or direct writes can happen. +- * +- * However mmaps and normal page writeback will cause this to +- * change arbitrarily. We have to lock the extent lock here to +- * make sure that nobody messes with the tree while we're doing +- * btrfs_find_delalloc_in_range. 
+- */ +- lock_extent(&inode->io_tree, lockstart, lockend, &cached_state); + delalloc = btrfs_find_delalloc_in_range(inode, cur_offset, end, + delalloc_cached_state, + &delalloc_start, + &delalloc_end); +- unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state); + if (!delalloc) + break; + +@@ -2930,6 +3013,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, + u64 start, u64 len) + { + const u64 ino = btrfs_ino(inode); ++ struct extent_state *cached_state = NULL; + struct extent_state *delalloc_cached_state = NULL; + struct btrfs_path *path; + struct fiemap_cache cache = { 0 }; +@@ -2942,18 +3026,23 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, + bool stopped = false; + int ret; + ++ cache.entries_size = PAGE_SIZE / sizeof(struct btrfs_fiemap_entry); ++ cache.entries = kmalloc_array(cache.entries_size, ++ sizeof(struct btrfs_fiemap_entry), ++ GFP_KERNEL); + backref_ctx = btrfs_alloc_backref_share_check_ctx(); + path = btrfs_alloc_path(); +- if (!backref_ctx || !path) { ++ if (!cache.entries || !backref_ctx || !path) { + ret = -ENOMEM; + goto out; + } + ++restart: + range_start = round_down(start, sectorsize); + range_end = round_up(start + len, sectorsize); + prev_extent_end = range_start; + +- btrfs_inode_lock(inode, BTRFS_ILOCK_SHARED); ++ lock_extent(&inode->io_tree, range_start, range_end, &cached_state); + + ret = fiemap_find_last_extent_offset(inode, path, &last_extent_end); + if (ret < 0) +@@ -3079,7 +3168,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, + if (ret < 0) { + goto out_unlock; + } else if (ret > 0) { +- /* fiemap_fill_next_extent() told us to stop. */ ++ /* emit_fiemap_extent() told us to stop. */ + stopped = true; + break; + } +@@ -3102,16 +3191,6 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, + } + + check_eof_delalloc: +- /* +- * Release (and free) the path before emitting any final entries to +- * fiemap_fill_next_extent() to keep lockdep happy. This is because +- * once we find no more file extent items exist, we may have a +- * non-cloned leaf, and fiemap_fill_next_extent() can trigger page +- * faults when copying data to the user space buffer. +- */ +- btrfs_free_path(path); +- path = NULL; +- + if (!stopped && prev_extent_end < range_end) { + ret = fiemap_process_hole(inode, fieinfo, &cache, + &delalloc_cached_state, backref_ctx, +@@ -3125,28 +3204,16 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, + const u64 i_size = i_size_read(&inode->vfs_inode); + + if (prev_extent_end < i_size) { +- struct extent_state *cached_state = NULL; + u64 delalloc_start; + u64 delalloc_end; +- u64 lockstart; +- u64 lockend; + bool delalloc; + +- lockstart = round_down(prev_extent_end, sectorsize); +- lockend = round_up(i_size, sectorsize); +- +- /* +- * See the comment in fiemap_process_hole as to why +- * we're doing the locking here. 
+- */ +- lock_extent(&inode->io_tree, lockstart, lockend, &cached_state); + delalloc = btrfs_find_delalloc_in_range(inode, + prev_extent_end, + i_size - 1, + &delalloc_cached_state, + &delalloc_start, + &delalloc_end); +- unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state); + if (!delalloc) + cache.flags |= FIEMAP_EXTENT_LAST; + } else { +@@ -3154,12 +3221,39 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, + } + } + +- ret = emit_last_fiemap_cache(fieinfo, &cache); +- + out_unlock: +- btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED); ++ unlock_extent(&inode->io_tree, range_start, range_end, &cached_state); ++ ++ if (ret == BTRFS_FIEMAP_FLUSH_CACHE) { ++ btrfs_release_path(path); ++ ret = flush_fiemap_cache(fieinfo, &cache); ++ if (ret) ++ goto out; ++ len -= cache.next_search_offset - start; ++ start = cache.next_search_offset; ++ goto restart; ++ } else if (ret < 0) { ++ goto out; ++ } ++ ++ /* ++ * Must free the path before emitting to the fiemap buffer because we ++ * may have a non-cloned leaf and if the fiemap buffer is memory mapped ++ * to a file, a write into it (through btrfs_page_mkwrite()) may trigger ++ * waiting for an ordered extent that in order to complete needs to ++ * modify that leaf, therefore leading to a deadlock. ++ */ ++ btrfs_free_path(path); ++ path = NULL; ++ ++ ret = flush_fiemap_cache(fieinfo, &cache); ++ if (ret) ++ goto out; ++ ++ ret = emit_last_fiemap_cache(fieinfo, &cache); + out: + free_extent_state(delalloc_cached_state); ++ kfree(cache.entries); + btrfs_free_backref_share_ctx(backref_ctx); + btrfs_free_path(path); + return ret; +diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c +index ca79c2b8adc46..1ac14223ffb50 100644 +--- a/fs/btrfs/inode.c ++++ b/fs/btrfs/inode.c +@@ -7813,6 +7813,7 @@ struct iomap_dio *btrfs_dio_write(struct kiocb *iocb, struct iov_iter *iter, + static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, + u64 start, u64 len) + { ++ struct btrfs_inode *btrfs_inode = BTRFS_I(inode); + int ret; + + ret = fiemap_prep(inode, fieinfo, start, &len, 0); +@@ -7838,7 +7839,26 @@ static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, + return ret; + } + +- return extent_fiemap(BTRFS_I(inode), fieinfo, start, len); ++ btrfs_inode_lock(btrfs_inode, BTRFS_ILOCK_SHARED); ++ ++ /* ++ * We did an initial flush to avoid holding the inode's lock while ++ * triggering writeback and waiting for the completion of IO and ordered ++ * extents. Now after we locked the inode we do it again, because it's ++ * possible a new write may have happened in between those two steps. 
++ */ ++ if (fieinfo->fi_flags & FIEMAP_FLAG_SYNC) { ++ ret = btrfs_wait_ordered_range(inode, 0, LLONG_MAX); ++ if (ret) { ++ btrfs_inode_unlock(btrfs_inode, BTRFS_ILOCK_SHARED); ++ return ret; ++ } ++ } ++ ++ ret = extent_fiemap(btrfs_inode, fieinfo, start, len); ++ btrfs_inode_unlock(btrfs_inode, BTRFS_ILOCK_SHARED); ++ ++ return ret; + } + + static int btrfs_writepages(struct address_space *mapping, +diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c +index 522596060252f..c7e52d980cd75 100644 +--- a/fs/nfsd/nfs4state.c ++++ b/fs/nfsd/nfs4state.c +@@ -2886,12 +2886,9 @@ static void + nfsd4_cb_recall_any_release(struct nfsd4_callback *cb) + { + struct nfs4_client *clp = cb->cb_clp; +- struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + +- spin_lock(&nn->client_lock); + clear_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags); +- put_client_renew_locked(clp); +- spin_unlock(&nn->client_lock); ++ drop_client(clp); + } + + static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = { +@@ -6273,7 +6270,7 @@ deleg_reaper(struct nfsd_net *nn) + list_add(&clp->cl_ra_cblist, &cblist); + + /* release in nfsd4_cb_recall_any_release */ +- atomic_inc(&clp->cl_rpc_users); ++ kref_get(&clp->cl_nfsdfs.cl_ref); + set_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags); + clp->cl_ra_time = ktime_get_boottime_seconds(); + } +diff --git a/fs/pipe.c b/fs/pipe.c +index a234035cc375d..ba4376341ddd2 100644 +--- a/fs/pipe.c ++++ b/fs/pipe.c +@@ -425,6 +425,18 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) + bool was_empty = false; + bool wake_next_writer = false; + ++ /* ++ * Reject writing to watch queue pipes before the point where we lock ++ * the pipe. ++ * Otherwise, lockdep would be unhappy if the caller already has another ++ * pipe locked. ++ * If we had to support locking a normal pipe and a notification pipe at ++ * the same time, we could set up lockdep annotations for that, but ++ * since we don't actually need that, it's simpler to just bail here. ++ */ ++ if (pipe_has_watch_queue(pipe)) ++ return -EXDEV; ++ + /* Null write succeeds. */ + if (unlikely(total_len == 0)) + return 0; +@@ -437,11 +449,6 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) + goto out; + } + +- if (pipe_has_watch_queue(pipe)) { +- ret = -EXDEV; +- goto out; +- } +- + /* + * If it wasn't empty we try to merge new data into + * the last buffer. 
+diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c +index 15e1215bc4e5a..1a9e705d65002 100644 +--- a/fs/smb/client/cached_dir.c ++++ b/fs/smb/client/cached_dir.c +@@ -401,6 +401,7 @@ smb2_close_cached_fid(struct kref *ref) + { + struct cached_fid *cfid = container_of(ref, struct cached_fid, + refcount); ++ int rc; + + spin_lock(&cfid->cfids->cfid_list_lock); + if (cfid->on_list) { +@@ -414,9 +415,10 @@ smb2_close_cached_fid(struct kref *ref) + cfid->dentry = NULL; + + if (cfid->is_open) { +- SMB2_close(0, cfid->tcon, cfid->fid.persistent_fid, ++ rc = SMB2_close(0, cfid->tcon, cfid->fid.persistent_fid, + cfid->fid.volatile_fid); +- atomic_dec(&cfid->tcon->num_remote_opens); ++ if (rc != -EBUSY && rc != -EAGAIN) ++ atomic_dec(&cfid->tcon->num_remote_opens); + } + + free_cached_dir(cfid); +diff --git a/fs/smb/client/cifs_debug.c b/fs/smb/client/cifs_debug.c +index 7206167f4184a..6c85edb8635d0 100644 +--- a/fs/smb/client/cifs_debug.c ++++ b/fs/smb/client/cifs_debug.c +@@ -250,6 +250,8 @@ static int cifs_debug_files_proc_show(struct seq_file *m, void *v) + spin_lock(&cifs_tcp_ses_lock); + list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) { + list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { ++ if (cifs_ses_exiting(ses)) ++ continue; + list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { + spin_lock(&tcon->open_file_lock); + list_for_each_entry(cfile, &tcon->openFileList, tlist) { +@@ -654,6 +656,8 @@ static ssize_t cifs_stats_proc_write(struct file *file, + } + #endif /* CONFIG_CIFS_STATS2 */ + list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { ++ if (cifs_ses_exiting(ses)) ++ continue; + list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { + atomic_set(&tcon->num_smbs_sent, 0); + spin_lock(&tcon->stat_lock); +@@ -732,6 +736,8 @@ static int cifs_stats_proc_show(struct seq_file *m, void *v) + } + #endif /* STATS2 */ + list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { ++ if (cifs_ses_exiting(ses)) ++ continue; + list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { + i++; + seq_printf(m, "\n%d) %s", i, tcon->tree_name); +diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c +index 2131638f26d0b..fcb93a66e47cb 100644 +--- a/fs/smb/client/cifsfs.c ++++ b/fs/smb/client/cifsfs.c +@@ -159,6 +159,7 @@ struct workqueue_struct *decrypt_wq; + struct workqueue_struct *fileinfo_put_wq; + struct workqueue_struct *cifsoplockd_wq; + struct workqueue_struct *deferredclose_wq; ++struct workqueue_struct *serverclose_wq; + __u32 cifs_lock_secret; + + /* +@@ -1877,6 +1878,13 @@ init_cifs(void) + goto out_destroy_cifsoplockd_wq; + } + ++ serverclose_wq = alloc_workqueue("serverclose", ++ WQ_FREEZABLE|WQ_MEM_RECLAIM, 0); ++ if (!serverclose_wq) { ++ rc = -ENOMEM; ++ goto out_destroy_serverclose_wq; ++ } ++ + rc = cifs_init_inodecache(); + if (rc) + goto out_destroy_deferredclose_wq; +@@ -1951,6 +1959,8 @@ init_cifs(void) + destroy_workqueue(decrypt_wq); + out_destroy_cifsiod_wq: + destroy_workqueue(cifsiod_wq); ++out_destroy_serverclose_wq: ++ destroy_workqueue(serverclose_wq); + out_clean_proc: + cifs_proc_clean(); + return rc; +@@ -1980,6 +1990,7 @@ exit_cifs(void) + destroy_workqueue(cifsoplockd_wq); + destroy_workqueue(decrypt_wq); + destroy_workqueue(fileinfo_put_wq); ++ destroy_workqueue(serverclose_wq); + destroy_workqueue(cifsiod_wq); + cifs_proc_clean(); + } +diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h +index 35a12413bbee6..a878b1e5aa313 100644 +--- a/fs/smb/client/cifsglob.h ++++ 
b/fs/smb/client/cifsglob.h +@@ -425,10 +425,10 @@ struct smb_version_operations { + /* set fid protocol-specific info */ + void (*set_fid)(struct cifsFileInfo *, struct cifs_fid *, __u32); + /* close a file */ +- void (*close)(const unsigned int, struct cifs_tcon *, ++ int (*close)(const unsigned int, struct cifs_tcon *, + struct cifs_fid *); + /* close a file, returning file attributes and timestamps */ +- void (*close_getattr)(const unsigned int xid, struct cifs_tcon *tcon, ++ int (*close_getattr)(const unsigned int xid, struct cifs_tcon *tcon, + struct cifsFileInfo *pfile_info); + /* send a flush request to the server */ + int (*flush)(const unsigned int, struct cifs_tcon *, struct cifs_fid *); +@@ -1408,6 +1408,7 @@ struct cifsFileInfo { + bool invalidHandle:1; /* file closed via session abend */ + bool swapfile:1; + bool oplock_break_cancelled:1; ++ bool offload:1; /* offload final part of _put to a wq */ + unsigned int oplock_epoch; /* epoch from the lease break */ + __u32 oplock_level; /* oplock/lease level from the lease break */ + int count; +@@ -1416,6 +1417,7 @@ struct cifsFileInfo { + struct cifs_search_info srch_inf; + struct work_struct oplock_break; /* work for oplock breaks */ + struct work_struct put; /* work for the final part of _put */ ++ struct work_struct serverclose; /* work for serverclose */ + struct delayed_work deferred; + bool deferred_close_scheduled; /* Flag to indicate close is scheduled */ + char *symlink_target; +@@ -2073,6 +2075,7 @@ extern struct workqueue_struct *decrypt_wq; + extern struct workqueue_struct *fileinfo_put_wq; + extern struct workqueue_struct *cifsoplockd_wq; + extern struct workqueue_struct *deferredclose_wq; ++extern struct workqueue_struct *serverclose_wq; + extern __u32 cifs_lock_secret; + + extern mempool_t *cifs_mid_poolp; +@@ -2278,4 +2281,14 @@ struct smb2_compound_vars { + struct smb2_file_link_info link_info; + }; + ++static inline bool cifs_ses_exiting(struct cifs_ses *ses) ++{ ++ bool ret; ++ ++ spin_lock(&ses->ses_lock); ++ ret = ses->ses_status == SES_EXITING; ++ spin_unlock(&ses->ses_lock); ++ return ret; ++} ++ + #endif /* _CIFS_GLOB_H */ +diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c +index 4c958129181d3..97776dd12b6b8 100644 +--- a/fs/smb/client/connect.c ++++ b/fs/smb/client/connect.c +@@ -178,6 +178,8 @@ cifs_signal_cifsd_for_reconnect(struct TCP_Server_Info *server, + + spin_lock(&cifs_tcp_ses_lock); + list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { ++ if (cifs_ses_exiting(ses)) ++ continue; + spin_lock(&ses->chan_lock); + for (i = 0; i < ses->chan_count; i++) { + if (!ses->chans[i].server) +@@ -3981,13 +3983,14 @@ cifs_set_vol_auth(struct smb3_fs_context *ctx, struct cifs_ses *ses) + } + + static struct cifs_tcon * +-cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid) ++__cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid) + { + int rc; + struct cifs_tcon *master_tcon = cifs_sb_master_tcon(cifs_sb); + struct cifs_ses *ses; + struct cifs_tcon *tcon = NULL; + struct smb3_fs_context *ctx; ++ char *origin_fullpath = NULL; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (ctx == NULL) +@@ -4011,6 +4014,7 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid) + ctx->sign = master_tcon->ses->sign; + ctx->seal = master_tcon->seal; + ctx->witness = master_tcon->use_witness; ++ ctx->dfs_root_ses = master_tcon->ses->dfs_root_ses; + + rc = cifs_set_vol_auth(ctx, master_tcon->ses); + if (rc) { +@@ -4030,12 +4034,39 @@ cifs_construct_tcon(struct cifs_sb_info 
*cifs_sb, kuid_t fsuid) + goto out; + } + ++#ifdef CONFIG_CIFS_DFS_UPCALL ++ spin_lock(&master_tcon->tc_lock); ++ if (master_tcon->origin_fullpath) { ++ spin_unlock(&master_tcon->tc_lock); ++ origin_fullpath = dfs_get_path(cifs_sb, cifs_sb->ctx->source); ++ if (IS_ERR(origin_fullpath)) { ++ tcon = ERR_CAST(origin_fullpath); ++ origin_fullpath = NULL; ++ cifs_put_smb_ses(ses); ++ goto out; ++ } ++ } else { ++ spin_unlock(&master_tcon->tc_lock); ++ } ++#endif ++ + tcon = cifs_get_tcon(ses, ctx); + if (IS_ERR(tcon)) { + cifs_put_smb_ses(ses); + goto out; + } + ++#ifdef CONFIG_CIFS_DFS_UPCALL ++ if (origin_fullpath) { ++ spin_lock(&tcon->tc_lock); ++ tcon->origin_fullpath = origin_fullpath; ++ spin_unlock(&tcon->tc_lock); ++ origin_fullpath = NULL; ++ queue_delayed_work(dfscache_wq, &tcon->dfs_cache_work, ++ dfs_cache_get_ttl() * HZ); ++ } ++#endif ++ + #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY + if (cap_unix(ses)) + reset_cifs_unix_caps(0, tcon, NULL, ctx); +@@ -4044,11 +4075,23 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid) + out: + kfree(ctx->username); + kfree_sensitive(ctx->password); ++ kfree(origin_fullpath); + kfree(ctx); + + return tcon; + } + ++static struct cifs_tcon * ++cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid) ++{ ++ struct cifs_tcon *ret; ++ ++ cifs_mount_lock(); ++ ret = __cifs_construct_tcon(cifs_sb, fsuid); ++ cifs_mount_unlock(); ++ return ret; ++} ++ + struct cifs_tcon * + cifs_sb_master_tcon(struct cifs_sb_info *cifs_sb) + { +diff --git a/fs/smb/client/dir.c b/fs/smb/client/dir.c +index 580a27a3a7e62..855468a32904e 100644 +--- a/fs/smb/client/dir.c ++++ b/fs/smb/client/dir.c +@@ -189,6 +189,7 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned + int disposition; + struct TCP_Server_Info *server = tcon->ses->server; + struct cifs_open_parms oparms; ++ int rdwr_for_fscache = 0; + + *oplock = 0; + if (tcon->ses->server->oplocks) +@@ -200,6 +201,10 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned + return PTR_ERR(full_path); + } + ++ /* If we're caching, we need to be able to fill in around partial writes. */ ++ if (cifs_fscache_enabled(inode) && (oflags & O_ACCMODE) == O_WRONLY) ++ rdwr_for_fscache = 1; ++ + #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY + if (tcon->unix_ext && cap_unix(tcon->ses) && !tcon->broken_posix_open && + (CIFS_UNIX_POSIX_PATH_OPS_CAP & +@@ -276,6 +281,8 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned + desired_access |= GENERIC_READ; /* is this too little? 
*/ + if (OPEN_FMODE(oflags) & FMODE_WRITE) + desired_access |= GENERIC_WRITE; ++ if (rdwr_for_fscache == 1) ++ desired_access |= GENERIC_READ; + + disposition = FILE_OVERWRITE_IF; + if ((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) +@@ -304,6 +311,7 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned + if (!tcon->unix_ext && (mode & S_IWUGO) == 0) + create_options |= CREATE_OPTION_READONLY; + ++retry_open: + oparms = (struct cifs_open_parms) { + .tcon = tcon, + .cifs_sb = cifs_sb, +@@ -317,8 +325,15 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned + rc = server->ops->open(xid, &oparms, oplock, buf); + if (rc) { + cifs_dbg(FYI, "cifs_create returned 0x%x\n", rc); ++ if (rc == -EACCES && rdwr_for_fscache == 1) { ++ desired_access &= ~GENERIC_READ; ++ rdwr_for_fscache = 2; ++ goto retry_open; ++ } + goto out; + } ++ if (rdwr_for_fscache == 2) ++ cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE); + + #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY + /* +diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c +index c711d5eb2987e..53a8c633221b9 100644 +--- a/fs/smb/client/file.c ++++ b/fs/smb/client/file.c +@@ -206,12 +206,12 @@ cifs_mark_open_files_invalid(struct cifs_tcon *tcon) + */ + } + +-static inline int cifs_convert_flags(unsigned int flags) ++static inline int cifs_convert_flags(unsigned int flags, int rdwr_for_fscache) + { + if ((flags & O_ACCMODE) == O_RDONLY) + return GENERIC_READ; + else if ((flags & O_ACCMODE) == O_WRONLY) +- return GENERIC_WRITE; ++ return rdwr_for_fscache == 1 ? (GENERIC_READ | GENERIC_WRITE) : GENERIC_WRITE; + else if ((flags & O_ACCMODE) == O_RDWR) { + /* GENERIC_ALL is too much permission to request + can cause unnecessary access denied on create */ +@@ -348,11 +348,16 @@ static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_ + int create_options = CREATE_NOT_DIR; + struct TCP_Server_Info *server = tcon->ses->server; + struct cifs_open_parms oparms; ++ int rdwr_for_fscache = 0; + + if (!server->ops->open) + return -ENOSYS; + +- desired_access = cifs_convert_flags(f_flags); ++ /* If we're caching, we need to be able to fill in around partial writes. 
*/ ++ if (cifs_fscache_enabled(inode) && (f_flags & O_ACCMODE) == O_WRONLY) ++ rdwr_for_fscache = 1; ++ ++ desired_access = cifs_convert_flags(f_flags, rdwr_for_fscache); + + /********************************************************************* + * open flag mapping table: +@@ -389,6 +394,7 @@ static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_ + if (f_flags & O_DIRECT) + create_options |= CREATE_NO_BUFFER; + ++retry_open: + oparms = (struct cifs_open_parms) { + .tcon = tcon, + .cifs_sb = cifs_sb, +@@ -400,8 +406,16 @@ static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_ + }; + + rc = server->ops->open(xid, &oparms, oplock, buf); +- if (rc) ++ if (rc) { ++ if (rc == -EACCES && rdwr_for_fscache == 1) { ++ desired_access = cifs_convert_flags(f_flags, 0); ++ rdwr_for_fscache = 2; ++ goto retry_open; ++ } + return rc; ++ } ++ if (rdwr_for_fscache == 2) ++ cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE); + + /* TODO: Add support for calling posix query info but with passing in fid */ + if (tcon->unix_ext) +@@ -445,6 +459,7 @@ cifs_down_write(struct rw_semaphore *sem) + } + + static void cifsFileInfo_put_work(struct work_struct *work); ++void serverclose_work(struct work_struct *work); + + struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file, + struct tcon_link *tlink, __u32 oplock, +@@ -491,6 +506,7 @@ struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file, + cfile->tlink = cifs_get_tlink(tlink); + INIT_WORK(&cfile->oplock_break, cifs_oplock_break); + INIT_WORK(&cfile->put, cifsFileInfo_put_work); ++ INIT_WORK(&cfile->serverclose, serverclose_work); + INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close); + mutex_init(&cfile->fh_mutex); + spin_lock_init(&cfile->file_info_lock); +@@ -582,6 +598,40 @@ static void cifsFileInfo_put_work(struct work_struct *work) + cifsFileInfo_put_final(cifs_file); + } + ++void serverclose_work(struct work_struct *work) ++{ ++ struct cifsFileInfo *cifs_file = container_of(work, ++ struct cifsFileInfo, serverclose); ++ ++ struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink); ++ ++ struct TCP_Server_Info *server = tcon->ses->server; ++ int rc = 0; ++ int retries = 0; ++ int MAX_RETRIES = 4; ++ ++ do { ++ if (server->ops->close_getattr) ++ rc = server->ops->close_getattr(0, tcon, cifs_file); ++ else if (server->ops->close) ++ rc = server->ops->close(0, tcon, &cifs_file->fid); ++ ++ if (rc == -EBUSY || rc == -EAGAIN) { ++ retries++; ++ msleep(250); ++ } ++ } while ((rc == -EBUSY || rc == -EAGAIN) && (retries < MAX_RETRIES) ++ ); ++ ++ if (retries == MAX_RETRIES) ++ pr_warn("Serverclose failed %d times, giving up\n", MAX_RETRIES); ++ ++ if (cifs_file->offload) ++ queue_work(fileinfo_put_wq, &cifs_file->put); ++ else ++ cifsFileInfo_put_final(cifs_file); ++} ++ + /** + * cifsFileInfo_put - release a reference of file priv data + * +@@ -622,10 +672,13 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, + struct cifs_fid fid = {}; + struct cifs_pending_open open; + bool oplock_break_cancelled; ++ bool serverclose_offloaded = false; + + spin_lock(&tcon->open_file_lock); + spin_lock(&cifsi->open_file_lock); + spin_lock(&cifs_file->file_info_lock); ++ ++ cifs_file->offload = offload; + if (--cifs_file->count > 0) { + spin_unlock(&cifs_file->file_info_lock); + spin_unlock(&cifsi->open_file_lock); +@@ -667,13 +720,20 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, + if (!tcon->need_reconnect && !cifs_file->invalidHandle) { + struct 
TCP_Server_Info *server = tcon->ses->server; + unsigned int xid; ++ int rc = 0; + + xid = get_xid(); + if (server->ops->close_getattr) +- server->ops->close_getattr(xid, tcon, cifs_file); ++ rc = server->ops->close_getattr(xid, tcon, cifs_file); + else if (server->ops->close) +- server->ops->close(xid, tcon, &cifs_file->fid); ++ rc = server->ops->close(xid, tcon, &cifs_file->fid); + _free_xid(xid); ++ ++ if (rc == -EBUSY || rc == -EAGAIN) { ++ // Server close failed, hence offloading it as an async op ++ queue_work(serverclose_wq, &cifs_file->serverclose); ++ serverclose_offloaded = true; ++ } + } + + if (oplock_break_cancelled) +@@ -681,10 +741,15 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, + + cifs_del_pending_open(&open); + +- if (offload) +- queue_work(fileinfo_put_wq, &cifs_file->put); +- else +- cifsFileInfo_put_final(cifs_file); ++ // if serverclose has been offloaded to wq (on failure), it will ++ // handle offloading put as well. If serverclose not offloaded, ++ // we need to handle offloading put here. ++ if (!serverclose_offloaded) { ++ if (offload) ++ queue_work(fileinfo_put_wq, &cifs_file->put); ++ else ++ cifsFileInfo_put_final(cifs_file); ++ } + } + + int cifs_open(struct inode *inode, struct file *file) +@@ -834,11 +899,11 @@ int cifs_open(struct inode *inode, struct file *file) + use_cache: + fscache_use_cookie(cifs_inode_cookie(file_inode(file)), + file->f_mode & FMODE_WRITE); +- if (file->f_flags & O_DIRECT && +- (!((file->f_flags & O_ACCMODE) != O_RDONLY) || +- file->f_flags & O_APPEND)) +- cifs_invalidate_cache(file_inode(file), +- FSCACHE_INVAL_DIO_WRITE); ++ if (!(file->f_flags & O_DIRECT)) ++ goto out; ++ if ((file->f_flags & (O_ACCMODE | O_APPEND)) == O_RDONLY) ++ goto out; ++ cifs_invalidate_cache(file_inode(file), FSCACHE_INVAL_DIO_WRITE); + + out: + free_dentry_path(page); +@@ -903,6 +968,7 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush) + int disposition = FILE_OPEN; + int create_options = CREATE_NOT_DIR; + struct cifs_open_parms oparms; ++ int rdwr_for_fscache = 0; + + xid = get_xid(); + mutex_lock(&cfile->fh_mutex); +@@ -966,7 +1032,11 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush) + } + #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ + +- desired_access = cifs_convert_flags(cfile->f_flags); ++ /* If we're caching, we need to be able to fill in around partial writes. 
*/ ++ if (cifs_fscache_enabled(inode) && (cfile->f_flags & O_ACCMODE) == O_WRONLY) ++ rdwr_for_fscache = 1; ++ ++ desired_access = cifs_convert_flags(cfile->f_flags, rdwr_for_fscache); + + /* O_SYNC also has bit for O_DSYNC so following check picks up either */ + if (cfile->f_flags & O_SYNC) +@@ -978,6 +1048,7 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush) + if (server->ops->get_lease_key) + server->ops->get_lease_key(inode, &cfile->fid); + ++retry_open: + oparms = (struct cifs_open_parms) { + .tcon = tcon, + .cifs_sb = cifs_sb, +@@ -1003,6 +1074,11 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush) + /* indicate that we need to relock the file */ + oparms.reconnect = true; + } ++ if (rc == -EACCES && rdwr_for_fscache == 1) { ++ desired_access = cifs_convert_flags(cfile->f_flags, 0); ++ rdwr_for_fscache = 2; ++ goto retry_open; ++ } + + if (rc) { + mutex_unlock(&cfile->fh_mutex); +@@ -1011,6 +1087,9 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush) + goto reopen_error_exit; + } + ++ if (rdwr_for_fscache == 2) ++ cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE); ++ + #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY + reopen_success: + #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ +diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c +index e4a6b240d2263..58567ae617b9f 100644 +--- a/fs/smb/client/fs_context.c ++++ b/fs/smb/client/fs_context.c +@@ -37,7 +37,7 @@ + #include "rfc1002pdu.h" + #include "fs_context.h" + +-static DEFINE_MUTEX(cifs_mount_mutex); ++DEFINE_MUTEX(cifs_mount_mutex); + + static const match_table_t cifs_smb_version_tokens = { + { Smb_1, SMB1_VERSION_STRING }, +@@ -752,9 +752,9 @@ static int smb3_get_tree(struct fs_context *fc) + + if (err) + return err; +- mutex_lock(&cifs_mount_mutex); ++ cifs_mount_lock(); + ret = smb3_get_tree_common(fc); +- mutex_unlock(&cifs_mount_mutex); ++ cifs_mount_unlock(); + return ret; + } + +diff --git a/fs/smb/client/fs_context.h b/fs/smb/client/fs_context.h +index cf46916286d02..8cfc25b609b6b 100644 +--- a/fs/smb/client/fs_context.h ++++ b/fs/smb/client/fs_context.h +@@ -293,4 +293,16 @@ extern void smb3_update_mnt_flags(struct cifs_sb_info *cifs_sb); + #define MAX_CACHED_FIDS 16 + extern char *cifs_sanitize_prepath(char *prepath, gfp_t gfp); + ++extern struct mutex cifs_mount_mutex; ++ ++static inline void cifs_mount_lock(void) ++{ ++ mutex_lock(&cifs_mount_mutex); ++} ++ ++static inline void cifs_mount_unlock(void) ++{ ++ mutex_unlock(&cifs_mount_mutex); ++} ++ + #endif +diff --git a/fs/smb/client/fscache.c b/fs/smb/client/fscache.c +index e5cad149f5a2d..a4ee801b29394 100644 +--- a/fs/smb/client/fscache.c ++++ b/fs/smb/client/fscache.c +@@ -12,6 +12,16 @@ + #include "cifs_fs_sb.h" + #include "cifsproto.h" + ++/* ++ * Key for fscache inode. [!] Contents must match comparisons in cifs_find_inode(). 
++ */ ++struct cifs_fscache_inode_key { ++ ++ __le64 uniqueid; /* server inode number */ ++ __le64 createtime; /* creation time on server */ ++ u8 type; /* S_IFMT file type */ ++} __packed; ++ + static void cifs_fscache_fill_volume_coherency( + struct cifs_tcon *tcon, + struct cifs_fscache_volume_coherency_data *cd) +@@ -97,15 +107,19 @@ void cifs_fscache_release_super_cookie(struct cifs_tcon *tcon) + void cifs_fscache_get_inode_cookie(struct inode *inode) + { + struct cifs_fscache_inode_coherency_data cd; ++ struct cifs_fscache_inode_key key; + struct cifsInodeInfo *cifsi = CIFS_I(inode); + struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); + ++ key.uniqueid = cpu_to_le64(cifsi->uniqueid); ++ key.createtime = cpu_to_le64(cifsi->createtime); ++ key.type = (inode->i_mode & S_IFMT) >> 12; + cifs_fscache_fill_coherency(&cifsi->netfs.inode, &cd); + + cifsi->netfs.cache = + fscache_acquire_cookie(tcon->fscache, 0, +- &cifsi->uniqueid, sizeof(cifsi->uniqueid), ++ &key, sizeof(key), + &cd, sizeof(cd), + i_size_read(&cifsi->netfs.inode)); + if (cifsi->netfs.cache) +diff --git a/fs/smb/client/fscache.h b/fs/smb/client/fscache.h +index a3d73720914f8..1f2ea9f5cc9a8 100644 +--- a/fs/smb/client/fscache.h ++++ b/fs/smb/client/fscache.h +@@ -109,6 +109,11 @@ static inline void cifs_readahead_to_fscache(struct inode *inode, + __cifs_readahead_to_fscache(inode, pos, len); + } + ++static inline bool cifs_fscache_enabled(struct inode *inode) ++{ ++ return fscache_cookie_enabled(cifs_inode_cookie(inode)); ++} ++ + #else /* CONFIG_CIFS_FSCACHE */ + static inline + void cifs_fscache_fill_coherency(struct inode *inode, +@@ -124,6 +129,7 @@ static inline void cifs_fscache_release_inode_cookie(struct inode *inode) {} + static inline void cifs_fscache_unuse_inode_cookie(struct inode *inode, bool update) {} + static inline struct fscache_cookie *cifs_inode_cookie(struct inode *inode) { return NULL; } + static inline void cifs_invalidate_cache(struct inode *inode, unsigned int flags) {} ++static inline bool cifs_fscache_enabled(struct inode *inode) { return false; } + + static inline int cifs_fscache_query_occupancy(struct inode *inode, + pgoff_t first, unsigned int nr_pages, +diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c +index cb9e719e67ae2..fa6330d586e89 100644 +--- a/fs/smb/client/inode.c ++++ b/fs/smb/client/inode.c +@@ -1390,6 +1390,8 @@ cifs_find_inode(struct inode *inode, void *opaque) + { + struct cifs_fattr *fattr = opaque; + ++ /* [!] The compared values must be the same in struct cifs_fscache_inode_key. 
*/ ++ + /* don't match inode with different uniqueid */ + if (CIFS_I(inode)->uniqueid != fattr->cf_uniqueid) + return 0; +diff --git a/fs/smb/client/ioctl.c b/fs/smb/client/ioctl.c +index 73ededa8eba5c..204dd7c47126e 100644 +--- a/fs/smb/client/ioctl.c ++++ b/fs/smb/client/ioctl.c +@@ -246,7 +246,9 @@ static int cifs_dump_full_key(struct cifs_tcon *tcon, struct smb3_full_key_debug + spin_lock(&cifs_tcp_ses_lock); + list_for_each_entry(server_it, &cifs_tcp_ses_list, tcp_ses_list) { + list_for_each_entry(ses_it, &server_it->smb_ses_list, smb_ses_list) { +- if (ses_it->Suid == out.session_id) { ++ spin_lock(&ses_it->ses_lock); ++ if (ses_it->ses_status != SES_EXITING && ++ ses_it->Suid == out.session_id) { + ses = ses_it; + /* + * since we are using the session outside the crit +@@ -254,9 +256,11 @@ static int cifs_dump_full_key(struct cifs_tcon *tcon, struct smb3_full_key_debug + * so increment its refcount + */ + cifs_smb_ses_inc_refcount(ses); ++ spin_unlock(&ses_it->ses_lock); + found = true; + goto search_end; + } ++ spin_unlock(&ses_it->ses_lock); + } + } + search_end: +diff --git a/fs/smb/client/misc.c b/fs/smb/client/misc.c +index c2137ea3c2538..ef573e3f8e52a 100644 +--- a/fs/smb/client/misc.c ++++ b/fs/smb/client/misc.c +@@ -489,6 +489,8 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv) + /* look up tcon based on tid & uid */ + spin_lock(&cifs_tcp_ses_lock); + list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { ++ if (cifs_ses_exiting(ses)) ++ continue; + list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { + if (tcon->tid != buf->Tid) + continue; +diff --git a/fs/smb/client/smb1ops.c b/fs/smb/client/smb1ops.c +index 64e25233e85de..1aebcf95c1951 100644 +--- a/fs/smb/client/smb1ops.c ++++ b/fs/smb/client/smb1ops.c +@@ -753,11 +753,11 @@ cifs_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock) + cinode->can_cache_brlcks = CIFS_CACHE_WRITE(cinode); + } + +-static void ++static int + cifs_close_file(const unsigned int xid, struct cifs_tcon *tcon, + struct cifs_fid *fid) + { +- CIFSSMBClose(xid, tcon, fid->netfid); ++ return CIFSSMBClose(xid, tcon, fid->netfid); + } + + static int +diff --git a/fs/smb/client/smb2misc.c b/fs/smb/client/smb2misc.c +index 82b84a4941dd2..cc72be5a93a93 100644 +--- a/fs/smb/client/smb2misc.c ++++ b/fs/smb/client/smb2misc.c +@@ -622,6 +622,8 @@ smb2_is_valid_lease_break(char *buffer, struct TCP_Server_Info *server) + /* look up tcon based on tid & uid */ + spin_lock(&cifs_tcp_ses_lock); + list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { ++ if (cifs_ses_exiting(ses)) ++ continue; + list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { + spin_lock(&tcon->open_file_lock); + cifs_stats_inc( +@@ -697,6 +699,8 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server) + /* look up tcon based on tid & uid */ + spin_lock(&cifs_tcp_ses_lock); + list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { ++ if (cifs_ses_exiting(ses)) ++ continue; + list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { + + spin_lock(&tcon->open_file_lock); +diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c +index 978a9f409857a..04fea874d0a33 100644 +--- a/fs/smb/client/smb2ops.c ++++ b/fs/smb/client/smb2ops.c +@@ -1392,14 +1392,14 @@ smb2_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock) + memcpy(cfile->fid.create_guid, fid->create_guid, 16); + } + +-static void ++static int + smb2_close_file(const unsigned int xid, struct cifs_tcon *tcon, + struct cifs_fid 
*fid) + { +- SMB2_close(xid, tcon, fid->persistent_fid, fid->volatile_fid); ++ return SMB2_close(xid, tcon, fid->persistent_fid, fid->volatile_fid); + } + +-static void ++static int + smb2_close_getattr(const unsigned int xid, struct cifs_tcon *tcon, + struct cifsFileInfo *cfile) + { +@@ -1410,7 +1410,7 @@ smb2_close_getattr(const unsigned int xid, struct cifs_tcon *tcon, + rc = __SMB2_close(xid, tcon, cfile->fid.persistent_fid, + cfile->fid.volatile_fid, &file_inf); + if (rc) +- return; ++ return rc; + + inode = d_inode(cfile->dentry); + +@@ -1439,6 +1439,7 @@ smb2_close_getattr(const unsigned int xid, struct cifs_tcon *tcon, + + /* End of file and Attributes should not have to be updated on close */ + spin_unlock(&inode->i_lock); ++ return rc; + } + + static int +@@ -2429,6 +2430,8 @@ smb2_is_network_name_deleted(char *buf, struct TCP_Server_Info *server) + + spin_lock(&cifs_tcp_ses_lock); + list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { ++ if (cifs_ses_exiting(ses)) ++ continue; + list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { + if (tcon->tid == le32_to_cpu(shdr->Id.SyncId.TreeId)) { + spin_lock(&tcon->tc_lock); +diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c +index 4d7d0bdf7a472..94bd4c6d2d682 100644 +--- a/fs/smb/client/smb2pdu.c ++++ b/fs/smb/client/smb2pdu.c +@@ -3549,9 +3549,9 @@ __SMB2_close(const unsigned int xid, struct cifs_tcon *tcon, + memcpy(&pbuf->network_open_info, + &rsp->network_open_info, + sizeof(pbuf->network_open_info)); ++ atomic_dec(&tcon->num_remote_opens); + } + +- atomic_dec(&tcon->num_remote_opens); + close_exit: + SMB2_close_free(&rqst); + free_rsp_buf(resp_buftype, rsp); +diff --git a/fs/smb/server/ksmbd_netlink.h b/fs/smb/server/ksmbd_netlink.h +index 0ebf91ffa2361..4464a62228cf3 100644 +--- a/fs/smb/server/ksmbd_netlink.h ++++ b/fs/smb/server/ksmbd_netlink.h +@@ -166,7 +166,8 @@ struct ksmbd_share_config_response { + __u16 force_uid; + __u16 force_gid; + __s8 share_name[KSMBD_REQ_MAX_SHARE_NAME]; +- __u32 reserved[112]; /* Reserved room */ ++ __u32 reserved[111]; /* Reserved room */ ++ __u32 payload_sz; + __u32 veto_list_sz; + __s8 ____payload[]; + }; +diff --git a/fs/smb/server/mgmt/share_config.c b/fs/smb/server/mgmt/share_config.c +index 328a412259dc1..a2f0a2edceb8a 100644 +--- a/fs/smb/server/mgmt/share_config.c ++++ b/fs/smb/server/mgmt/share_config.c +@@ -158,7 +158,12 @@ static struct ksmbd_share_config *share_config_request(struct unicode_map *um, + share->name = kstrdup(name, GFP_KERNEL); + + if (!test_share_config_flag(share, KSMBD_SHARE_FLAG_PIPE)) { +- share->path = kstrdup(ksmbd_share_config_path(resp), ++ int path_len = PATH_MAX; ++ ++ if (resp->payload_sz) ++ path_len = resp->payload_sz - resp->veto_list_sz; ++ ++ share->path = kstrndup(ksmbd_share_config_path(resp), path_len, + GFP_KERNEL); + if (share->path) + share->path_sz = strlen(share->path); +diff --git a/fs/smb/server/smb2ops.c b/fs/smb/server/smb2ops.c +index 27a9dce3e03ab..8600f32c981a1 100644 +--- a/fs/smb/server/smb2ops.c ++++ b/fs/smb/server/smb2ops.c +@@ -228,6 +228,11 @@ void init_smb3_0_server(struct ksmbd_conn *conn) + conn->cli_cap & SMB2_GLOBAL_CAP_ENCRYPTION) + conn->vals->capabilities |= SMB2_GLOBAL_CAP_ENCRYPTION; + ++ if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION || ++ (!(server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION_OFF) && ++ conn->cli_cap & SMB2_GLOBAL_CAP_ENCRYPTION)) ++ conn->vals->capabilities |= SMB2_GLOBAL_CAP_ENCRYPTION; ++ + if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB3_MULTICHANNEL) + 
conn->vals->capabilities |= SMB2_GLOBAL_CAP_MULTI_CHANNEL; + } +@@ -275,11 +280,6 @@ int init_smb3_11_server(struct ksmbd_conn *conn) + conn->vals->capabilities |= SMB2_GLOBAL_CAP_LEASING | + SMB2_GLOBAL_CAP_DIRECTORY_LEASING; + +- if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION || +- (!(server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION_OFF) && +- conn->cli_cap & SMB2_GLOBAL_CAP_ENCRYPTION)) +- conn->vals->capabilities |= SMB2_GLOBAL_CAP_ENCRYPTION; +- + if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB3_MULTICHANNEL) + conn->vals->capabilities |= SMB2_GLOBAL_CAP_MULTI_CHANNEL; + +diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c +index 199c31c275e5b..924f08326eef4 100644 +--- a/fs/smb/server/smb2pdu.c ++++ b/fs/smb/server/smb2pdu.c +@@ -5631,8 +5631,9 @@ static int smb2_rename(struct ksmbd_work *work, + if (!file_info->ReplaceIfExists) + flags = RENAME_NOREPLACE; + +- smb_break_all_levII_oplock(work, fp, 0); + rc = ksmbd_vfs_rename(work, &fp->filp->f_path, new_name, flags); ++ if (!rc) ++ smb_break_all_levII_oplock(work, fp, 0); + out: + kfree(new_name); + return rc; +diff --git a/fs/smb/server/transport_ipc.c b/fs/smb/server/transport_ipc.c +index f29bb03f0dc47..8752ac82c557b 100644 +--- a/fs/smb/server/transport_ipc.c ++++ b/fs/smb/server/transport_ipc.c +@@ -65,6 +65,7 @@ struct ipc_msg_table_entry { + struct hlist_node ipc_table_hlist; + + void *response; ++ unsigned int msg_sz; + }; + + static struct delayed_work ipc_timer_work; +@@ -275,6 +276,7 @@ static int handle_response(int type, void *payload, size_t sz) + } + + memcpy(entry->response, payload, sz); ++ entry->msg_sz = sz; + wake_up_interruptible(&entry->wait); + ret = 0; + break; +@@ -453,6 +455,34 @@ static int ipc_msg_send(struct ksmbd_ipc_msg *msg) + return ret; + } + ++static int ipc_validate_msg(struct ipc_msg_table_entry *entry) ++{ ++ unsigned int msg_sz = entry->msg_sz; ++ ++ if (entry->type == KSMBD_EVENT_RPC_REQUEST) { ++ struct ksmbd_rpc_command *resp = entry->response; ++ ++ msg_sz = sizeof(struct ksmbd_rpc_command) + resp->payload_sz; ++ } else if (entry->type == KSMBD_EVENT_SPNEGO_AUTHEN_REQUEST) { ++ struct ksmbd_spnego_authen_response *resp = entry->response; ++ ++ msg_sz = sizeof(struct ksmbd_spnego_authen_response) + ++ resp->session_key_len + resp->spnego_blob_len; ++ } else if (entry->type == KSMBD_EVENT_SHARE_CONFIG_REQUEST) { ++ struct ksmbd_share_config_response *resp = entry->response; ++ ++ if (resp->payload_sz) { ++ if (resp->payload_sz < resp->veto_list_sz) ++ return -EINVAL; ++ ++ msg_sz = sizeof(struct ksmbd_share_config_response) + ++ resp->payload_sz; ++ } ++ } ++ ++ return entry->msg_sz != msg_sz ? 
-EINVAL : 0; ++} ++ + static void *ipc_msg_send_request(struct ksmbd_ipc_msg *msg, unsigned int handle) + { + struct ipc_msg_table_entry entry; +@@ -477,6 +507,13 @@ static void *ipc_msg_send_request(struct ksmbd_ipc_msg *msg, unsigned int handle + ret = wait_event_interruptible_timeout(entry.wait, + entry.response != NULL, + IPC_WAIT_TIMEOUT); ++ if (entry.response) { ++ ret = ipc_validate_msg(&entry); ++ if (ret) { ++ kvfree(entry.response); ++ entry.response = NULL; ++ } ++ } + out: + down_write(&ipc_msg_table_lock); + hash_del(&entry.ipc_table_hlist); +diff --git a/fs/vboxsf/super.c b/fs/vboxsf/super.c +index 1fb8f4df60cbb..9848af78215bf 100644 +--- a/fs/vboxsf/super.c ++++ b/fs/vboxsf/super.c +@@ -151,7 +151,7 @@ static int vboxsf_fill_super(struct super_block *sb, struct fs_context *fc) + if (!sbi->nls) { + vbg_err("vboxsf: Count not load '%s' nls\n", nls_name); + err = -EINVAL; +- goto fail_free; ++ goto fail_destroy_idr; + } + } + +@@ -224,6 +224,7 @@ static int vboxsf_fill_super(struct super_block *sb, struct fs_context *fc) + ida_simple_remove(&vboxsf_bdi_ida, sbi->bdi_id); + if (sbi->nls) + unload_nls(sbi->nls); ++fail_destroy_idr: + idr_destroy(&sbi->ino_idr); + kfree(sbi); + return err; +diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h +index 31029f4f7be85..c4aabbf002f7c 100644 +--- a/include/kvm/arm_pmu.h ++++ b/include/kvm/arm_pmu.h +@@ -86,7 +86,7 @@ void kvm_vcpu_pmu_resync_el0(void); + */ + #define kvm_pmu_update_vcpu_events(vcpu) \ + do { \ +- if (!has_vhe() && kvm_vcpu_has_pmu(vcpu)) \ ++ if (!has_vhe() && kvm_arm_support_pmu_v3()) \ + vcpu->arch.pmu.events = *kvm_get_pmu_events(); \ + } while (0) + +diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h +index d0807ad43f933..6e950594215a0 100644 +--- a/include/linux/avf/virtchnl.h ++++ b/include/linux/avf/virtchnl.h +@@ -4,6 +4,11 @@ + #ifndef _VIRTCHNL_H_ + #define _VIRTCHNL_H_ + ++#include <linux/bitops.h> ++#include <linux/bits.h> ++#include <linux/overflow.h> ++#include <uapi/linux/if_ether.h> ++ + /* Description: + * This header file describes the Virtual Function (VF) - Physical Function + * (PF) communication protocol used by the drivers for all devices starting +diff --git a/include/linux/bpf.h b/include/linux/bpf.h +index 9b08d792fa95a..2ebb5d4d43dc6 100644 +--- a/include/linux/bpf.h ++++ b/include/linux/bpf.h +@@ -1524,12 +1524,26 @@ struct bpf_link { + enum bpf_link_type type; + const struct bpf_link_ops *ops; + struct bpf_prog *prog; +- struct work_struct work; ++ /* rcu is used before freeing, work can be used to schedule that ++ * RCU-based freeing before that, so they never overlap ++ */ ++ union { ++ struct rcu_head rcu; ++ struct work_struct work; ++ }; + }; + + struct bpf_link_ops { + void (*release)(struct bpf_link *link); ++ /* deallocate link resources callback, called without RCU grace period ++ * waiting ++ */ + void (*dealloc)(struct bpf_link *link); ++ /* deallocate link resources callback, called after RCU grace period; ++ * if underlying BPF program is sleepable we go through tasks trace ++ * RCU GP and then "classic" RCU GP ++ */ ++ void (*dealloc_deferred)(struct bpf_link *link); + int (*detach)(struct bpf_link *link); + int (*update_prog)(struct bpf_link *link, struct bpf_prog *new_prog, + struct bpf_prog *old_prog); +diff --git a/include/linux/device.h b/include/linux/device.h +index 99496a0a5ddb3..a070160fbcb8e 100644 +--- a/include/linux/device.h ++++ b/include/linux/device.h +@@ -1250,6 +1250,7 @@ void device_link_del(struct device_link *link); + void 
device_link_remove(void *consumer, struct device *supplier); + void device_links_supplier_sync_state_pause(void); + void device_links_supplier_sync_state_resume(void); ++void device_link_wait_removal(void); + + /* Create alias, so I can be autoloaded. */ + #define MODULE_ALIAS_CHARDEV(major,minor) \ +diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h +index 731beb3198c4f..8215e193178aa 100644 +--- a/include/linux/io_uring_types.h ++++ b/include/linux/io_uring_types.h +@@ -250,7 +250,6 @@ struct io_ring_ctx { + + struct io_submit_state submit_state; + +- struct io_buffer_list *io_bl; + struct xarray io_bl_xa; + + struct io_hash_table cancel_table_locked; +diff --git a/include/linux/secretmem.h b/include/linux/secretmem.h +index 35f3a4a8ceb1e..acf7e1a3f3def 100644 +--- a/include/linux/secretmem.h ++++ b/include/linux/secretmem.h +@@ -13,10 +13,10 @@ static inline bool folio_is_secretmem(struct folio *folio) + /* + * Using folio_mapping() is quite slow because of the actual call + * instruction. +- * We know that secretmem pages are not compound and LRU so we can ++ * We know that secretmem pages are not compound, so we can + * save a couple of cycles here. + */ +- if (folio_test_large(folio) || !folio_test_lru(folio)) ++ if (folio_test_large(folio)) + return false; + + mapping = (struct address_space *) +diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h +index 2922059908cc5..9e61f6df6bc55 100644 +--- a/include/linux/skbuff.h ++++ b/include/linux/skbuff.h +@@ -736,8 +736,6 @@ typedef unsigned char *sk_buff_data_t; + * @list: queue head + * @ll_node: anchor in an llist (eg socket defer_list) + * @sk: Socket we are owned by +- * @ip_defrag_offset: (aka @sk) alternate use of @sk, used in +- * fragmentation management + * @dev: Device we arrived on/are leaving by + * @dev_scratch: (aka @dev) alternate use of @dev when @dev would be %NULL + * @cb: Control buffer. Free for use by every layer. Put private vars here +@@ -860,10 +858,7 @@ struct sk_buff { + struct llist_node ll_node; + }; + +- union { +- struct sock *sk; +- int ip_defrag_offset; +- }; ++ struct sock *sk; + + union { + ktime_t tstamp; +diff --git a/include/linux/udp.h b/include/linux/udp.h +index d04188714dca1..94e63b2695406 100644 +--- a/include/linux/udp.h ++++ b/include/linux/udp.h +@@ -140,6 +140,24 @@ static inline void udp_cmsg_recv(struct msghdr *msg, struct sock *sk, + } + } + ++DECLARE_STATIC_KEY_FALSE(udp_encap_needed_key); ++#if IS_ENABLED(CONFIG_IPV6) ++DECLARE_STATIC_KEY_FALSE(udpv6_encap_needed_key); ++#endif ++ ++static inline bool udp_encap_needed(void) ++{ ++ if (static_branch_unlikely(&udp_encap_needed_key)) ++ return true; ++ ++#if IS_ENABLED(CONFIG_IPV6) ++ if (static_branch_unlikely(&udpv6_encap_needed_key)) ++ return true; ++#endif ++ ++ return false; ++} ++ + static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb) + { + if (!skb_is_gso(skb)) +@@ -153,6 +171,16 @@ static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb) + !udp_test_bit(ACCEPT_FRAGLIST, sk)) + return true; + ++ /* GSO packets lacking the SKB_GSO_UDP_TUNNEL/_CSUM bits might still ++ * land in a tunnel as the socket check in udp_gro_receive cannot be ++ * foolproof. 
++ */ ++ if (udp_encap_needed() && ++ READ_ONCE(udp_sk(sk)->encap_rcv) && ++ !(skb_shinfo(skb)->gso_type & ++ (SKB_GSO_UDP_TUNNEL | SKB_GSO_UDP_TUNNEL_CSUM))) ++ return true; ++ + return false; + } + +diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h +index 0d231024570a3..03e68a8e229f5 100644 +--- a/include/net/bluetooth/hci.h ++++ b/include/net/bluetooth/hci.h +@@ -176,6 +176,15 @@ enum { + */ + HCI_QUIRK_USE_BDADDR_PROPERTY, + ++ /* When this quirk is set, the Bluetooth Device Address provided by ++ * the 'local-bd-address' fwnode property is incorrectly specified in ++ * big-endian order. ++ * ++ * This quirk can be set before hci_register_dev is called or ++ * during the hdev->setup vendor callback. ++ */ ++ HCI_QUIRK_BDADDR_PROPERTY_BROKEN, ++ + /* When this quirk is set, the duplicate filtering during + * scanning is based on Bluetooth devices addresses. To allow + * RSSI based updates, restart scanning if needed. +diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h +index 01a73bf74fa19..6ecac01115d9c 100644 +--- a/include/net/inet_connection_sock.h ++++ b/include/net/inet_connection_sock.h +@@ -173,6 +173,7 @@ void inet_csk_init_xmit_timers(struct sock *sk, + void (*delack_handler)(struct timer_list *), + void (*keepalive_handler)(struct timer_list *)); + void inet_csk_clear_xmit_timers(struct sock *sk); ++void inet_csk_clear_xmit_timers_sync(struct sock *sk); + + static inline void inet_csk_schedule_ack(struct sock *sk) + { +diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h +index 4d43adf186064..cd526fd31b458 100644 +--- a/include/net/mana/mana.h ++++ b/include/net/mana/mana.h +@@ -39,7 +39,6 @@ enum TRI_STATE { + #define COMP_ENTRY_SIZE 64 + + #define RX_BUFFERS_PER_QUEUE 512 +-#define MANA_RX_DATA_ALIGN 64 + + #define MAX_SEND_BUFFERS_PER_QUEUE 256 + +diff --git a/include/net/sock.h b/include/net/sock.h +index e70c903b04f30..25780942ec8bf 100644 +--- a/include/net/sock.h ++++ b/include/net/sock.h +@@ -1808,6 +1808,13 @@ static inline void sock_owned_by_me(const struct sock *sk) + #endif + } + ++static inline void sock_not_owned_by_me(const struct sock *sk) ++{ ++#ifdef CONFIG_LOCKDEP ++ WARN_ON_ONCE(lockdep_sock_is_held(sk) && debug_locks); ++#endif ++} ++ + static inline bool sock_owned_by_user(const struct sock *sk) + { + sock_owned_by_me(sk); +diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c +index aed10bae50acb..2c0a9a98272ca 100644 +--- a/io_uring/io_uring.c ++++ b/io_uring/io_uring.c +@@ -148,6 +148,7 @@ static bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx, + static void io_queue_sqe(struct io_kiocb *req); + + struct kmem_cache *req_cachep; ++static struct workqueue_struct *iou_wq __ro_after_init; + + static int __read_mostly sysctl_io_uring_disabled; + static int __read_mostly sysctl_io_uring_group = -1; +@@ -343,7 +344,6 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) + err: + kfree(ctx->cancel_table.hbs); + kfree(ctx->cancel_table_locked.hbs); +- kfree(ctx->io_bl); + xa_destroy(&ctx->io_bl_xa); + kfree(ctx); + return NULL; +@@ -2934,7 +2934,6 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx) + io_wq_put_hash(ctx->hash_map); + kfree(ctx->cancel_table.hbs); + kfree(ctx->cancel_table_locked.hbs); +- kfree(ctx->io_bl); + xa_destroy(&ctx->io_bl_xa); + kfree(ctx); + } +@@ -3182,7 +3181,7 @@ static __cold void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx) + * noise and overhead, there's no discernable change in runtime + * over using 
system_wq. + */ +- queue_work(system_unbound_wq, &ctx->exit_work); ++ queue_work(iou_wq, &ctx->exit_work); + } + + static int io_uring_release(struct inode *inode, struct file *file) +@@ -3430,14 +3429,15 @@ static void *io_uring_validate_mmap_request(struct file *file, + ptr = ctx->sq_sqes; + break; + case IORING_OFF_PBUF_RING: { ++ struct io_buffer_list *bl; + unsigned int bgid; + + bgid = (offset & ~IORING_OFF_MMAP_MASK) >> IORING_OFF_PBUF_SHIFT; +- rcu_read_lock(); +- ptr = io_pbuf_get_address(ctx, bgid); +- rcu_read_unlock(); +- if (!ptr) +- return ERR_PTR(-EINVAL); ++ bl = io_pbuf_get_bl(ctx, bgid); ++ if (IS_ERR(bl)) ++ return bl; ++ ptr = bl->buf_ring; ++ io_put_bl(ctx, bl); + break; + } + default: +@@ -4666,6 +4666,8 @@ static int __init io_uring_init(void) + offsetof(struct io_kiocb, cmd.data), + sizeof_field(struct io_kiocb, cmd.data), NULL); + ++ iou_wq = alloc_workqueue("iou_exit", WQ_UNBOUND, 64); ++ + #ifdef CONFIG_SYSCTL + register_sysctl_init("kernel", kernel_io_uring_disabled_table); + #endif +diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c +index e8516f3bbbaaa..26a00920042c4 100644 +--- a/io_uring/kbuf.c ++++ b/io_uring/kbuf.c +@@ -17,8 +17,6 @@ + + #define IO_BUFFER_LIST_BUF_PER_PAGE (PAGE_SIZE / sizeof(struct io_uring_buf)) + +-#define BGID_ARRAY 64 +- + /* BIDs are addressed by a 16-bit field in a CQE */ + #define MAX_BIDS_PER_BGID (1 << 16) + +@@ -31,13 +29,9 @@ struct io_provide_buf { + __u16 bid; + }; + +-static struct io_buffer_list *__io_buffer_get_list(struct io_ring_ctx *ctx, +- struct io_buffer_list *bl, +- unsigned int bgid) ++static inline struct io_buffer_list *__io_buffer_get_list(struct io_ring_ctx *ctx, ++ unsigned int bgid) + { +- if (bl && bgid < BGID_ARRAY) +- return &bl[bgid]; +- + return xa_load(&ctx->io_bl_xa, bgid); + } + +@@ -53,7 +47,7 @@ static inline struct io_buffer_list *io_buffer_get_list(struct io_ring_ctx *ctx, + { + lockdep_assert_held(&ctx->uring_lock); + +- return __io_buffer_get_list(ctx, ctx->io_bl, bgid); ++ return __io_buffer_get_list(ctx, bgid); + } + + static int io_buffer_add_list(struct io_ring_ctx *ctx, +@@ -65,11 +59,7 @@ static int io_buffer_add_list(struct io_ring_ctx *ctx, + * always under the ->uring_lock, but the RCU lookup from mmap does. 
+ */ + bl->bgid = bgid; +- smp_store_release(&bl->is_ready, 1); +- +- if (bgid < BGID_ARRAY) +- return 0; +- ++ atomic_set(&bl->refs, 1); + return xa_err(xa_store(&ctx->io_bl_xa, bgid, bl, GFP_KERNEL)); + } + +@@ -215,24 +205,6 @@ void __user *io_buffer_select(struct io_kiocb *req, size_t *len, + return ret; + } + +-static __cold int io_init_bl_list(struct io_ring_ctx *ctx) +-{ +- struct io_buffer_list *bl; +- int i; +- +- bl = kcalloc(BGID_ARRAY, sizeof(struct io_buffer_list), GFP_KERNEL); +- if (!bl) +- return -ENOMEM; +- +- for (i = 0; i < BGID_ARRAY; i++) { +- INIT_LIST_HEAD(&bl[i].buf_list); +- bl[i].bgid = i; +- } +- +- smp_store_release(&ctx->io_bl, bl); +- return 0; +-} +- + /* + * Mark the given mapped range as free for reuse + */ +@@ -301,22 +273,22 @@ static int __io_remove_buffers(struct io_ring_ctx *ctx, + return i; + } + ++void io_put_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl) ++{ ++ if (atomic_dec_and_test(&bl->refs)) { ++ __io_remove_buffers(ctx, bl, -1U); ++ kfree_rcu(bl, rcu); ++ } ++} ++ + void io_destroy_buffers(struct io_ring_ctx *ctx) + { + struct io_buffer_list *bl; + unsigned long index; +- int i; +- +- for (i = 0; i < BGID_ARRAY; i++) { +- if (!ctx->io_bl) +- break; +- __io_remove_buffers(ctx, &ctx->io_bl[i], -1U); +- } + + xa_for_each(&ctx->io_bl_xa, index, bl) { + xa_erase(&ctx->io_bl_xa, bl->bgid); +- __io_remove_buffers(ctx, bl, -1U); +- kfree_rcu(bl, rcu); ++ io_put_bl(ctx, bl); + } + + while (!list_empty(&ctx->io_buffers_pages)) { +@@ -485,12 +457,6 @@ int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags) + + io_ring_submit_lock(ctx, issue_flags); + +- if (unlikely(p->bgid < BGID_ARRAY && !ctx->io_bl)) { +- ret = io_init_bl_list(ctx); +- if (ret) +- goto err; +- } +- + bl = io_buffer_get_list(ctx, p->bgid); + if (unlikely(!bl)) { + bl = kzalloc(sizeof(*bl), GFP_KERNEL_ACCOUNT); +@@ -503,14 +469,9 @@ int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags) + if (ret) { + /* + * Doesn't need rcu free as it was never visible, but +- * let's keep it consistent throughout. Also can't +- * be a lower indexed array group, as adding one +- * where lookup failed cannot happen. ++ * let's keep it consistent throughout. + */ +- if (p->bgid >= BGID_ARRAY) +- kfree_rcu(bl, rcu); +- else +- WARN_ON_ONCE(1); ++ kfree_rcu(bl, rcu); + goto err; + } + } +@@ -675,12 +636,6 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg) + if (reg.ring_entries >= 65536) + return -EINVAL; + +- if (unlikely(reg.bgid < BGID_ARRAY && !ctx->io_bl)) { +- int ret = io_init_bl_list(ctx); +- if (ret) +- return ret; +- } +- + bl = io_buffer_get_list(ctx, reg.bgid); + if (bl) { + /* if mapped buffer ring OR classic exists, don't allow */ +@@ -729,31 +684,40 @@ int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg) + if (!bl->is_mapped) + return -EINVAL; + +- __io_remove_buffers(ctx, bl, -1U); +- if (bl->bgid >= BGID_ARRAY) { +- xa_erase(&ctx->io_bl_xa, bl->bgid); +- kfree_rcu(bl, rcu); +- } ++ xa_erase(&ctx->io_bl_xa, bl->bgid); ++ io_put_bl(ctx, bl); + return 0; + } + +-void *io_pbuf_get_address(struct io_ring_ctx *ctx, unsigned long bgid) ++struct io_buffer_list *io_pbuf_get_bl(struct io_ring_ctx *ctx, ++ unsigned long bgid) + { + struct io_buffer_list *bl; ++ bool ret; + +- bl = __io_buffer_get_list(ctx, smp_load_acquire(&ctx->io_bl), bgid); +- +- if (!bl || !bl->is_mmap) +- return NULL; + /* +- * Ensure the list is fully setup. 
Only strictly needed for RCU lookup +- * via mmap, and in that case only for the array indexed groups. For +- * the xarray lookups, it's either visible and ready, or not at all. ++ * We have to be a bit careful here - we're inside mmap and cannot grab ++ * the uring_lock. This means the buffer_list could be simultaneously ++ * going away, if someone is trying to be sneaky. Look it up under rcu ++ * so we know it's not going away, and attempt to grab a reference to ++ * it. If the ref is already zero, then fail the mapping. If successful, ++ * the caller will call io_put_bl() to drop the the reference at at the ++ * end. This may then safely free the buffer_list (and drop the pages) ++ * at that point, vm_insert_pages() would've already grabbed the ++ * necessary vma references. + */ +- if (!smp_load_acquire(&bl->is_ready)) +- return NULL; +- +- return bl->buf_ring; ++ rcu_read_lock(); ++ bl = xa_load(&ctx->io_bl_xa, bgid); ++ /* must be a mmap'able buffer ring and have pages */ ++ ret = false; ++ if (bl && bl->is_mmap) ++ ret = atomic_inc_not_zero(&bl->refs); ++ rcu_read_unlock(); ++ ++ if (ret) ++ return bl; ++ ++ return ERR_PTR(-EINVAL); + } + + /* +diff --git a/io_uring/kbuf.h b/io_uring/kbuf.h +index 3d0cb6b8c1ed2..8d7929369501d 100644 +--- a/io_uring/kbuf.h ++++ b/io_uring/kbuf.h +@@ -25,12 +25,12 @@ struct io_buffer_list { + __u16 head; + __u16 mask; + ++ atomic_t refs; ++ + /* ring mapped provided buffers */ + __u8 is_mapped; + /* ring mapped provided buffers, but mmap'ed by application */ + __u8 is_mmap; +- /* bl is visible from an RCU point of view for lookup */ +- __u8 is_ready; + }; + + struct io_buffer { +@@ -60,7 +60,9 @@ unsigned int __io_put_kbuf(struct io_kiocb *req, unsigned issue_flags); + + void io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags); + +-void *io_pbuf_get_address(struct io_ring_ctx *ctx, unsigned long bgid); ++void io_put_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl); ++struct io_buffer_list *io_pbuf_get_bl(struct io_ring_ctx *ctx, ++ unsigned long bgid); + + static inline void io_kbuf_recycle_ring(struct io_kiocb *req) + { +diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c +index 4b7d186c7622d..4902a7487f076 100644 +--- a/kernel/bpf/syscall.c ++++ b/kernel/bpf/syscall.c +@@ -2866,17 +2866,46 @@ void bpf_link_inc(struct bpf_link *link) + atomic64_inc(&link->refcnt); + } + ++static void bpf_link_defer_dealloc_rcu_gp(struct rcu_head *rcu) ++{ ++ struct bpf_link *link = container_of(rcu, struct bpf_link, rcu); ++ ++ /* free bpf_link and its containing memory */ ++ link->ops->dealloc_deferred(link); ++} ++ ++static void bpf_link_defer_dealloc_mult_rcu_gp(struct rcu_head *rcu) ++{ ++ if (rcu_trace_implies_rcu_gp()) ++ bpf_link_defer_dealloc_rcu_gp(rcu); ++ else ++ call_rcu(rcu, bpf_link_defer_dealloc_rcu_gp); ++} ++ + /* bpf_link_free is guaranteed to be called from process context */ + static void bpf_link_free(struct bpf_link *link) + { ++ bool sleepable = false; ++ + bpf_link_free_id(link->id); + if (link->prog) { ++ sleepable = link->prog->aux->sleepable; + /* detach BPF program, clean up used resources */ + link->ops->release(link); + bpf_prog_put(link->prog); + } +- /* free bpf_link and its containing memory */ +- link->ops->dealloc(link); ++ if (link->ops->dealloc_deferred) { ++ /* schedule BPF link deallocation; if underlying BPF program ++ * is sleepable, we need to first wait for RCU tasks trace ++ * sync, then go through "classic" RCU grace period ++ */ ++ if (sleepable) ++ call_rcu_tasks_trace(&link->rcu, 
bpf_link_defer_dealloc_mult_rcu_gp); ++ else ++ call_rcu(&link->rcu, bpf_link_defer_dealloc_rcu_gp); ++ } ++ if (link->ops->dealloc) ++ link->ops->dealloc(link); + } + + static void bpf_link_put_deferred(struct work_struct *work) +@@ -3381,7 +3410,7 @@ static int bpf_raw_tp_link_fill_link_info(const struct bpf_link *link, + + static const struct bpf_link_ops bpf_raw_tp_link_lops = { + .release = bpf_raw_tp_link_release, +- .dealloc = bpf_raw_tp_link_dealloc, ++ .dealloc_deferred = bpf_raw_tp_link_dealloc, + .show_fdinfo = bpf_raw_tp_link_show_fdinfo, + .fill_link_info = bpf_raw_tp_link_fill_link_info, + }; +diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c +index 396c4c66932f2..c9fc734989c68 100644 +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -6637,6 +6637,11 @@ static int check_stack_access_within_bounds( + err = check_stack_slot_within_bounds(env, min_off, state, type); + if (!err && max_off > 0) + err = -EINVAL; /* out of stack access into non-negative offsets */ ++ if (!err && access_size < 0) ++ /* access_size should not be negative (or overflow an int); others checks ++ * along the way should have prevented such an access. ++ */ ++ err = -EFAULT; /* invalid negative access size; integer overflow? */ + + if (err) { + if (tnum_is_const(reg->var_off)) { +diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c +index 1d76f3b014aee..1e79084a9d9d2 100644 +--- a/kernel/trace/bpf_trace.c ++++ b/kernel/trace/bpf_trace.c +@@ -2639,7 +2639,7 @@ static int bpf_kprobe_multi_link_fill_link_info(const struct bpf_link *link, + + static const struct bpf_link_ops bpf_kprobe_multi_link_lops = { + .release = bpf_kprobe_multi_link_release, +- .dealloc = bpf_kprobe_multi_link_dealloc, ++ .dealloc_deferred = bpf_kprobe_multi_link_dealloc, + .fill_link_info = bpf_kprobe_multi_link_fill_link_info, + }; + +@@ -3065,6 +3065,9 @@ static void bpf_uprobe_multi_link_release(struct bpf_link *link) + + umulti_link = container_of(link, struct bpf_uprobe_multi_link, link); + bpf_uprobe_unregister(&umulti_link->path, umulti_link->uprobes, umulti_link->cnt); ++ if (umulti_link->task) ++ put_task_struct(umulti_link->task); ++ path_put(&umulti_link->path); + } + + static void bpf_uprobe_multi_link_dealloc(struct bpf_link *link) +@@ -3072,16 +3075,13 @@ static void bpf_uprobe_multi_link_dealloc(struct bpf_link *link) + struct bpf_uprobe_multi_link *umulti_link; + + umulti_link = container_of(link, struct bpf_uprobe_multi_link, link); +- if (umulti_link->task) +- put_task_struct(umulti_link->task); +- path_put(&umulti_link->path); + kvfree(umulti_link->uprobes); + kfree(umulti_link); + } + + static const struct bpf_link_ops bpf_uprobe_multi_link_lops = { + .release = bpf_uprobe_multi_link_release, +- .dealloc = bpf_uprobe_multi_link_dealloc, ++ .dealloc_deferred = bpf_uprobe_multi_link_dealloc, + }; + + static int uprobe_prog_run(struct bpf_uprobe *uprobe, +diff --git a/mm/memory.c b/mm/memory.c +index 78e05d3e9e4ac..e44d4d887cf6d 100644 +--- a/mm/memory.c ++++ b/mm/memory.c +@@ -5674,6 +5674,10 @@ int follow_phys(struct vm_area_struct *vma, + goto out; + pte = ptep_get(ptep); + ++ /* Never return PFNs of anon folios in COW mappings. 
*/ ++ if (vm_normal_folio(vma, address, pte)) ++ goto unlock; ++ + if ((flags & FOLL_WRITE) && !pte_write(pte)) + goto unlock; + +diff --git a/net/9p/client.c b/net/9p/client.c +index e265a0ca6bddd..f7e90b4769bba 100644 +--- a/net/9p/client.c ++++ b/net/9p/client.c +@@ -1583,7 +1583,7 @@ p9_client_read_once(struct p9_fid *fid, u64 offset, struct iov_iter *to, + received = rsize; + } + +- p9_debug(P9_DEBUG_9P, "<<< RREAD count %d\n", count); ++ p9_debug(P9_DEBUG_9P, "<<< RREAD count %d\n", received); + + if (non_zc) { + int n = copy_to_iter(dataptr, received, to); +@@ -1609,9 +1609,6 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err) + int total = 0; + *err = 0; + +- p9_debug(P9_DEBUG_9P, ">>> TWRITE fid %d offset %llu count %zd\n", +- fid->fid, offset, iov_iter_count(from)); +- + while (iov_iter_count(from)) { + int count = iov_iter_count(from); + int rsize = fid->iounit; +@@ -1623,6 +1620,9 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err) + if (count < rsize) + rsize = count; + ++ p9_debug(P9_DEBUG_9P, ">>> TWRITE fid %d offset %llu count %d (/%d)\n", ++ fid->fid, offset, rsize, count); ++ + /* Don't bother zerocopy for small IO (< 1024) */ + if (clnt->trans_mod->zc_request && rsize > 1024) { + req = p9_client_zc_rpc(clnt, P9_TWRITE, NULL, from, 0, +@@ -1650,7 +1650,7 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err) + written = rsize; + } + +- p9_debug(P9_DEBUG_9P, "<<< RWRITE count %d\n", count); ++ p9_debug(P9_DEBUG_9P, "<<< RWRITE count %d\n", written); + + p9_req_put(clnt, req); + iov_iter_revert(from, count - written - iov_iter_count(from)); +diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c +index c5462486dbca1..282ec581c0720 100644 +--- a/net/ax25/ax25_dev.c ++++ b/net/ax25/ax25_dev.c +@@ -105,7 +105,7 @@ void ax25_dev_device_down(struct net_device *dev) + spin_lock_bh(&ax25_dev_lock); + + #ifdef CONFIG_AX25_DAMA_SLAVE +- ax25_ds_del_timer(ax25_dev); ++ timer_shutdown_sync(&ax25_dev->dama.slave_timer); + #endif + + /* +diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c +index 233453807b509..ce3ff2fa72e58 100644 +--- a/net/bluetooth/hci_debugfs.c ++++ b/net/bluetooth/hci_debugfs.c +@@ -218,10 +218,12 @@ static int conn_info_min_age_set(void *data, u64 val) + { + struct hci_dev *hdev = data; + +- if (val == 0 || val > hdev->conn_info_max_age) ++ hci_dev_lock(hdev); ++ if (val == 0 || val > hdev->conn_info_max_age) { ++ hci_dev_unlock(hdev); + return -EINVAL; ++ } + +- hci_dev_lock(hdev); + hdev->conn_info_min_age = val; + hci_dev_unlock(hdev); + +@@ -246,10 +248,12 @@ static int conn_info_max_age_set(void *data, u64 val) + { + struct hci_dev *hdev = data; + +- if (val == 0 || val < hdev->conn_info_min_age) ++ hci_dev_lock(hdev); ++ if (val == 0 || val < hdev->conn_info_min_age) { ++ hci_dev_unlock(hdev); + return -EINVAL; ++ } + +- hci_dev_lock(hdev); + hdev->conn_info_max_age = val; + hci_dev_unlock(hdev); + +@@ -567,10 +571,12 @@ static int sniff_min_interval_set(void *data, u64 val) + { + struct hci_dev *hdev = data; + +- if (val == 0 || val % 2 || val > hdev->sniff_max_interval) ++ hci_dev_lock(hdev); ++ if (val == 0 || val % 2 || val > hdev->sniff_max_interval) { ++ hci_dev_unlock(hdev); + return -EINVAL; ++ } + +- hci_dev_lock(hdev); + hdev->sniff_min_interval = val; + hci_dev_unlock(hdev); + +@@ -595,10 +601,12 @@ static int sniff_max_interval_set(void *data, u64 val) + { + struct hci_dev *hdev = data; + +- if (val == 0 || val % 2 || val < 
hdev->sniff_min_interval) ++ hci_dev_lock(hdev); ++ if (val == 0 || val % 2 || val < hdev->sniff_min_interval) { ++ hci_dev_unlock(hdev); + return -EINVAL; ++ } + +- hci_dev_lock(hdev); + hdev->sniff_max_interval = val; + hci_dev_unlock(hdev); + +@@ -850,10 +858,12 @@ static int conn_min_interval_set(void *data, u64 val) + { + struct hci_dev *hdev = data; + +- if (val < 0x0006 || val > 0x0c80 || val > hdev->le_conn_max_interval) ++ hci_dev_lock(hdev); ++ if (val < 0x0006 || val > 0x0c80 || val > hdev->le_conn_max_interval) { ++ hci_dev_unlock(hdev); + return -EINVAL; ++ } + +- hci_dev_lock(hdev); + hdev->le_conn_min_interval = val; + hci_dev_unlock(hdev); + +@@ -878,10 +888,12 @@ static int conn_max_interval_set(void *data, u64 val) + { + struct hci_dev *hdev = data; + +- if (val < 0x0006 || val > 0x0c80 || val < hdev->le_conn_min_interval) ++ hci_dev_lock(hdev); ++ if (val < 0x0006 || val > 0x0c80 || val < hdev->le_conn_min_interval) { ++ hci_dev_unlock(hdev); + return -EINVAL; ++ } + +- hci_dev_lock(hdev); + hdev->le_conn_max_interval = val; + hci_dev_unlock(hdev); + +@@ -990,10 +1002,12 @@ static int adv_min_interval_set(void *data, u64 val) + { + struct hci_dev *hdev = data; + +- if (val < 0x0020 || val > 0x4000 || val > hdev->le_adv_max_interval) ++ hci_dev_lock(hdev); ++ if (val < 0x0020 || val > 0x4000 || val > hdev->le_adv_max_interval) { ++ hci_dev_unlock(hdev); + return -EINVAL; ++ } + +- hci_dev_lock(hdev); + hdev->le_adv_min_interval = val; + hci_dev_unlock(hdev); + +@@ -1018,10 +1032,12 @@ static int adv_max_interval_set(void *data, u64 val) + { + struct hci_dev *hdev = data; + +- if (val < 0x0020 || val > 0x4000 || val < hdev->le_adv_min_interval) ++ hci_dev_lock(hdev); ++ if (val < 0x0020 || val > 0x4000 || val < hdev->le_adv_min_interval) { ++ hci_dev_unlock(hdev); + return -EINVAL; ++ } + +- hci_dev_lock(hdev); + hdev->le_adv_max_interval = val; + hci_dev_unlock(hdev); + +diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c +index 2bb8ab9302a97..bb0e5902a3e60 100644 +--- a/net/bluetooth/hci_event.c ++++ b/net/bluetooth/hci_event.c +@@ -3219,6 +3219,31 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data, + if (test_bit(HCI_ENCRYPT, &hdev->flags)) + set_bit(HCI_CONN_ENCRYPT, &conn->flags); + ++ /* "Link key request" completed ahead of "connect request" completes */ ++ if (ev->encr_mode == 1 && !test_bit(HCI_CONN_ENCRYPT, &conn->flags) && ++ ev->link_type == ACL_LINK) { ++ struct link_key *key; ++ struct hci_cp_read_enc_key_size cp; ++ ++ key = hci_find_link_key(hdev, &ev->bdaddr); ++ if (key) { ++ set_bit(HCI_CONN_ENCRYPT, &conn->flags); ++ ++ if (!(hdev->commands[20] & 0x10)) { ++ conn->enc_key_size = HCI_LINK_KEY_SIZE; ++ } else { ++ cp.handle = cpu_to_le16(conn->handle); ++ if (hci_send_cmd(hdev, HCI_OP_READ_ENC_KEY_SIZE, ++ sizeof(cp), &cp)) { ++ bt_dev_err(hdev, "sending read key size failed"); ++ conn->enc_key_size = HCI_LINK_KEY_SIZE; ++ } ++ } ++ ++ hci_encrypt_cfm(conn, ev->status); ++ } ++ } ++ + /* Get remote features */ + if (conn->type == ACL_LINK) { + struct hci_cp_read_remote_features cp; +diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c +index 9b241eabca3e8..d6c0633bfe5bf 100644 +--- a/net/bluetooth/hci_sync.c ++++ b/net/bluetooth/hci_sync.c +@@ -3292,7 +3292,10 @@ static void hci_dev_get_bd_addr_from_property(struct hci_dev *hdev) + if (ret < 0 || !bacmp(&ba, BDADDR_ANY)) + return; + +- bacpy(&hdev->public_addr, &ba); ++ if (test_bit(HCI_QUIRK_BDADDR_PROPERTY_BROKEN, &hdev->quirks)) ++ 
baswap(&hdev->public_addr, &ba); ++ else ++ bacpy(&hdev->public_addr, &ba); + } + + struct hci_init_stage { +diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c +index aa23479b20b2a..ed62c1026fe93 100644 +--- a/net/bridge/netfilter/ebtables.c ++++ b/net/bridge/netfilter/ebtables.c +@@ -1111,6 +1111,8 @@ static int do_replace(struct net *net, sockptr_t arg, unsigned int len) + struct ebt_table_info *newinfo; + struct ebt_replace tmp; + ++ if (len < sizeof(tmp)) ++ return -EINVAL; + if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0) + return -EFAULT; + +@@ -1423,6 +1425,8 @@ static int update_counters(struct net *net, sockptr_t arg, unsigned int len) + { + struct ebt_replace hlp; + ++ if (len < sizeof(hlp)) ++ return -EINVAL; + if (copy_from_sockptr(&hlp, arg, sizeof(hlp))) + return -EFAULT; + +@@ -2352,6 +2356,8 @@ static int compat_update_counters(struct net *net, sockptr_t arg, + { + struct compat_ebt_replace hlp; + ++ if (len < sizeof(hlp)) ++ return -EINVAL; + if (copy_from_sockptr(&hlp, arg, sizeof(hlp))) + return -EFAULT; + +diff --git a/net/core/gro.c b/net/core/gro.c +index 0759277dc14ee..cefddf65f7db0 100644 +--- a/net/core/gro.c ++++ b/net/core/gro.c +@@ -195,8 +195,9 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb) + } + + merge: +- /* sk owenrship - if any - completely transferred to the aggregated packet */ ++ /* sk ownership - if any - completely transferred to the aggregated packet */ + skb->destructor = NULL; ++ skb->sk = NULL; + delta_truesize = skb->truesize; + if (offset > headlen) { + unsigned int eat = offset - headlen; +diff --git a/net/core/sock_map.c b/net/core/sock_map.c +index 27d733c0f65e1..8598466a38057 100644 +--- a/net/core/sock_map.c ++++ b/net/core/sock_map.c +@@ -411,6 +411,9 @@ static int __sock_map_delete(struct bpf_stab *stab, struct sock *sk_test, + struct sock *sk; + int err = 0; + ++ if (irqs_disabled()) ++ return -EOPNOTSUPP; /* locks here are hardirq-unsafe */ ++ + spin_lock_bh(&stab->lock); + sk = *psk; + if (!sk_test || sk_test == sk) +@@ -933,6 +936,9 @@ static long sock_hash_delete_elem(struct bpf_map *map, void *key) + struct bpf_shtab_elem *elem; + int ret = -ENOENT; + ++ if (irqs_disabled()) ++ return -EOPNOTSUPP; /* locks here are hardirq-unsafe */ ++ + hash = sock_hash_bucket_hash(key, key_size); + bucket = sock_hash_select_bucket(htab, hash); + +diff --git a/net/hsr/hsr_slave.c b/net/hsr/hsr_slave.c +index e5742f2a2d522..1b6457f357bdb 100644 +--- a/net/hsr/hsr_slave.c ++++ b/net/hsr/hsr_slave.c +@@ -220,7 +220,8 @@ void hsr_del_port(struct hsr_port *port) + netdev_update_features(master->dev); + dev_set_mtu(master->dev, hsr_get_max_mtu(hsr)); + netdev_rx_handler_unregister(port->dev); +- dev_set_promiscuity(port->dev, -1); ++ if (!port->hsr->fwd_offloaded) ++ dev_set_promiscuity(port->dev, -1); + netdev_upper_dev_unlink(port->dev, master->dev); + } + +diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c +index 762817d6c8d70..a018981b45142 100644 +--- a/net/ipv4/inet_connection_sock.c ++++ b/net/ipv4/inet_connection_sock.c +@@ -289,6 +289,7 @@ static bool inet_bhash2_addr_any_conflict(const struct sock *sk, int port, int l + struct sock_reuseport *reuseport_cb; + struct inet_bind_hashbucket *head2; + struct inet_bind2_bucket *tb2; ++ bool conflict = false; + bool reuseport_cb_ok; + + rcu_read_lock(); +@@ -301,18 +302,20 @@ static bool inet_bhash2_addr_any_conflict(const struct sock *sk, int port, int l + + spin_lock(&head2->lock); + +- inet_bind_bucket_for_each(tb2, 
&head2->chain) +- if (inet_bind2_bucket_match_addr_any(tb2, net, port, l3mdev, sk)) +- break; ++ inet_bind_bucket_for_each(tb2, &head2->chain) { ++ if (!inet_bind2_bucket_match_addr_any(tb2, net, port, l3mdev, sk)) ++ continue; + +- if (tb2 && inet_bhash2_conflict(sk, tb2, uid, relax, reuseport_cb_ok, +- reuseport_ok)) { +- spin_unlock(&head2->lock); +- return true; ++ if (!inet_bhash2_conflict(sk, tb2, uid, relax, reuseport_cb_ok, reuseport_ok)) ++ continue; ++ ++ conflict = true; ++ break; + } + + spin_unlock(&head2->lock); +- return false; ++ ++ return conflict; + } + + /* +@@ -774,6 +777,20 @@ void inet_csk_clear_xmit_timers(struct sock *sk) + } + EXPORT_SYMBOL(inet_csk_clear_xmit_timers); + ++void inet_csk_clear_xmit_timers_sync(struct sock *sk) ++{ ++ struct inet_connection_sock *icsk = inet_csk(sk); ++ ++ /* ongoing timer handlers need to acquire socket lock. */ ++ sock_not_owned_by_me(sk); ++ ++ icsk->icsk_pending = icsk->icsk_ack.pending = 0; ++ ++ sk_stop_timer_sync(sk, &icsk->icsk_retransmit_timer); ++ sk_stop_timer_sync(sk, &icsk->icsk_delack_timer); ++ sk_stop_timer_sync(sk, &sk->sk_timer); ++} ++ + void inet_csk_delete_keepalive_timer(struct sock *sk) + { + sk_stop_timer(sk, &sk->sk_timer); +diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c +index 7072fc0783ef5..c88c9034d6300 100644 +--- a/net/ipv4/inet_fragment.c ++++ b/net/ipv4/inet_fragment.c +@@ -24,6 +24,8 @@ + #include <net/ip.h> + #include <net/ipv6.h> + ++#include "../core/sock_destructor.h" ++ + /* Use skb->cb to track consecutive/adjacent fragments coming at + * the end of the queue. Nodes in the rb-tree queue will + * contain "runs" of one or more adjacent fragments. +@@ -39,6 +41,7 @@ struct ipfrag_skb_cb { + }; + struct sk_buff *next_frag; + int frag_run_len; ++ int ip_defrag_offset; + }; + + #define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb)) +@@ -396,12 +399,12 @@ int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb, + */ + if (!last) + fragrun_create(q, skb); /* First fragment. */ +- else if (last->ip_defrag_offset + last->len < end) { ++ else if (FRAG_CB(last)->ip_defrag_offset + last->len < end) { + /* This is the common case: skb goes to the end. */ + /* Detect and discard overlaps. 
*/ +- if (offset < last->ip_defrag_offset + last->len) ++ if (offset < FRAG_CB(last)->ip_defrag_offset + last->len) + return IPFRAG_OVERLAP; +- if (offset == last->ip_defrag_offset + last->len) ++ if (offset == FRAG_CB(last)->ip_defrag_offset + last->len) + fragrun_append_to_last(q, skb); + else + fragrun_create(q, skb); +@@ -418,13 +421,13 @@ int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb, + + parent = *rbn; + curr = rb_to_skb(parent); +- curr_run_end = curr->ip_defrag_offset + ++ curr_run_end = FRAG_CB(curr)->ip_defrag_offset + + FRAG_CB(curr)->frag_run_len; +- if (end <= curr->ip_defrag_offset) ++ if (end <= FRAG_CB(curr)->ip_defrag_offset) + rbn = &parent->rb_left; + else if (offset >= curr_run_end) + rbn = &parent->rb_right; +- else if (offset >= curr->ip_defrag_offset && ++ else if (offset >= FRAG_CB(curr)->ip_defrag_offset && + end <= curr_run_end) + return IPFRAG_DUP; + else +@@ -438,7 +441,7 @@ int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb, + rb_insert_color(&skb->rbnode, &q->rb_fragments); + } + +- skb->ip_defrag_offset = offset; ++ FRAG_CB(skb)->ip_defrag_offset = offset; + + return IPFRAG_OK; + } +@@ -448,13 +451,28 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, + struct sk_buff *parent) + { + struct sk_buff *fp, *head = skb_rb_first(&q->rb_fragments); +- struct sk_buff **nextp; ++ void (*destructor)(struct sk_buff *); ++ unsigned int orig_truesize = 0; ++ struct sk_buff **nextp = NULL; ++ struct sock *sk = skb->sk; + int delta; + ++ if (sk && is_skb_wmem(skb)) { ++ /* TX: skb->sk might have been passed as argument to ++ * dst->output and must remain valid until tx completes. ++ * ++ * Move sk to reassembled skb and fix up wmem accounting. ++ */ ++ orig_truesize = skb->truesize; ++ destructor = skb->destructor; ++ } ++ + if (head != skb) { + fp = skb_clone(skb, GFP_ATOMIC); +- if (!fp) +- return NULL; ++ if (!fp) { ++ head = skb; ++ goto out_restore_sk; ++ } + FRAG_CB(fp)->next_frag = FRAG_CB(skb)->next_frag; + if (RB_EMPTY_NODE(&skb->rbnode)) + FRAG_CB(parent)->next_frag = fp; +@@ -463,6 +481,12 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, + &q->rb_fragments); + if (q->fragments_tail == skb) + q->fragments_tail = fp; ++ ++ if (orig_truesize) { ++ /* prevent skb_morph from releasing sk */ ++ skb->sk = NULL; ++ skb->destructor = NULL; ++ } + skb_morph(skb, head); + FRAG_CB(skb)->next_frag = FRAG_CB(head)->next_frag; + rb_replace_node(&head->rbnode, &skb->rbnode, +@@ -470,13 +494,13 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, + consume_skb(head); + head = skb; + } +- WARN_ON(head->ip_defrag_offset != 0); ++ WARN_ON(FRAG_CB(head)->ip_defrag_offset != 0); + + delta = -head->truesize; + + /* Head of list must not be cloned. 
*/ + if (skb_unclone(head, GFP_ATOMIC)) +- return NULL; ++ goto out_restore_sk; + + delta += head->truesize; + if (delta) +@@ -492,7 +516,7 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, + + clone = alloc_skb(0, GFP_ATOMIC); + if (!clone) +- return NULL; ++ goto out_restore_sk; + skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list; + skb_frag_list_init(head); + for (i = 0; i < skb_shinfo(head)->nr_frags; i++) +@@ -509,6 +533,21 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, + nextp = &skb_shinfo(head)->frag_list; + } + ++out_restore_sk: ++ if (orig_truesize) { ++ int ts_delta = head->truesize - orig_truesize; ++ ++ /* if this reassembled skb is fragmented later, ++ * fraglist skbs will get skb->sk assigned from head->sk, ++ * and each frag skb will be released via sock_wfree. ++ * ++ * Update sk_wmem_alloc. ++ */ ++ head->sk = sk; ++ head->destructor = destructor; ++ refcount_add(ts_delta, &sk->sk_wmem_alloc); ++ } ++ + return nextp; + } + EXPORT_SYMBOL(inet_frag_reasm_prepare); +@@ -516,6 +555,8 @@ EXPORT_SYMBOL(inet_frag_reasm_prepare); + void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head, + void *reasm_data, bool try_coalesce) + { ++ struct sock *sk = is_skb_wmem(head) ? head->sk : NULL; ++ const unsigned int head_truesize = head->truesize; + struct sk_buff **nextp = reasm_data; + struct rb_node *rbn; + struct sk_buff *fp; +@@ -579,6 +620,9 @@ void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head, + head->prev = NULL; + head->tstamp = q->stamp; + head->mono_delivery_time = q->mono_delivery_time; ++ ++ if (sk) ++ refcount_add(sum_truesize - head_truesize, &sk->sk_wmem_alloc); + } + EXPORT_SYMBOL(inet_frag_reasm_finish); + +diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c +index a4941f53b5237..fb947d1613fe2 100644 +--- a/net/ipv4/ip_fragment.c ++++ b/net/ipv4/ip_fragment.c +@@ -384,6 +384,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) + } + + skb_dst_drop(skb); ++ skb_orphan(skb); + return -EINPROGRESS; + + insert_error: +@@ -487,7 +488,6 @@ int ip_defrag(struct net *net, struct sk_buff *skb, u32 user) + struct ipq *qp; + + __IP_INC_STATS(net, IPSTATS_MIB_REASMREQDS); +- skb_orphan(skb); + + /* Lookup (or create) queue header */ + qp = ip_find(net, ip_hdr(skb), user, vif); +diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c +index 5169c3c72cffe..f21a1a5403723 100644 +--- a/net/ipv4/ip_gre.c ++++ b/net/ipv4/ip_gre.c +@@ -280,8 +280,13 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi, + tpi->flags | TUNNEL_NO_KEY, + iph->saddr, iph->daddr, 0); + } else { ++ if (unlikely(!pskb_may_pull(skb, ++ gre_hdr_len + sizeof(*ershdr)))) ++ return PACKET_REJECT; ++ + ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len); + ver = ershdr->ver; ++ iph = ip_hdr(skb); + tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, + tpi->flags | TUNNEL_KEY, + iph->saddr, iph->daddr, tpi->key); +diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c +index 2407066b0fec1..b150c9929b12e 100644 +--- a/net/ipv4/netfilter/arp_tables.c ++++ b/net/ipv4/netfilter/arp_tables.c +@@ -956,6 +956,8 @@ static int do_replace(struct net *net, sockptr_t arg, unsigned int len) + void *loc_cpu_entry; + struct arpt_entry *iter; + ++ if (len < sizeof(tmp)) ++ return -EINVAL; + if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0) + return -EFAULT; + +@@ -1254,6 +1256,8 @@ static int compat_do_replace(struct net *net, sockptr_t arg, unsigned 
int len) + void *loc_cpu_entry; + struct arpt_entry *iter; + ++ if (len < sizeof(tmp)) ++ return -EINVAL; + if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0) + return -EFAULT; + +diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c +index 7da1df4997d05..4876707595781 100644 +--- a/net/ipv4/netfilter/ip_tables.c ++++ b/net/ipv4/netfilter/ip_tables.c +@@ -1108,6 +1108,8 @@ do_replace(struct net *net, sockptr_t arg, unsigned int len) + void *loc_cpu_entry; + struct ipt_entry *iter; + ++ if (len < sizeof(tmp)) ++ return -EINVAL; + if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0) + return -EFAULT; + +@@ -1492,6 +1494,8 @@ compat_do_replace(struct net *net, sockptr_t arg, unsigned int len) + void *loc_cpu_entry; + struct ipt_entry *iter; + ++ if (len < sizeof(tmp)) ++ return -EINVAL; + if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0) + return -EFAULT; + +diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c +index 68bb8d6bcc113..f8df35f7352a5 100644 +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -2931,6 +2931,8 @@ void tcp_close(struct sock *sk, long timeout) + lock_sock(sk); + __tcp_close(sk, timeout); + release_sock(sk); ++ if (!sk->sk_net_refcnt) ++ inet_csk_clear_xmit_timers_sync(sk); + sock_put(sk); + } + EXPORT_SYMBOL(tcp_close); +diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c +index 848072793fa98..70a9a4a48216e 100644 +--- a/net/ipv4/udp.c ++++ b/net/ipv4/udp.c +@@ -584,6 +584,13 @@ static inline bool __udp_is_mcast_sock(struct net *net, const struct sock *sk, + } + + DEFINE_STATIC_KEY_FALSE(udp_encap_needed_key); ++EXPORT_SYMBOL(udp_encap_needed_key); ++ ++#if IS_ENABLED(CONFIG_IPV6) ++DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key); ++EXPORT_SYMBOL(udpv6_encap_needed_key); ++#endif ++ + void udp_encap_enable(void) + { + static_branch_inc(&udp_encap_needed_key); +diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c +index 6c95d28d0c4a7..c3d67423ae189 100644 +--- a/net/ipv4/udp_offload.c ++++ b/net/ipv4/udp_offload.c +@@ -449,8 +449,9 @@ static int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb) + NAPI_GRO_CB(p)->count++; + p->data_len += skb->len; + +- /* sk owenrship - if any - completely transferred to the aggregated packet */ ++ /* sk ownership - if any - completely transferred to the aggregated packet */ + skb->destructor = NULL; ++ skb->sk = NULL; + p->truesize += skb->truesize; + p->len += skb->len; + +@@ -551,11 +552,19 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb, + unsigned int off = skb_gro_offset(skb); + int flush = 1; + +- /* we can do L4 aggregation only if the packet can't land in a tunnel +- * otherwise we could corrupt the inner stream ++ /* We can do L4 aggregation only if the packet can't land in a tunnel ++ * otherwise we could corrupt the inner stream. Detecting such packets ++ * cannot be foolproof and the aggregation might still happen in some ++ * cases. Such packets should be caught in udp_unexpected_gso later. + */ + NAPI_GRO_CB(skb)->is_flist = 0; + if (!sk || !udp_sk(sk)->gro_receive) { ++ /* If the packet was locally encapsulated in a UDP tunnel that ++ * wasn't detected above, do not GRO. ++ */ ++ if (skb->encapsulation) ++ goto out; ++ + if (skb->dev->features & NETIF_F_GRO_FRAGLIST) + NAPI_GRO_CB(skb)->is_flist = sk ? 
!udp_test_bit(GRO_ENABLED, sk) : 1; + +@@ -719,13 +728,7 @@ INDIRECT_CALLABLE_SCOPE int udp4_gro_complete(struct sk_buff *skb, int nhoff) + skb_shinfo(skb)->gso_type |= (SKB_GSO_FRAGLIST|SKB_GSO_UDP_L4); + skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count; + +- if (skb->ip_summed == CHECKSUM_UNNECESSARY) { +- if (skb->csum_level < SKB_MAX_CSUM_LEVEL) +- skb->csum_level++; +- } else { +- skb->ip_summed = CHECKSUM_UNNECESSARY; +- skb->csum_level = 0; +- } ++ __skb_incr_checksum_unnecessary(skb); + + return 0; + } +diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c +index 4fc2cae0d116c..54294f6a8ec51 100644 +--- a/net/ipv6/ip6_fib.c ++++ b/net/ipv6/ip6_fib.c +@@ -645,19 +645,19 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) + if (!w) { + /* New dump: + * +- * 1. hook callback destructor. +- */ +- cb->args[3] = (long)cb->done; +- cb->done = fib6_dump_done; +- +- /* +- * 2. allocate and initialize walker. ++ * 1. allocate and initialize walker. + */ + w = kzalloc(sizeof(*w), GFP_ATOMIC); + if (!w) + return -ENOMEM; + w->func = fib6_dump_node; + cb->args[2] = (long)w; ++ ++ /* 2. hook callback destructor. ++ */ ++ cb->args[3] = (long)cb->done; ++ cb->done = fib6_dump_done; ++ + } + + arg.skb = skb; +diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c +index 070d87abf7c02..26c3287beb29c 100644 +--- a/net/ipv6/ip6_gre.c ++++ b/net/ipv6/ip6_gre.c +@@ -528,6 +528,9 @@ static int ip6erspan_rcv(struct sk_buff *skb, + struct ip6_tnl *tunnel; + u8 ver; + ++ if (unlikely(!pskb_may_pull(skb, sizeof(*ershdr)))) ++ return PACKET_REJECT; ++ + ipv6h = ipv6_hdr(skb); + ershdr = (struct erspan_base_hdr *)skb->data; + ver = ershdr->ver; +diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c +index fd9f049d6d41e..636b360311c53 100644 +--- a/net/ipv6/netfilter/ip6_tables.c ++++ b/net/ipv6/netfilter/ip6_tables.c +@@ -1125,6 +1125,8 @@ do_replace(struct net *net, sockptr_t arg, unsigned int len) + void *loc_cpu_entry; + struct ip6t_entry *iter; + ++ if (len < sizeof(tmp)) ++ return -EINVAL; + if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0) + return -EFAULT; + +@@ -1501,6 +1503,8 @@ compat_do_replace(struct net *net, sockptr_t arg, unsigned int len) + void *loc_cpu_entry; + struct ip6t_entry *iter; + ++ if (len < sizeof(tmp)) ++ return -EINVAL; + if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0) + return -EFAULT; + +diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c +index b2dd48911c8d6..efbec7ee27d0a 100644 +--- a/net/ipv6/netfilter/nf_conntrack_reasm.c ++++ b/net/ipv6/netfilter/nf_conntrack_reasm.c +@@ -294,6 +294,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb, + } + + skb_dst_drop(skb); ++ skb_orphan(skb); + return -EINPROGRESS; + + insert_error: +@@ -469,7 +470,6 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) + hdr = ipv6_hdr(skb); + fhdr = (struct frag_hdr *)skb_transport_header(skb); + +- skb_orphan(skb); + fq = fq_find(net, fhdr->identification, user, hdr, + skb->dev ? 
skb->dev->ifindex : 0); + if (fq == NULL) { +diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c +index 438476a31313c..d31beb65db08f 100644 +--- a/net/ipv6/udp.c ++++ b/net/ipv6/udp.c +@@ -450,7 +450,7 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, + goto try_again; + } + +-DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key); ++DECLARE_STATIC_KEY_FALSE(udpv6_encap_needed_key); + void udpv6_encap_enable(void) + { + static_branch_inc(&udpv6_encap_needed_key); +diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c +index 6b95ba241ebe2..626d7b362dc7b 100644 +--- a/net/ipv6/udp_offload.c ++++ b/net/ipv6/udp_offload.c +@@ -174,13 +174,7 @@ INDIRECT_CALLABLE_SCOPE int udp6_gro_complete(struct sk_buff *skb, int nhoff) + skb_shinfo(skb)->gso_type |= (SKB_GSO_FRAGLIST|SKB_GSO_UDP_L4); + skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count; + +- if (skb->ip_summed == CHECKSUM_UNNECESSARY) { +- if (skb->csum_level < SKB_MAX_CSUM_LEVEL) +- skb->csum_level++; +- } else { +- skb->ip_summed = CHECKSUM_UNNECESSARY; +- skb->csum_level = 0; +- } ++ __skb_incr_checksum_unnecessary(skb); + + return 0; + } +diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c +index b54951ae07aa9..01ac690af7799 100644 +--- a/net/mptcp/protocol.c ++++ b/net/mptcp/protocol.c +@@ -55,28 +55,14 @@ static u64 mptcp_wnd_end(const struct mptcp_sock *msk) + return READ_ONCE(msk->wnd_end); + } + +-static bool mptcp_is_tcpsk(struct sock *sk) ++static const struct proto_ops *mptcp_fallback_tcp_ops(const struct sock *sk) + { +- struct socket *sock = sk->sk_socket; +- +- if (unlikely(sk->sk_prot == &tcp_prot)) { +- /* we are being invoked after mptcp_accept() has +- * accepted a non-mp-capable flow: sk is a tcp_sk, +- * not an mptcp one. +- * +- * Hand the socket over to tcp so all further socket ops +- * bypass mptcp. 
+- */ +- WRITE_ONCE(sock->ops, &inet_stream_ops); +- return true; + #if IS_ENABLED(CONFIG_MPTCP_IPV6) +- } else if (unlikely(sk->sk_prot == &tcpv6_prot)) { +- WRITE_ONCE(sock->ops, &inet6_stream_ops); +- return true; ++ if (sk->sk_prot == &tcpv6_prot) ++ return &inet6_stream_ops; + #endif +- } +- +- return false; ++ WARN_ON_ONCE(sk->sk_prot != &tcp_prot); ++ return &inet_stream_ops; + } + + static int __mptcp_socket_create(struct mptcp_sock *msk) +@@ -3328,44 +3314,6 @@ void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk) + msk->rcvq_space.space = TCP_INIT_CWND * TCP_MSS_DEFAULT; + } + +-static struct sock *mptcp_accept(struct sock *ssk, int flags, int *err, +- bool kern) +-{ +- struct sock *newsk; +- +- pr_debug("ssk=%p, listener=%p", ssk, mptcp_subflow_ctx(ssk)); +- newsk = inet_csk_accept(ssk, flags, err, kern); +- if (!newsk) +- return NULL; +- +- pr_debug("newsk=%p, subflow is mptcp=%d", newsk, sk_is_mptcp(newsk)); +- if (sk_is_mptcp(newsk)) { +- struct mptcp_subflow_context *subflow; +- struct sock *new_mptcp_sock; +- +- subflow = mptcp_subflow_ctx(newsk); +- new_mptcp_sock = subflow->conn; +- +- /* is_mptcp should be false if subflow->conn is missing, see +- * subflow_syn_recv_sock() +- */ +- if (WARN_ON_ONCE(!new_mptcp_sock)) { +- tcp_sk(newsk)->is_mptcp = 0; +- goto out; +- } +- +- newsk = new_mptcp_sock; +- MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEPASSIVEACK); +- } else { +- MPTCP_INC_STATS(sock_net(ssk), +- MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK); +- } +- +-out: +- newsk->sk_kern_sock = kern; +- return newsk; +-} +- + void mptcp_destroy_common(struct mptcp_sock *msk, unsigned int flags) + { + struct mptcp_subflow_context *subflow, *tmp; +@@ -3802,7 +3750,6 @@ static struct proto mptcp_prot = { + .connect = mptcp_connect, + .disconnect = mptcp_disconnect, + .close = mptcp_close, +- .accept = mptcp_accept, + .setsockopt = mptcp_setsockopt, + .getsockopt = mptcp_getsockopt, + .shutdown = mptcp_shutdown, +@@ -3912,18 +3859,36 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, + if (!ssk) + return -EINVAL; + +- newsk = mptcp_accept(ssk, flags, &err, kern); ++ pr_debug("ssk=%p, listener=%p", ssk, mptcp_subflow_ctx(ssk)); ++ newsk = inet_csk_accept(ssk, flags, &err, kern); + if (!newsk) + return err; + +- lock_sock(newsk); +- +- __inet_accept(sock, newsock, newsk); +- if (!mptcp_is_tcpsk(newsock->sk)) { +- struct mptcp_sock *msk = mptcp_sk(newsk); ++ pr_debug("newsk=%p, subflow is mptcp=%d", newsk, sk_is_mptcp(newsk)); ++ if (sk_is_mptcp(newsk)) { + struct mptcp_subflow_context *subflow; ++ struct sock *new_mptcp_sock; ++ ++ subflow = mptcp_subflow_ctx(newsk); ++ new_mptcp_sock = subflow->conn; ++ ++ /* is_mptcp should be false if subflow->conn is missing, see ++ * subflow_syn_recv_sock() ++ */ ++ if (WARN_ON_ONCE(!new_mptcp_sock)) { ++ tcp_sk(newsk)->is_mptcp = 0; ++ goto tcpfallback; ++ } ++ ++ newsk = new_mptcp_sock; ++ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEPASSIVEACK); ++ ++ newsk->sk_kern_sock = kern; ++ lock_sock(newsk); ++ __inet_accept(sock, newsock, newsk); + + set_bit(SOCK_CUSTOM_SOCKOPT, &newsock->flags); ++ msk = mptcp_sk(newsk); + msk->in_accept_queue = 0; + + /* set ssk->sk_socket of accept()ed flows to mptcp socket. 
+@@ -3945,6 +3910,19 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, + if (unlikely(list_is_singular(&msk->conn_list))) + mptcp_set_state(newsk, TCP_CLOSE); + } ++ } else { ++tcpfallback: ++ newsk->sk_kern_sock = kern; ++ lock_sock(newsk); ++ __inet_accept(sock, newsock, newsk); ++ /* we are being invoked after accepting a non-mp-capable ++ * flow: sk is a tcp_sk, not an mptcp one. ++ * ++ * Hand the socket over to tcp so all further socket ops ++ * bypass mptcp. ++ */ ++ WRITE_ONCE(newsock->sk->sk_socket->ops, ++ mptcp_fallback_tcp_ops(newsock->sk)); + } + release_sock(newsk); + +diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c +index ab41700bee688..23ee96c6abcbf 100644 +--- a/net/mptcp/subflow.c ++++ b/net/mptcp/subflow.c +@@ -905,6 +905,8 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, + return child; + + fallback: ++ if (fallback) ++ SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK); + mptcp_subflow_drop_ctx(child); + return child; + } +diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c +index f10419ba6e0bd..2a4649df8f086 100644 +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -1200,6 +1200,26 @@ static void nf_tables_table_disable(struct net *net, struct nft_table *table) + #define __NFT_TABLE_F_UPDATE (__NFT_TABLE_F_WAS_DORMANT | \ + __NFT_TABLE_F_WAS_AWAKEN) + ++static bool nft_table_pending_update(const struct nft_ctx *ctx) ++{ ++ struct nftables_pernet *nft_net = nft_pernet(ctx->net); ++ struct nft_trans *trans; ++ ++ if (ctx->table->flags & __NFT_TABLE_F_UPDATE) ++ return true; ++ ++ list_for_each_entry(trans, &nft_net->commit_list, list) { ++ if (trans->ctx.table == ctx->table && ++ ((trans->msg_type == NFT_MSG_NEWCHAIN && ++ nft_trans_chain_update(trans)) || ++ (trans->msg_type == NFT_MSG_DELCHAIN && ++ nft_is_base_chain(trans->ctx.chain)))) ++ return true; ++ } ++ ++ return false; ++} ++ + static int nf_tables_updtable(struct nft_ctx *ctx) + { + struct nft_trans *trans; +@@ -1223,7 +1243,7 @@ static int nf_tables_updtable(struct nft_ctx *ctx) + return -EOPNOTSUPP; + + /* No dormant off/on/off/on games in single transaction */ +- if (ctx->table->flags & __NFT_TABLE_F_UPDATE) ++ if (nft_table_pending_update(ctx)) + return -EINVAL; + + trans = nft_trans_alloc(ctx, NFT_MSG_NEWTABLE, +@@ -2420,6 +2440,9 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, + struct nft_stats __percpu *stats = NULL; + struct nft_chain_hook hook = {}; + ++ if (table->flags & __NFT_TABLE_F_UPDATE) ++ return -EINVAL; ++ + if (flags & NFT_CHAIN_BINDING) + return -EOPNOTSUPP; + +@@ -2621,6 +2644,13 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy, + } + } + ++ if (table->flags & __NFT_TABLE_F_UPDATE && ++ !list_empty(&hook.list)) { ++ NL_SET_BAD_ATTR(extack, attr); ++ err = -EOPNOTSUPP; ++ goto err_hooks; ++ } ++ + if (!(table->flags & NFT_TABLE_F_DORMANT) && + nft_is_base_chain(chain) && + !list_empty(&hook.list)) { +@@ -2850,6 +2880,9 @@ static int nft_delchain_hook(struct nft_ctx *ctx, + struct nft_trans *trans; + int err; + ++ if (ctx->table->flags & __NFT_TABLE_F_UPDATE) ++ return -EOPNOTSUPP; ++ + err = nft_chain_parse_hook(ctx->net, basechain, nla, &chain_hook, + ctx->family, chain->flags, extack); + if (err < 0) +@@ -2934,7 +2967,8 @@ static int nf_tables_delchain(struct sk_buff *skb, const struct nfnl_info *info, + nft_ctx_init(&ctx, net, skb, info->nlh, family, table, chain, nla); + + if (nla[NFTA_CHAIN_HOOK]) { +- if 
(chain->flags & NFT_CHAIN_HW_OFFLOAD) ++ if (NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_DESTROYCHAIN || ++ chain->flags & NFT_CHAIN_HW_OFFLOAD) + return -EOPNOTSUPP; + + if (nft_is_base_chain(chain)) { +@@ -8134,11 +8168,12 @@ static int nft_flowtable_parse_hook(const struct nft_ctx *ctx, + return err; + } + ++/* call under rcu_read_lock */ + static const struct nf_flowtable_type *__nft_flowtable_type_get(u8 family) + { + const struct nf_flowtable_type *type; + +- list_for_each_entry(type, &nf_tables_flowtables, list) { ++ list_for_each_entry_rcu(type, &nf_tables_flowtables, list) { + if (family == type->family) + return type; + } +@@ -8150,9 +8185,13 @@ nft_flowtable_type_get(struct net *net, u8 family) + { + const struct nf_flowtable_type *type; + ++ rcu_read_lock(); + type = __nft_flowtable_type_get(family); +- if (type != NULL && try_module_get(type->owner)) ++ if (type != NULL && try_module_get(type->owner)) { ++ rcu_read_unlock(); + return type; ++ } ++ rcu_read_unlock(); + + lockdep_nfnl_nft_mutex_not_held(); + #ifdef CONFIG_MODULES +@@ -10053,9 +10092,11 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) + if (nft_trans_chain_update(trans)) { + nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN, + &nft_trans_chain_hooks(trans)); +- nft_netdev_unregister_hooks(net, +- &nft_trans_chain_hooks(trans), +- true); ++ if (!(trans->ctx.table->flags & NFT_TABLE_F_DORMANT)) { ++ nft_netdev_unregister_hooks(net, ++ &nft_trans_chain_hooks(trans), ++ true); ++ } + } else { + nft_chain_del(trans->ctx.chain); + nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN, +@@ -10294,10 +10335,11 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) + struct nft_trans *trans, *next; + LIST_HEAD(set_update_list); + struct nft_trans_elem *te; ++ int err = 0; + + if (action == NFNL_ABORT_VALIDATE && + nf_tables_validate(net) < 0) +- return -EAGAIN; ++ err = -EAGAIN; + + list_for_each_entry_safe_reverse(trans, next, &nft_net->commit_list, + list) { +@@ -10327,9 +10369,11 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) + break; + case NFT_MSG_NEWCHAIN: + if (nft_trans_chain_update(trans)) { +- nft_netdev_unregister_hooks(net, +- &nft_trans_chain_hooks(trans), +- true); ++ if (!(trans->ctx.table->flags & NFT_TABLE_F_DORMANT)) { ++ nft_netdev_unregister_hooks(net, ++ &nft_trans_chain_hooks(trans), ++ true); ++ } + free_percpu(nft_trans_chain_stats(trans)); + kfree(nft_trans_chain_name(trans)); + nft_trans_destroy(trans); +@@ -10483,12 +10527,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) + nf_tables_abort_release(trans); + } + +- if (action == NFNL_ABORT_AUTOLOAD) +- nf_tables_module_autoload(net); +- else +- nf_tables_module_autoload_cleanup(net); +- +- return 0; ++ return err; + } + + static int nf_tables_abort(struct net *net, struct sk_buff *skb, +@@ -10501,6 +10540,17 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb, + gc_seq = nft_gc_seq_begin(nft_net); + ret = __nf_tables_abort(net, action); + nft_gc_seq_end(nft_net, gc_seq); ++ ++ WARN_ON_ONCE(!list_empty(&nft_net->commit_list)); ++ ++ /* module autoload needs to happen after GC sequence update because it ++ * temporarily releases and grabs mutex again. 
++ */ ++ if (action == NFNL_ABORT_AUTOLOAD) ++ nf_tables_module_autoload(net); ++ else ++ nf_tables_module_autoload_cleanup(net); ++ + mutex_unlock(&nft_net->commit_mutex); + + return ret; +@@ -11301,9 +11351,10 @@ static void __net_exit nf_tables_exit_net(struct net *net) + + gc_seq = nft_gc_seq_begin(nft_net); + +- if (!list_empty(&nft_net->commit_list) || +- !list_empty(&nft_net->module_list)) +- __nf_tables_abort(net, NFNL_ABORT_NONE); ++ WARN_ON_ONCE(!list_empty(&nft_net->commit_list)); ++ ++ if (!list_empty(&nft_net->module_list)) ++ nf_tables_module_autoload_cleanup(net); + + __nft_release_tables(net); + +@@ -11395,6 +11446,7 @@ static void __exit nf_tables_module_exit(void) + unregister_netdevice_notifier(&nf_tables_flowtable_notifier); + nft_chain_filter_fini(); + nft_chain_route_fini(); ++ nf_tables_trans_destroy_flush_work(); + unregister_pernet_subsys(&nf_tables_net_ops); + cancel_work_sync(&trans_gc_work); + cancel_work_sync(&trans_destroy_work); +diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c +index 12684d835cb53..772ddb5824d9e 100644 +--- a/net/nfc/nci/core.c ++++ b/net/nfc/nci/core.c +@@ -1516,6 +1516,11 @@ static void nci_rx_work(struct work_struct *work) + nfc_send_to_raw_sock(ndev->nfc_dev, skb, + RAW_PAYLOAD_NCI, NFC_DIRECTION_RX); + ++ if (!nci_plen(skb->data)) { ++ kfree_skb(skb); ++ break; ++ } ++ + /* Process frame */ + switch (nci_mt(skb->data)) { + case NCI_MT_RSP_PKT: +diff --git a/net/rds/rdma.c b/net/rds/rdma.c +index a4e3c5de998be..00dbcd4d28e68 100644 +--- a/net/rds/rdma.c ++++ b/net/rds/rdma.c +@@ -302,7 +302,7 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args, + } + ret = PTR_ERR(trans_private); + /* Trigger connection so that its ready for the next retry */ +- if (ret == -ENODEV) ++ if (ret == -ENODEV && cp) + rds_conn_connect_if_down(cp->cp_conn); + goto out; + } +diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c +index dffa990a9629f..e34f1be151645 100644 +--- a/net/sched/act_skbmod.c ++++ b/net/sched/act_skbmod.c +@@ -241,13 +241,13 @@ static int tcf_skbmod_dump(struct sk_buff *skb, struct tc_action *a, + struct tcf_skbmod *d = to_skbmod(a); + unsigned char *b = skb_tail_pointer(skb); + struct tcf_skbmod_params *p; +- struct tc_skbmod opt = { +- .index = d->tcf_index, +- .refcnt = refcount_read(&d->tcf_refcnt) - ref, +- .bindcnt = atomic_read(&d->tcf_bindcnt) - bind, +- }; ++ struct tc_skbmod opt; + struct tcf_t t; + ++ memset(&opt, 0, sizeof(opt)); ++ opt.index = d->tcf_index; ++ opt.refcnt = refcount_read(&d->tcf_refcnt) - ref, ++ opt.bindcnt = atomic_read(&d->tcf_bindcnt) - bind; + spin_lock_bh(&d->tcf_lock); + opt.action = d->tcf_action; + p = rcu_dereference_protected(d->skbmod_p, +diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c +index e9eaf637220e9..5f25a2595add5 100644 +--- a/net/sched/sch_api.c ++++ b/net/sched/sch_api.c +@@ -809,7 +809,7 @@ void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len) + notify = !sch->q.qlen && !WARN_ON_ONCE(!n && + !qdisc_is_offloaded); + /* TODO: perform the search on a per txq basis */ +- sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid)); ++ sch = qdisc_lookup_rcu(qdisc_dev(sch), TC_H_MAJ(parentid)); + if (sch == NULL) { + WARN_ON_ONCE(parentid != TC_H_ROOT); + break; +diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c +index e0ce4276274be..933e12e3a55c7 100644 +--- a/net/sunrpc/svcsock.c ++++ b/net/sunrpc/svcsock.c +@@ -1216,15 +1216,6 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) + * MSG_SPLICE_PAGES is used exclusively to reduce 
the number of + * copy operations in this path. Therefore the caller must ensure + * that the pages backing @xdr are unchanging. +- * +- * Note that the send is non-blocking. The caller has incremented +- * the reference count on each page backing the RPC message, and +- * the network layer will "put" these pages when transmission is +- * complete. +- * +- * This is safe for our RPC services because the memory backing +- * the head and tail components is never kmalloc'd. These always +- * come from pages in the svc_rqst::rq_pages array. + */ + static int svc_tcp_sendmsg(struct svc_sock *svsk, struct svc_rqst *rqstp, + rpc_fraghdr marker, unsigned int *sentp) +@@ -1254,6 +1245,7 @@ static int svc_tcp_sendmsg(struct svc_sock *svsk, struct svc_rqst *rqstp, + iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec, + 1 + count, sizeof(marker) + rqstp->rq_res.len); + ret = sock_sendmsg(svsk->sk_sock, &msg); ++ page_frag_free(buf); + if (ret < 0) + return ret; + *sentp += ret; +diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c +index acf5bb74fd386..df166f6afad82 100644 +--- a/net/tls/tls_sw.c ++++ b/net/tls/tls_sw.c +@@ -1976,10 +1976,10 @@ int tls_sw_recvmsg(struct sock *sk, + if (unlikely(flags & MSG_ERRQUEUE)) + return sock_recv_errqueue(sk, msg, len, SOL_IP, IP_RECVERR); + +- psock = sk_psock_get(sk); + err = tls_rx_reader_lock(sk, ctx, flags & MSG_DONTWAIT); + if (err < 0) + return err; ++ psock = sk_psock_get(sk); + bpf_strp_enabled = sk_psock_strp_enabled(psock); + + /* If crypto failed the connection is broken */ +@@ -2152,12 +2152,15 @@ int tls_sw_recvmsg(struct sock *sk, + } + + /* Drain records from the rx_list & copy if required */ +- if (is_peek || is_kvec) ++ if (is_peek) + err = process_rx_list(ctx, msg, &control, copied + peeked, + decrypted - peeked, is_peek, NULL); + else + err = process_rx_list(ctx, msg, &control, 0, + async_copy_bytes, is_peek, NULL); ++ ++ /* we could have copied less than we wanted, and possibly nothing */ ++ decrypted += max(err, 0) - async_copy_bytes; + } + + copied += decrypted; +diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c +index a64bf601b480d..2925f5d27ad3f 100644 +--- a/net/vmw_vsock/virtio_transport.c ++++ b/net/vmw_vsock/virtio_transport.c +@@ -109,7 +109,6 @@ virtio_transport_send_pkt_work(struct work_struct *work) + if (!skb) + break; + +- virtio_transport_deliver_tap_pkt(skb); + reply = virtio_vsock_skb_reply(skb); + + sg_init_one(&hdr, virtio_vsock_hdr(skb), sizeof(*virtio_vsock_hdr(skb))); +@@ -128,6 +127,8 @@ virtio_transport_send_pkt_work(struct work_struct *work) + break; + } + ++ virtio_transport_deliver_tap_pkt(skb); ++ + if (reply) { + struct virtqueue *rx_vq = vsock->vqs[VSOCK_VQ_RX]; + int val; +diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py +index 0669bac5e900e..3f899cc7e99a9 100755 +--- a/scripts/bpf_doc.py ++++ b/scripts/bpf_doc.py +@@ -414,8 +414,8 @@ class PrinterRST(Printer): + version = version.stdout.decode().rstrip() + except: + try: +- version = subprocess.run(['make', 'kernelversion'], cwd=linuxRoot, +- capture_output=True, check=True) ++ version = subprocess.run(['make', '-s', '--no-print-directory', 'kernelversion'], ++ cwd=linuxRoot, capture_output=True, check=True) + version = version.stdout.decode().rstrip() + except: + return 'Linux' +diff --git a/scripts/mod/Makefile b/scripts/mod/Makefile +index c9e38ad937fd4..3c54125eb3733 100644 +--- a/scripts/mod/Makefile ++++ b/scripts/mod/Makefile +@@ -5,7 +5,7 @@ CFLAGS_REMOVE_empty.o += $(CC_FLAGS_LTO) + hostprogs-always-y += modpost 
mk_elfconfig + always-y += empty.o + +-modpost-objs := modpost.o file2alias.o sumversion.o ++modpost-objs := modpost.o file2alias.o sumversion.o symsearch.o + + devicetable-offsets-file := devicetable-offsets.h + +@@ -16,7 +16,7 @@ targets += $(devicetable-offsets-file) devicetable-offsets.s + + # dependencies on generated files need to be listed explicitly + +-$(obj)/modpost.o $(obj)/file2alias.o $(obj)/sumversion.o: $(obj)/elfconfig.h ++$(obj)/modpost.o $(obj)/file2alias.o $(obj)/sumversion.o $(obj)/symsearch.o: $(obj)/elfconfig.h + $(obj)/file2alias.o: $(obj)/$(devicetable-offsets-file) + + quiet_cmd_elfconfig = MKELF $@ +diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c +index 5191fdbd3fa23..7d53942445d75 100644 +--- a/scripts/mod/modpost.c ++++ b/scripts/mod/modpost.c +@@ -22,7 +22,6 @@ + #include <errno.h> + #include "modpost.h" + #include "../../include/linux/license.h" +-#include "../../include/linux/module_symbol.h" + + static bool module_enabled; + /* Are we using CONFIG_MODVERSIONS? */ +@@ -577,11 +576,14 @@ static int parse_elf(struct elf_info *info, const char *filename) + *p = TO_NATIVE(*p); + } + ++ symsearch_init(info); ++ + return 1; + } + + static void parse_elf_finish(struct elf_info *info) + { ++ symsearch_finish(info); + release_file(info->hdr, info->size); + } + +@@ -1042,75 +1044,16 @@ static int secref_whitelist(const char *fromsec, const char *fromsym, + return 1; + } + +-/* +- * If there's no name there, ignore it; likewise, ignore it if it's +- * one of the magic symbols emitted used by current tools. +- * +- * Otherwise if find_symbols_between() returns those symbols, they'll +- * fail the whitelist tests and cause lots of false alarms ... fixable +- * only by merging __exit and __init sections into __text, bloating +- * the kernel (which is especially evil on embedded platforms). +- */ +-static inline int is_valid_name(struct elf_info *elf, Elf_Sym *sym) +-{ +- const char *name = elf->strtab + sym->st_name; +- +- if (!name || !strlen(name)) +- return 0; +- return !is_mapping_symbol(name); +-} +- +-/* Look up the nearest symbol based on the section and the address */ +-static Elf_Sym *find_nearest_sym(struct elf_info *elf, Elf_Addr addr, +- unsigned int secndx, bool allow_negative, +- Elf_Addr min_distance) +-{ +- Elf_Sym *sym; +- Elf_Sym *near = NULL; +- Elf_Addr sym_addr, distance; +- bool is_arm = (elf->hdr->e_machine == EM_ARM); +- +- for (sym = elf->symtab_start; sym < elf->symtab_stop; sym++) { +- if (get_secindex(elf, sym) != secndx) +- continue; +- if (!is_valid_name(elf, sym)) +- continue; +- +- sym_addr = sym->st_value; +- +- /* +- * For ARM Thumb instruction, the bit 0 of st_value is set +- * if the symbol is STT_FUNC type. Mask it to get the address. 
+- */ +- if (is_arm && ELF_ST_TYPE(sym->st_info) == STT_FUNC) +- sym_addr &= ~1; +- +- if (addr >= sym_addr) +- distance = addr - sym_addr; +- else if (allow_negative) +- distance = sym_addr - addr; +- else +- continue; +- +- if (distance <= min_distance) { +- min_distance = distance; +- near = sym; +- } +- +- if (min_distance == 0) +- break; +- } +- return near; +-} +- + static Elf_Sym *find_fromsym(struct elf_info *elf, Elf_Addr addr, + unsigned int secndx) + { +- return find_nearest_sym(elf, addr, secndx, false, ~0); ++ return symsearch_find_nearest(elf, addr, secndx, false, ~0); + } + + static Elf_Sym *find_tosym(struct elf_info *elf, Elf_Addr addr, Elf_Sym *sym) + { ++ Elf_Sym *new_sym; ++ + /* If the supplied symbol has a valid name, return it */ + if (is_valid_name(elf, sym)) + return sym; +@@ -1119,7 +1062,9 @@ static Elf_Sym *find_tosym(struct elf_info *elf, Elf_Addr addr, Elf_Sym *sym) + * Strive to find a better symbol name, but the resulting name may not + * match the symbol referenced in the original code. + */ +- return find_nearest_sym(elf, addr, get_secindex(elf, sym), true, 20); ++ new_sym = symsearch_find_nearest(elf, addr, get_secindex(elf, sym), ++ true, 20); ++ return new_sym ? new_sym : sym; + } + + static bool is_executable_section(struct elf_info *elf, unsigned int secndx) +diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h +index 5f94c2c9f2d95..6413f26fcb6b4 100644 +--- a/scripts/mod/modpost.h ++++ b/scripts/mod/modpost.h +@@ -10,6 +10,7 @@ + #include <fcntl.h> + #include <unistd.h> + #include <elf.h> ++#include "../../include/linux/module_symbol.h" + + #include "list.h" + #include "elfconfig.h" +@@ -128,6 +129,8 @@ struct elf_info { + * take shndx from symtab_shndx_start[N] instead */ + Elf32_Word *symtab_shndx_start; + Elf32_Word *symtab_shndx_stop; ++ ++ struct symsearch *symsearch; + }; + + /* Accessor for sym->st_shndx, hides ugliness of "64k sections" */ +@@ -154,6 +157,28 @@ static inline unsigned int get_secindex(const struct elf_info *info, + return index; + } + ++/* ++ * If there's no name there, ignore it; likewise, ignore it if it's ++ * one of the magic symbols emitted used by current tools. ++ * ++ * Internal symbols created by tools should be ignored by modpost. ++ */ ++static inline int is_valid_name(struct elf_info *elf, Elf_Sym *sym) ++{ ++ const char *name = elf->strtab + sym->st_name; ++ ++ if (!name || !strlen(name)) ++ return 0; ++ return !is_mapping_symbol(name); ++} ++ ++/* symsearch.c */ ++void symsearch_init(struct elf_info *elf); ++void symsearch_finish(struct elf_info *elf); ++Elf_Sym *symsearch_find_nearest(struct elf_info *elf, Elf_Addr addr, ++ unsigned int secndx, bool allow_negative, ++ Elf_Addr min_distance); ++ + /* file2alias.c */ + void handle_moddevtable(struct module *mod, struct elf_info *info, + Elf_Sym *sym, const char *symname); +diff --git a/scripts/mod/symsearch.c b/scripts/mod/symsearch.c +new file mode 100644 +index 0000000000000..aa4ed51f9960c +--- /dev/null ++++ b/scripts/mod/symsearch.c +@@ -0,0 +1,199 @@ ++// SPDX-License-Identifier: GPL-2.0 ++ ++/* ++ * Helper functions for finding the symbol in an ELF which is "nearest" ++ * to a given address. ++ */ ++ ++#include "modpost.h" ++ ++struct syminfo { ++ unsigned int symbol_index; ++ unsigned int section_index; ++ Elf_Addr addr; ++}; ++ ++/* ++ * Container used to hold an entire binary search table. ++ * Entries in table are ascending, sorted first by section_index, ++ * then by addr, and last by symbol_index. 
The sorting by ++ * symbol_index is used to ensure predictable behavior when ++ * multiple symbols are present with the same address; all ++ * symbols past the first are effectively ignored, by eliding ++ * them in symsearch_fixup(). ++ */ ++struct symsearch { ++ unsigned int table_size; ++ struct syminfo table[]; ++}; ++ ++static int syminfo_compare(const void *s1, const void *s2) ++{ ++ const struct syminfo *sym1 = s1; ++ const struct syminfo *sym2 = s2; ++ ++ if (sym1->section_index > sym2->section_index) ++ return 1; ++ if (sym1->section_index < sym2->section_index) ++ return -1; ++ if (sym1->addr > sym2->addr) ++ return 1; ++ if (sym1->addr < sym2->addr) ++ return -1; ++ if (sym1->symbol_index > sym2->symbol_index) ++ return 1; ++ if (sym1->symbol_index < sym2->symbol_index) ++ return -1; ++ return 0; ++} ++ ++static unsigned int symbol_count(struct elf_info *elf) ++{ ++ unsigned int result = 0; ++ ++ for (Elf_Sym *sym = elf->symtab_start; sym < elf->symtab_stop; sym++) { ++ if (is_valid_name(elf, sym)) ++ result++; ++ } ++ return result; ++} ++ ++/* ++ * Populate the search array that we just allocated. ++ * Be slightly paranoid here. The ELF file is mmap'd and could ++ * conceivably change between symbol_count() and symsearch_populate(). ++ * If we notice any difference, bail out rather than potentially ++ * propagating errors or crashing. ++ */ ++static void symsearch_populate(struct elf_info *elf, ++ struct syminfo *table, ++ unsigned int table_size) ++{ ++ bool is_arm = (elf->hdr->e_machine == EM_ARM); ++ ++ for (Elf_Sym *sym = elf->symtab_start; sym < elf->symtab_stop; sym++) { ++ if (is_valid_name(elf, sym)) { ++ if (table_size-- == 0) ++ fatal("%s: size mismatch\n", __func__); ++ table->symbol_index = sym - elf->symtab_start; ++ table->section_index = get_secindex(elf, sym); ++ table->addr = sym->st_value; ++ ++ /* ++ * For ARM Thumb instruction, the bit 0 of st_value is ++ * set if the symbol is STT_FUNC type. Mask it to get ++ * the address. ++ */ ++ if (is_arm && ELF_ST_TYPE(sym->st_info) == STT_FUNC) ++ table->addr &= ~1; ++ ++ table++; ++ } ++ } ++ ++ if (table_size != 0) ++ fatal("%s: size mismatch\n", __func__); ++} ++ ++/* ++ * Do any fixups on the table after sorting. ++ * For now, this just finds adjacent entries which have ++ * the same section_index and addr, and it propagates ++ * the first symbol_index over the subsequent entries, ++ * so that only one symbol_index is seen for any given ++ * section_index and addr. This ensures that whether ++ * we're looking at an address from "above" or "below" ++ * that we see the same symbol_index. ++ * This does leave some duplicate entries in the table; ++ * in practice, these are a small fraction of the ++ * total number of entries, and they are harmless to ++ * the binary search algorithm other than a few occasional ++ * unnecessary comparisons. ++ */ ++static void symsearch_fixup(struct syminfo *table, unsigned int table_size) ++{ ++ /* Don't look at index 0, it will never change. 
*/ ++ for (unsigned int i = 1; i < table_size; i++) { ++ if (table[i].addr == table[i - 1].addr && ++ table[i].section_index == table[i - 1].section_index) { ++ table[i].symbol_index = table[i - 1].symbol_index; ++ } ++ } ++} ++ ++void symsearch_init(struct elf_info *elf) ++{ ++ unsigned int table_size = symbol_count(elf); ++ ++ elf->symsearch = NOFAIL(malloc(sizeof(struct symsearch) + ++ sizeof(struct syminfo) * table_size)); ++ elf->symsearch->table_size = table_size; ++ ++ symsearch_populate(elf, elf->symsearch->table, table_size); ++ qsort(elf->symsearch->table, table_size, ++ sizeof(struct syminfo), syminfo_compare); ++ ++ symsearch_fixup(elf->symsearch->table, table_size); ++} ++ ++void symsearch_finish(struct elf_info *elf) ++{ ++ free(elf->symsearch); ++ elf->symsearch = NULL; ++} ++ ++/* ++ * Find the syminfo which is in secndx and "nearest" to addr. ++ * allow_negative: allow returning a symbol whose address is > addr. ++ * min_distance: ignore symbols which are further away than this. ++ * ++ * Returns a pointer into the symbol table for success. ++ * Returns NULL if no legal symbol is found within the requested range. ++ */ ++Elf_Sym *symsearch_find_nearest(struct elf_info *elf, Elf_Addr addr, ++ unsigned int secndx, bool allow_negative, ++ Elf_Addr min_distance) ++{ ++ unsigned int hi = elf->symsearch->table_size; ++ unsigned int lo = 0; ++ struct syminfo *table = elf->symsearch->table; ++ struct syminfo target; ++ ++ target.addr = addr; ++ target.section_index = secndx; ++ target.symbol_index = ~0; /* compares greater than any actual index */ ++ while (hi > lo) { ++ unsigned int mid = lo + (hi - lo) / 2; /* Avoids overflow */ ++ ++ if (syminfo_compare(&table[mid], &target) > 0) ++ hi = mid; ++ else ++ lo = mid + 1; ++ } ++ ++ /* ++ * table[hi], if it exists, is the first entry in the array which ++ * lies beyond target. table[hi - 1], if it exists, is the last ++ * entry in the array which comes before target, including the ++ * case where it perfectly matches the section and the address. ++ * ++ * Note -- if the address we're looking up falls perfectly ++ * in the middle of two symbols, this is written to always ++ * prefer the symbol with the lower address. 
++ */ ++ Elf_Sym *result = NULL; ++ ++ if (allow_negative && ++ hi < elf->symsearch->table_size && ++ table[hi].section_index == secndx && ++ table[hi].addr - addr <= min_distance) { ++ min_distance = table[hi].addr - addr; ++ result = &elf->symtab_start[table[hi].symbol_index]; ++ } ++ if (hi > 0 && ++ table[hi - 1].section_index == secndx && ++ addr - table[hi - 1].addr <= min_distance) { ++ result = &elf->symtab_start[table[hi - 1].symbol_index]; ++ } ++ return result; ++} +diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c +index 6fa640263216f..2c23a5a286086 100644 +--- a/security/selinux/selinuxfs.c ++++ b/security/selinux/selinuxfs.c +@@ -2135,7 +2135,6 @@ static struct file_system_type sel_fs_type = { + .kill_sb = sel_kill_sb, + }; + +-static struct vfsmount *selinuxfs_mount __ro_after_init; + struct path selinux_null __ro_after_init; + + static int __init init_sel_fs(void) +@@ -2157,18 +2156,21 @@ static int __init init_sel_fs(void) + return err; + } + +- selinux_null.mnt = selinuxfs_mount = kern_mount(&sel_fs_type); +- if (IS_ERR(selinuxfs_mount)) { ++ selinux_null.mnt = kern_mount(&sel_fs_type); ++ if (IS_ERR(selinux_null.mnt)) { + pr_err("selinuxfs: could not mount!\n"); +- err = PTR_ERR(selinuxfs_mount); +- selinuxfs_mount = NULL; ++ err = PTR_ERR(selinux_null.mnt); ++ selinux_null.mnt = NULL; ++ return err; + } ++ + selinux_null.dentry = d_hash_and_lookup(selinux_null.mnt->mnt_root, + &null_name); + if (IS_ERR(selinux_null.dentry)) { + pr_err("selinuxfs: could not lookup null!\n"); + err = PTR_ERR(selinux_null.dentry); + selinux_null.dentry = NULL; ++ return err; + } + + return err; +diff --git a/sound/pci/emu10k1/emu10k1_callback.c b/sound/pci/emu10k1/emu10k1_callback.c +index d36234b88fb42..941bfbf812ed3 100644 +--- a/sound/pci/emu10k1/emu10k1_callback.c ++++ b/sound/pci/emu10k1/emu10k1_callback.c +@@ -255,7 +255,7 @@ lookup_voices(struct snd_emux *emu, struct snd_emu10k1 *hw, + /* check if sample is finished playing (non-looping only) */ + if (bp != best + V_OFF && bp != best + V_FREE && + (vp->reg.sample_mode & SNDRV_SFNT_SAMPLE_SINGLESHOT)) { +- val = snd_emu10k1_ptr_read(hw, CCCA_CURRADDR, vp->ch) - 64; ++ val = snd_emu10k1_ptr_read(hw, CCCA_CURRADDR, vp->ch); + if (val >= vp->reg.loopstart) + bp = best + V_OFF; + } +@@ -362,7 +362,7 @@ start_voice(struct snd_emux_voice *vp) + + map = (hw->silent_page.addr << hw->address_mode) | (hw->address_mode ? 
MAP_PTI_MASK1 : MAP_PTI_MASK0); + +- addr = vp->reg.start + 64; ++ addr = vp->reg.start; + temp = vp->reg.parm.filterQ; + ccca = (temp << 28) | addr; + if (vp->apitch < 0xe400) +@@ -430,9 +430,6 @@ start_voice(struct snd_emux_voice *vp) + /* Q & current address (Q 4bit value, MSB) */ + CCCA, ccca, + +- /* cache */ +- CCR, REG_VAL_PUT(CCR_CACHEINVALIDSIZE, 64), +- + /* reset volume */ + VTFT, vtarget | vp->ftarget, + CVCF, vtarget | CVCF_CURRENTFILTER_MASK, +diff --git a/sound/pci/hda/cs35l56_hda.c b/sound/pci/hda/cs35l56_hda.c +index 7adc1d373d65c..27848d6469636 100644 +--- a/sound/pci/hda/cs35l56_hda.c ++++ b/sound/pci/hda/cs35l56_hda.c +@@ -978,14 +978,14 @@ int cs35l56_hda_common_probe(struct cs35l56_hda *cs35l56, int id) + pm_runtime_mark_last_busy(cs35l56->base.dev); + pm_runtime_enable(cs35l56->base.dev); + ++ cs35l56->base.init_done = true; ++ + ret = component_add(cs35l56->base.dev, &cs35l56_hda_comp_ops); + if (ret) { + dev_err(cs35l56->base.dev, "Register component failed: %d\n", ret); + goto pm_err; + } + +- cs35l56->base.init_done = true; +- + return 0; + + pm_err: +diff --git a/sound/pci/hda/cs35l56_hda_i2c.c b/sound/pci/hda/cs35l56_hda_i2c.c +index 757a4d193e0fb..c31f60b0421e5 100644 +--- a/sound/pci/hda/cs35l56_hda_i2c.c ++++ b/sound/pci/hda/cs35l56_hda_i2c.c +@@ -49,10 +49,19 @@ static const struct i2c_device_id cs35l56_hda_i2c_id[] = { + {} + }; + ++static const struct acpi_device_id cs35l56_acpi_hda_match[] = { ++ { "CSC3554", 0 }, ++ { "CSC3556", 0 }, ++ { "CSC3557", 0 }, ++ {} ++}; ++MODULE_DEVICE_TABLE(acpi, cs35l56_acpi_hda_match); ++ + static struct i2c_driver cs35l56_hda_i2c_driver = { + .driver = { +- .name = "cs35l56-hda", +- .pm = &cs35l56_hda_pm_ops, ++ .name = "cs35l56-hda", ++ .acpi_match_table = cs35l56_acpi_hda_match, ++ .pm = &cs35l56_hda_pm_ops, + }, + .id_table = cs35l56_hda_i2c_id, + .probe = cs35l56_hda_i2c_probe, +diff --git a/sound/pci/hda/cs35l56_hda_spi.c b/sound/pci/hda/cs35l56_hda_spi.c +index 756aec342eab7..52c9e04b3c55f 100644 +--- a/sound/pci/hda/cs35l56_hda_spi.c ++++ b/sound/pci/hda/cs35l56_hda_spi.c +@@ -49,10 +49,19 @@ static const struct spi_device_id cs35l56_hda_spi_id[] = { + {} + }; + ++static const struct acpi_device_id cs35l56_acpi_hda_match[] = { ++ { "CSC3554", 0 }, ++ { "CSC3556", 0 }, ++ { "CSC3557", 0 }, ++ {} ++}; ++MODULE_DEVICE_TABLE(acpi, cs35l56_acpi_hda_match); ++ + static struct spi_driver cs35l56_hda_spi_driver = { + .driver = { +- .name = "cs35l56-hda", +- .pm = &cs35l56_hda_pm_ops, ++ .name = "cs35l56-hda", ++ .acpi_match_table = cs35l56_acpi_hda_match, ++ .pm = &cs35l56_hda_pm_ops, + }, + .id_table = cs35l56_hda_spi_id, + .probe = cs35l56_hda_spi_probe, +diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c +index 0db9326b6f844..b1c2fb43cab69 100644 +--- a/sound/pci/hda/patch_realtek.c ++++ b/sound/pci/hda/patch_realtek.c +@@ -10072,7 +10072,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { + SND_PCI_QUIRK(0x10ec, 0x1252, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK), + SND_PCI_QUIRK(0x10ec, 0x1254, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK), + SND_PCI_QUIRK(0x10ec, 0x12cc, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK), +- SND_PCI_QUIRK(0x10f7, 0x8338, "Panasonic CF-SZ6", ALC269_FIXUP_HEADSET_MODE), ++ SND_PCI_QUIRK(0x10f7, 0x8338, "Panasonic CF-SZ6", ALC269_FIXUP_ASPIRE_HEADSET_MIC), + SND_PCI_QUIRK(0x144d, 0xc109, "Samsung Ativ book 9 (NP900X3G)", ALC269_FIXUP_INV_DMIC), + SND_PCI_QUIRK(0x144d, 0xc169, "Samsung Notebook 9 Pen (NP930SBE-K01US)", 
ALC298_FIXUP_SAMSUNG_AMP), + SND_PCI_QUIRK(0x144d, 0xc176, "Samsung Notebook 9 Pro (NP930MBE-K04US)", ALC298_FIXUP_SAMSUNG_AMP), +@@ -10302,6 +10302,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { + SND_PCI_QUIRK(0x1d05, 0x1147, "TongFang GMxTGxx", ALC269_FIXUP_NO_SHUTUP), + SND_PCI_QUIRK(0x1d05, 0x115c, "TongFang GMxTGxx", ALC269_FIXUP_NO_SHUTUP), + SND_PCI_QUIRK(0x1d05, 0x121b, "TongFang GMxAGxx", ALC269_FIXUP_NO_SHUTUP), ++ SND_PCI_QUIRK(0x1d05, 0x1387, "TongFang GMxIXxx", ALC2XX_FIXUP_HEADSET_MIC), + SND_PCI_QUIRK(0x1d72, 0x1602, "RedmiBook", ALC255_FIXUP_XIAOMI_HEADSET_MIC), + SND_PCI_QUIRK(0x1d72, 0x1701, "XiaomiNotebook Pro", ALC298_FIXUP_DELL1_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1d72, 0x1901, "RedmiBook 14", ALC256_FIXUP_ASUS_HEADSET_MIC), +diff --git a/sound/soc/amd/acp/acp-pci.c b/sound/soc/amd/acp/acp-pci.c +index a32c14a109b77..223238f662f83 100644 +--- a/sound/soc/amd/acp/acp-pci.c ++++ b/sound/soc/amd/acp/acp-pci.c +@@ -107,7 +107,10 @@ static int acp_pci_probe(struct pci_dev *pci, const struct pci_device_id *pci_id + goto unregister_dmic_dev; + } + +- acp_init(chip); ++ ret = acp_init(chip); ++ if (ret) ++ goto unregister_dmic_dev; ++ + res = devm_kcalloc(&pci->dev, num_res, sizeof(struct resource), GFP_KERNEL); + if (!res) { + ret = -ENOMEM; +diff --git a/sound/soc/codecs/rt5682-sdw.c b/sound/soc/codecs/rt5682-sdw.c +index e67c2e19cb1a7..1fdbef5fd6cba 100644 +--- a/sound/soc/codecs/rt5682-sdw.c ++++ b/sound/soc/codecs/rt5682-sdw.c +@@ -763,12 +763,12 @@ static int __maybe_unused rt5682_dev_resume(struct device *dev) + return 0; + + if (!slave->unattach_request) { ++ mutex_lock(&rt5682->disable_irq_lock); + if (rt5682->disable_irq == true) { +- mutex_lock(&rt5682->disable_irq_lock); + sdw_write_no_pm(slave, SDW_SCP_INTMASK1, SDW_SCP_INT1_IMPL_DEF); + rt5682->disable_irq = false; +- mutex_unlock(&rt5682->disable_irq_lock); + } ++ mutex_unlock(&rt5682->disable_irq_lock); + goto regmap_sync; + } + +diff --git a/sound/soc/codecs/rt711-sdca-sdw.c b/sound/soc/codecs/rt711-sdca-sdw.c +index 935e597022d32..b8471b2d8f4f1 100644 +--- a/sound/soc/codecs/rt711-sdca-sdw.c ++++ b/sound/soc/codecs/rt711-sdca-sdw.c +@@ -438,13 +438,13 @@ static int __maybe_unused rt711_sdca_dev_resume(struct device *dev) + return 0; + + if (!slave->unattach_request) { ++ mutex_lock(&rt711->disable_irq_lock); + if (rt711->disable_irq == true) { +- mutex_lock(&rt711->disable_irq_lock); + sdw_write_no_pm(slave, SDW_SCP_SDCA_INTMASK1, SDW_SCP_SDCA_INTMASK_SDCA_0); + sdw_write_no_pm(slave, SDW_SCP_SDCA_INTMASK2, SDW_SCP_SDCA_INTMASK_SDCA_8); + rt711->disable_irq = false; +- mutex_unlock(&rt711->disable_irq_lock); + } ++ mutex_unlock(&rt711->disable_irq_lock); + goto regmap_sync; + } + +diff --git a/sound/soc/codecs/rt711-sdw.c b/sound/soc/codecs/rt711-sdw.c +index 3f5773310ae8c..988451f24a756 100644 +--- a/sound/soc/codecs/rt711-sdw.c ++++ b/sound/soc/codecs/rt711-sdw.c +@@ -536,12 +536,12 @@ static int __maybe_unused rt711_dev_resume(struct device *dev) + return 0; + + if (!slave->unattach_request) { ++ mutex_lock(&rt711->disable_irq_lock); + if (rt711->disable_irq == true) { +- mutex_lock(&rt711->disable_irq_lock); + sdw_write_no_pm(slave, SDW_SCP_INTMASK1, SDW_SCP_INT1_IMPL_DEF); + rt711->disable_irq = false; +- mutex_unlock(&rt711->disable_irq_lock); + } ++ mutex_unlock(&rt711->disable_irq_lock); + goto regmap_sync; + } + +diff --git a/sound/soc/codecs/rt712-sdca-sdw.c b/sound/soc/codecs/rt712-sdca-sdw.c +index 6b644a89c5890..ba877432cea61 100644 +--- a/sound/soc/codecs/rt712-sdca-sdw.c ++++ 
b/sound/soc/codecs/rt712-sdca-sdw.c +@@ -438,13 +438,14 @@ static int __maybe_unused rt712_sdca_dev_resume(struct device *dev) + return 0; + + if (!slave->unattach_request) { ++ mutex_lock(&rt712->disable_irq_lock); + if (rt712->disable_irq == true) { +- mutex_lock(&rt712->disable_irq_lock); ++ + sdw_write_no_pm(slave, SDW_SCP_SDCA_INTMASK1, SDW_SCP_SDCA_INTMASK_SDCA_0); + sdw_write_no_pm(slave, SDW_SCP_SDCA_INTMASK2, SDW_SCP_SDCA_INTMASK_SDCA_8); + rt712->disable_irq = false; +- mutex_unlock(&rt712->disable_irq_lock); + } ++ mutex_unlock(&rt712->disable_irq_lock); + goto regmap_sync; + } + +diff --git a/sound/soc/codecs/rt722-sdca-sdw.c b/sound/soc/codecs/rt722-sdca-sdw.c +index a38ec58622145..43a4e79e56966 100644 +--- a/sound/soc/codecs/rt722-sdca-sdw.c ++++ b/sound/soc/codecs/rt722-sdca-sdw.c +@@ -464,13 +464,13 @@ static int __maybe_unused rt722_sdca_dev_resume(struct device *dev) + return 0; + + if (!slave->unattach_request) { ++ mutex_lock(&rt722->disable_irq_lock); + if (rt722->disable_irq == true) { +- mutex_lock(&rt722->disable_irq_lock); + sdw_write_no_pm(slave, SDW_SCP_SDCA_INTMASK1, SDW_SCP_SDCA_INTMASK_SDCA_6); + sdw_write_no_pm(slave, SDW_SCP_SDCA_INTMASK2, SDW_SCP_SDCA_INTMASK_SDCA_8); + rt722->disable_irq = false; +- mutex_unlock(&rt722->disable_irq_lock); + } ++ mutex_unlock(&rt722->disable_irq_lock); + goto regmap_sync; + } + +diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c +index 72b90a7ee4b68..b9c20e29fe63e 100644 +--- a/sound/soc/codecs/wm_adsp.c ++++ b/sound/soc/codecs/wm_adsp.c +@@ -683,11 +683,12 @@ static void wm_adsp_control_remove(struct cs_dsp_coeff_ctl *cs_ctl) + int wm_adsp_write_ctl(struct wm_adsp *dsp, const char *name, int type, + unsigned int alg, void *buf, size_t len) + { +- struct cs_dsp_coeff_ctl *cs_ctl = cs_dsp_get_ctl(&dsp->cs_dsp, name, type, alg); ++ struct cs_dsp_coeff_ctl *cs_ctl; + struct wm_coeff_ctl *ctl; + int ret; + + mutex_lock(&dsp->cs_dsp.pwr_lock); ++ cs_ctl = cs_dsp_get_ctl(&dsp->cs_dsp, name, type, alg); + ret = cs_dsp_coeff_write_ctrl(cs_ctl, 0, buf, len); + mutex_unlock(&dsp->cs_dsp.pwr_lock); + +diff --git a/sound/soc/soc-ops.c b/sound/soc/soc-ops.c +index 2d25748ca7066..b27e89ff6a167 100644 +--- a/sound/soc/soc-ops.c ++++ b/sound/soc/soc-ops.c +@@ -263,7 +263,7 @@ int snd_soc_get_volsw(struct snd_kcontrol *kcontrol, + int max = mc->max; + int min = mc->min; + int sign_bit = mc->sign_bit; +- unsigned int mask = (1 << fls(max)) - 1; ++ unsigned int mask = (1ULL << fls(max)) - 1; + unsigned int invert = mc->invert; + int val; + int ret; +diff --git a/sound/soc/sof/amd/acp.c b/sound/soc/sof/amd/acp.c +index 4c54ce212de6a..cc006d7038d97 100644 +--- a/sound/soc/sof/amd/acp.c ++++ b/sound/soc/sof/amd/acp.c +@@ -522,6 +522,10 @@ int amd_sof_acp_probe(struct snd_sof_dev *sdev) + goto unregister_dev; + } + ++ ret = acp_init(sdev); ++ if (ret < 0) ++ goto free_smn_dev; ++ + sdev->ipc_irq = pci->irq; + ret = request_threaded_irq(sdev->ipc_irq, acp_irq_handler, acp_irq_thread, + IRQF_SHARED, "AudioDSP", sdev); +@@ -531,10 +535,6 @@ int amd_sof_acp_probe(struct snd_sof_dev *sdev) + goto free_smn_dev; + } + +- ret = acp_init(sdev); +- if (ret < 0) +- goto free_ipc_irq; +- + sdev->dsp_box.offset = 0; + sdev->dsp_box.size = BOX_SIZE_512; + +diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h +index 798e60b5454b7..845a4023ba44e 100644 +--- a/tools/arch/x86/include/asm/cpufeatures.h ++++ b/tools/arch/x86/include/asm/cpufeatures.h +@@ -219,7 +219,7 @@ + #define X86_FEATURE_IBRS ( 
7*32+25) /* Indirect Branch Restricted Speculation */ + #define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */ + #define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */ +-#define X86_FEATURE_ZEN (7*32+28) /* "" CPU based on Zen microarchitecture */ ++#define X86_FEATURE_ZEN ( 7*32+28) /* "" Generic flag for all Zen and newer */ + #define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* "" L1TF workaround PTE inversion */ + #define X86_FEATURE_IBRS_ENHANCED ( 7*32+30) /* Enhanced IBRS */ + #define X86_FEATURE_MSR_IA32_FEAT_CTL ( 7*32+31) /* "" MSR IA32_FEAT_CTL configured */ +diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py +index 897af958cee85..575b7e248e521 100755 +--- a/tools/net/ynl/ynl-gen-c.py ++++ b/tools/net/ynl/ynl-gen-c.py +@@ -198,8 +198,11 @@ class Type(SpecAttr): + presence = '' + for i in range(0, len(ref)): + presence = f"{var}->{'.'.join(ref[:i] + [''])}_present.{ref[i]}" +- if self.presence_type() == 'bit': +- code.append(presence + ' = 1;') ++ # Every layer below last is a nest, so we know it uses bit presence ++ # last layer is "self" and may be a complex type ++ if i == len(ref) - 1 and self.presence_type() != 'bit': ++ continue ++ code.append(presence + ' = 1;') + code += self._setter_lines(ri, member, presence) + + func_name = f"{op_prefix(ri, direction, deref=deref)}_set_{'_'.join(ref)}" +diff --git a/tools/testing/selftests/mm/vm_util.h b/tools/testing/selftests/mm/vm_util.h +index c7fa61f0dff8d..0c603bec5e209 100644 +--- a/tools/testing/selftests/mm/vm_util.h ++++ b/tools/testing/selftests/mm/vm_util.h +@@ -3,7 +3,7 @@ + #include <stdbool.h> + #include <sys/mman.h> + #include <err.h> +-#include <string.h> /* ffsl() */ ++#include <strings.h> /* ffsl() */ + #include <unistd.h> /* _SC_PAGESIZE */ + + #define BIT_ULL(nr) (1ULL << (nr)) +diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh +index 3b971d1617d81..7647c74adb26c 100755 +--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh ++++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh +@@ -1,6 +1,11 @@ + #!/bin/bash + # SPDX-License-Identifier: GPL-2.0 + ++# Double quotes to prevent globbing and word splitting is recommended in new ++# code but we accept it, especially because there were too many before having ++# address all other issues detected by shellcheck. ++#shellcheck disable=SC2086 ++ + . "$(dirname "${0}")/mptcp_lib.sh" + + time_start=$(date +%s) +@@ -13,7 +18,6 @@ sout="" + cin_disconnect="" + cin="" + cout="" +-ksft_skip=4 + capture=false + timeout_poll=30 + timeout_test=$((timeout_poll * 2 + 1)) +@@ -131,6 +135,8 @@ ns4="ns4-$rndh" + TEST_COUNT=0 + TEST_GROUP="" + ++# This function is used in the cleanup trap ++#shellcheck disable=SC2317 + cleanup() + { + rm -f "$cin_disconnect" "$cout_disconnect" +@@ -225,8 +231,9 @@ set_ethtool_flags() { + local dev="$2" + local flags="$3" + +- ip netns exec $ns ethtool -K $dev $flags 2>/dev/null +- [ $? 
-eq 0 ] && echo "INFO: set $ns dev $dev: ethtool -K $flags" ++ if ip netns exec $ns ethtool -K $dev $flags 2>/dev/null; then ++ echo "INFO: set $ns dev $dev: ethtool -K $flags" ++ fi + } + + set_random_ethtool_flags() { +@@ -363,7 +370,7 @@ do_transfer() + local extra_args="$7" + + local port +- port=$((10000+$TEST_COUNT)) ++ port=$((10000+TEST_COUNT)) + TEST_COUNT=$((TEST_COUNT+1)) + + if [ "$rcvbuf" -gt 0 ]; then +@@ -420,12 +427,20 @@ do_transfer() + nstat -n + fi + +- local stat_synrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX") +- local stat_ackrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX") +- local stat_cookietx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent") +- local stat_cookierx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv") +- local stat_csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr") +- local stat_csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr") ++ local stat_synrx_last_l ++ local stat_ackrx_last_l ++ local stat_cookietx_last ++ local stat_cookierx_last ++ local stat_csum_err_s ++ local stat_csum_err_c ++ local stat_tcpfb_last_l ++ stat_synrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX") ++ stat_ackrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX") ++ stat_cookietx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent") ++ stat_cookierx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv") ++ stat_csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr") ++ stat_csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr") ++ stat_tcpfb_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableFallbackACK") + + timeout ${timeout_test} \ + ip netns exec ${listener_ns} \ +@@ -488,11 +503,18 @@ do_transfer() + check_transfer $cin $sout "file received by server" + rets=$? 
+ +- local stat_synrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX") +- local stat_ackrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX") +- local stat_cookietx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent") +- local stat_cookierx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv") +- local stat_ooo_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtTCPOFOQueue") ++ local stat_synrx_now_l ++ local stat_ackrx_now_l ++ local stat_cookietx_now ++ local stat_cookierx_now ++ local stat_ooo_now ++ local stat_tcpfb_now_l ++ stat_synrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX") ++ stat_ackrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX") ++ stat_cookietx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent") ++ stat_cookierx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv") ++ stat_ooo_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtTCPOFOQueue") ++ stat_tcpfb_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableFallbackACK") + + expect_synrx=$((stat_synrx_last_l)) + expect_ackrx=$((stat_ackrx_last_l)) +@@ -501,8 +523,8 @@ do_transfer() + cookies=${cookies##*=} + + if [ ${cl_proto} = "MPTCP" ] && [ ${srv_proto} = "MPTCP" ]; then +- expect_synrx=$((stat_synrx_last_l+$connect_per_transfer)) +- expect_ackrx=$((stat_ackrx_last_l+$connect_per_transfer)) ++ expect_synrx=$((stat_synrx_last_l+connect_per_transfer)) ++ expect_ackrx=$((stat_ackrx_last_l+connect_per_transfer)) + fi + + if [ ${stat_synrx_now_l} -lt ${expect_synrx} ]; then +@@ -510,7 +532,7 @@ do_transfer() + "${stat_synrx_now_l}" "${expect_synrx}" 1>&2 + retc=1 + fi +- if [ ${stat_ackrx_now_l} -lt ${expect_ackrx} -a ${stat_ooo_now} -eq 0 ]; then ++ if [ ${stat_ackrx_now_l} -lt ${expect_ackrx} ] && [ ${stat_ooo_now} -eq 0 ]; then + if [ ${stat_ooo_now} -eq 0 ]; then + printf "[ FAIL ] lower MPC ACK rx (%d) than expected (%d)\n" \ + "${stat_ackrx_now_l}" "${expect_ackrx}" 1>&2 +@@ -521,18 +543,20 @@ do_transfer() + fi + + if $checksum; then +- local csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr") +- local csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr") ++ local csum_err_s ++ local csum_err_c ++ csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr") ++ csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr") + + local csum_err_s_nr=$((csum_err_s - stat_csum_err_s)) + if [ $csum_err_s_nr -gt 0 ]; then +- printf "[ FAIL ]\nserver got $csum_err_s_nr data checksum error[s]" ++ printf "[ FAIL ]\nserver got %d data checksum error[s]" ${csum_err_s_nr} + rets=1 + fi + + local csum_err_c_nr=$((csum_err_c - stat_csum_err_c)) + if [ $csum_err_c_nr -gt 0 ]; then +- printf "[ FAIL ]\nclient got $csum_err_c_nr data checksum error[s]" ++ printf "[ FAIL ]\nclient got %d data checksum error[s]" ${csum_err_c_nr} + retc=1 + fi + fi +@@ -544,6 +568,11 @@ do_transfer() + mptcp_lib_result_fail "${TEST_GROUP}: ${result_msg}" + fi + ++ if [ ${stat_ooo_now} -eq 0 ] && [ ${stat_tcpfb_last_l} -ne ${stat_tcpfb_now_l} ]; then ++ mptcp_lib_pr_fail "unexpected fallback to TCP" ++ rets=1 ++ fi ++ + if [ $cookies -eq 2 ];then + if [ $stat_cookietx_last -ge $stat_cookietx_now ] ;then + printf " WARN: CookieSent: did not advance" +@@ -701,7 +730,7 @@ run_test_transparent() + return + fi + +-ip netns exec "$listener_ns" nft -f /dev/stdin <<"EOF" ++ if ! 
ip netns exec "$listener_ns" nft -f /dev/stdin <<"EOF" + flush ruleset + table inet mangle { + chain divert { +@@ -712,7 +741,7 @@ table inet mangle { + } + } + EOF +- if [ $? -ne 0 ]; then ++ then + echo "SKIP: $msg, could not load nft ruleset" + mptcp_lib_fail_if_expected_feature "nft rules" + mptcp_lib_result_skip "${TEST_GROUP}" +@@ -727,8 +756,7 @@ EOF + local_addr="0.0.0.0" + fi + +- ip -net "$listener_ns" $r6flag rule add fwmark 1 lookup 100 +- if [ $? -ne 0 ]; then ++ if ! ip -net "$listener_ns" $r6flag rule add fwmark 1 lookup 100; then + ip netns exec "$listener_ns" nft flush ruleset + echo "SKIP: $msg, ip $r6flag rule failed" + mptcp_lib_fail_if_expected_feature "ip rule" +@@ -736,8 +764,7 @@ EOF + return + fi + +- ip -net "$listener_ns" route add local $local_addr/0 dev lo table 100 +- if [ $? -ne 0 ]; then ++ if ! ip -net "$listener_ns" route add local $local_addr/0 dev lo table 100; then + ip netns exec "$listener_ns" nft flush ruleset + ip -net "$listener_ns" $r6flag rule del fwmark 1 lookup 100 + echo "SKIP: $msg, ip route add local $local_addr failed" +@@ -900,7 +927,7 @@ stop_if_error "Could not even run ping tests" + echo -n "INFO: Using loss of $tc_loss " + test "$tc_delay" -gt 0 && echo -n "delay $tc_delay ms " + +-reorder_delay=$(($tc_delay / 4)) ++reorder_delay=$((tc_delay / 4)) + + if [ -z "${tc_reorder}" ]; then + reorder1=$((RANDOM%10)) +diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh +index 34c3423469679..00cf4efac4c21 100755 +--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh ++++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh +@@ -796,7 +796,7 @@ pm_nl_check_endpoint() + [ -n "$_flags" ]; flags="flags $_flags" + shift + elif [ $1 = "dev" ]; then +- [ -n "$2" ]; dev="dev $1" ++ [ -n "$2" ]; dev="dev $2" + shift + elif [ $1 = "id" ]; then + _id=$2 +@@ -3507,6 +3507,8 @@ endpoint_tests() + local tests_pid=$! 
+ + wait_mpj $ns2 ++ pm_nl_check_endpoint "creation" \ ++ $ns2 10.0.2.2 id 2 flags subflow dev ns2eth2 + chk_subflow_nr "before delete" 2 + chk_mptcp_info subflows 1 subflows 1 + +diff --git a/tools/testing/selftests/net/reuseaddr_conflict.c b/tools/testing/selftests/net/reuseaddr_conflict.c +index 7c5b12664b03b..bfb07dc495186 100644 +--- a/tools/testing/selftests/net/reuseaddr_conflict.c ++++ b/tools/testing/selftests/net/reuseaddr_conflict.c +@@ -109,6 +109,6 @@ int main(void) + fd1 = open_port(0, 1); + if (fd1 >= 0) + error(1, 0, "Was allowed to create an ipv4 reuseport on an already bound non-reuseport socket with no ipv6"); +- fprintf(stderr, "Success"); ++ fprintf(stderr, "Success\n"); + return 0; + } +diff --git a/tools/testing/selftests/net/test_vxlan_mdb.sh b/tools/testing/selftests/net/test_vxlan_mdb.sh +index 31e5f0f8859d1..be8e66abc74e1 100755 +--- a/tools/testing/selftests/net/test_vxlan_mdb.sh ++++ b/tools/testing/selftests/net/test_vxlan_mdb.sh +@@ -984,6 +984,7 @@ encap_params_common() + local plen=$1; shift + local enc_ethtype=$1; shift + local grp=$1; shift ++ local grp_dmac=$1; shift + local src=$1; shift + local mz=$1; shift + +@@ -1002,11 +1003,11 @@ encap_params_common() + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep2_ip src_vni 10020" + + run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_dst_ip $vtep1_ip action pass" +- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" ++ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Destination IP - match" + +- run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" ++ run_cmd "ip netns exec $ns1 $mz br0.20 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Destination IP - no match" + +@@ -1019,20 +1020,20 @@ encap_params_common() + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip dst_port 1111 src_vni 10020" + + run_cmd "tc -n $ns2 filter replace dev veth0 ingress pref 1 handle 101 proto $enc_ethtype flower ip_proto udp dst_port 4789 action pass" +- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" ++ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev veth0 ingress" 101 1 + log_test $? 0 "Default destination port - match" + +- run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" ++ run_cmd "ip netns exec $ns1 $mz br0.20 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev veth0 ingress" 101 1 + log_test $? 0 "Default destination port - no match" + + run_cmd "tc -n $ns2 filter replace dev veth0 ingress pref 1 handle 101 proto $enc_ethtype flower ip_proto udp dst_port 1111 action pass" +- run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" ++ run_cmd "ip netns exec $ns1 $mz br0.20 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev veth0 ingress" 101 1 + log_test $? 
0 "Non-default destination port - match" + +- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" ++ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev veth0 ingress" 101 1 + log_test $? 0 "Non-default destination port - no match" + +@@ -1045,11 +1046,11 @@ encap_params_common() + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip src_vni 10020" + + run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_key_id 10010 action pass" +- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" ++ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Default destination VNI - match" + +- run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" ++ run_cmd "ip netns exec $ns1 $mz br0.20 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Default destination VNI - no match" + +@@ -1057,11 +1058,11 @@ encap_params_common() + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip vni 10010 src_vni 10020" + + run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_key_id 10020 action pass" +- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" ++ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Non-default destination VNI - match" + +- run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" ++ run_cmd "ip netns exec $ns1 $mz br0.20 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 
0 "Non-default destination VNI - no match" + +@@ -1079,6 +1080,7 @@ encap_params_ipv4_ipv4() + local plen=32 + local enc_ethtype="ip" + local grp=239.1.1.1 ++ local grp_dmac=01:00:5e:01:01:01 + local src=192.0.2.129 + + echo +@@ -1086,7 +1088,7 @@ encap_params_ipv4_ipv4() + echo "------------------------------------------------------------------" + + encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \ +- $grp $src "mausezahn" ++ $grp $grp_dmac $src "mausezahn" + } + + encap_params_ipv6_ipv4() +@@ -1098,6 +1100,7 @@ encap_params_ipv6_ipv4() + local plen=32 + local enc_ethtype="ip" + local grp=ff0e::1 ++ local grp_dmac=33:33:00:00:00:01 + local src=2001:db8:100::1 + + echo +@@ -1105,7 +1108,7 @@ encap_params_ipv6_ipv4() + echo "------------------------------------------------------------------" + + encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \ +- $grp $src "mausezahn -6" ++ $grp $grp_dmac $src "mausezahn -6" + } + + encap_params_ipv4_ipv6() +@@ -1117,6 +1120,7 @@ encap_params_ipv4_ipv6() + local plen=128 + local enc_ethtype="ipv6" + local grp=239.1.1.1 ++ local grp_dmac=01:00:5e:01:01:01 + local src=192.0.2.129 + + echo +@@ -1124,7 +1128,7 @@ encap_params_ipv4_ipv6() + echo "------------------------------------------------------------------" + + encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \ +- $grp $src "mausezahn" ++ $grp $grp_dmac $src "mausezahn" + } + + encap_params_ipv6_ipv6() +@@ -1136,6 +1140,7 @@ encap_params_ipv6_ipv6() + local plen=128 + local enc_ethtype="ipv6" + local grp=ff0e::1 ++ local grp_dmac=33:33:00:00:00:01 + local src=2001:db8:100::1 + + echo +@@ -1143,7 +1148,7 @@ encap_params_ipv6_ipv6() + echo "------------------------------------------------------------------" + + encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \ +- $grp $src "mausezahn -6" ++ $grp $grp_dmac $src "mausezahn -6" + } + + starg_exclude_ir_common() +@@ -1154,6 +1159,7 @@ starg_exclude_ir_common() + local vtep2_ip=$1; shift + local plen=$1; shift + local grp=$1; shift ++ local grp_dmac=$1; shift + local valid_src=$1; shift + local invalid_src=$1; shift + local mz=$1; shift +@@ -1175,14 +1181,14 @@ starg_exclude_ir_common() + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $invalid_src dst $vtep2_ip src_vni 10010" + + # Check that invalid source is not forwarded to any VTEP. +- run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" ++ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 0 + log_test $? 0 "Block excluded source - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 0 + log_test $? 0 "Block excluded source - second VTEP" + + # Check that valid source is forwarded to both VTEPs. +- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" ++ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Forward valid source - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 1 +@@ -1192,14 +1198,14 @@ starg_exclude_ir_common() + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep2_ip src_vni 10010" + + # Check that invalid source is not forwarded to any VTEP. 
+- run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" ++ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Block excluded source after removal - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 1 + log_test $? 0 "Block excluded source after removal - second VTEP" + + # Check that valid source is forwarded to the remaining VTEP. +- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" ++ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 2 + log_test $? 0 "Forward valid source after removal - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 1 +@@ -1214,6 +1220,7 @@ starg_exclude_ir_ipv4_ipv4() + local vtep2_ip=198.51.100.200 + local plen=32 + local grp=239.1.1.1 ++ local grp_dmac=01:00:5e:01:01:01 + local valid_src=192.0.2.129 + local invalid_src=192.0.2.145 + +@@ -1222,7 +1229,7 @@ starg_exclude_ir_ipv4_ipv4() + echo "-------------------------------------------------------------" + + starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ +- $valid_src $invalid_src "mausezahn" ++ $grp_dmac $valid_src $invalid_src "mausezahn" + } + + starg_exclude_ir_ipv6_ipv4() +@@ -1233,6 +1240,7 @@ starg_exclude_ir_ipv6_ipv4() + local vtep2_ip=198.51.100.200 + local plen=32 + local grp=ff0e::1 ++ local grp_dmac=33:33:00:00:00:01 + local valid_src=2001:db8:100::1 + local invalid_src=2001:db8:200::1 + +@@ -1241,7 +1249,7 @@ starg_exclude_ir_ipv6_ipv4() + echo "-------------------------------------------------------------" + + starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ +- $valid_src $invalid_src "mausezahn -6" ++ $grp_dmac $valid_src $invalid_src "mausezahn -6" + } + + starg_exclude_ir_ipv4_ipv6() +@@ -1252,6 +1260,7 @@ starg_exclude_ir_ipv4_ipv6() + local vtep2_ip=2001:db8:2000::1 + local plen=128 + local grp=239.1.1.1 ++ local grp_dmac=01:00:5e:01:01:01 + local valid_src=192.0.2.129 + local invalid_src=192.0.2.145 + +@@ -1260,7 +1269,7 @@ starg_exclude_ir_ipv4_ipv6() + echo "-------------------------------------------------------------" + + starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ +- $valid_src $invalid_src "mausezahn" ++ $grp_dmac $valid_src $invalid_src "mausezahn" + } + + starg_exclude_ir_ipv6_ipv6() +@@ -1271,6 +1280,7 @@ starg_exclude_ir_ipv6_ipv6() + local vtep2_ip=2001:db8:2000::1 + local plen=128 + local grp=ff0e::1 ++ local grp_dmac=33:33:00:00:00:01 + local valid_src=2001:db8:100::1 + local invalid_src=2001:db8:200::1 + +@@ -1279,7 +1289,7 @@ starg_exclude_ir_ipv6_ipv6() + echo "-------------------------------------------------------------" + + starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ +- $valid_src $invalid_src "mausezahn -6" ++ $grp_dmac $valid_src $invalid_src "mausezahn -6" + } + + starg_include_ir_common() +@@ -1290,6 +1300,7 @@ starg_include_ir_common() + local vtep2_ip=$1; shift + local plen=$1; shift + local grp=$1; shift ++ local grp_dmac=$1; shift + local valid_src=$1; shift + local invalid_src=$1; shift + local mz=$1; shift +@@ -1311,14 +1322,14 @@ starg_include_ir_common() + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode include source_list $valid_src dst $vtep2_ip src_vni 10010" + + # 
Check that invalid source is not forwarded to any VTEP. +- run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" ++ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 0 + log_test $? 0 "Block excluded source - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 0 + log_test $? 0 "Block excluded source - second VTEP" + + # Check that valid source is forwarded to both VTEPs. +- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" ++ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Forward valid source - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 1 +@@ -1328,14 +1339,14 @@ starg_include_ir_common() + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep2_ip src_vni 10010" + + # Check that invalid source is not forwarded to any VTEP. +- run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" ++ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Block excluded source after removal - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 1 + log_test $? 0 "Block excluded source after removal - second VTEP" + + # Check that valid source is forwarded to the remaining VTEP. +- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" ++ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 2 + log_test $? 
0 "Forward valid source after removal - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 1 +@@ -1350,6 +1361,7 @@ starg_include_ir_ipv4_ipv4() + local vtep2_ip=198.51.100.200 + local plen=32 + local grp=239.1.1.1 ++ local grp_dmac=01:00:5e:01:01:01 + local valid_src=192.0.2.129 + local invalid_src=192.0.2.145 + +@@ -1358,7 +1370,7 @@ starg_include_ir_ipv4_ipv4() + echo "-------------------------------------------------------------" + + starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ +- $valid_src $invalid_src "mausezahn" ++ $grp_dmac $valid_src $invalid_src "mausezahn" + } + + starg_include_ir_ipv6_ipv4() +@@ -1369,6 +1381,7 @@ starg_include_ir_ipv6_ipv4() + local vtep2_ip=198.51.100.200 + local plen=32 + local grp=ff0e::1 ++ local grp_dmac=33:33:00:00:00:01 + local valid_src=2001:db8:100::1 + local invalid_src=2001:db8:200::1 + +@@ -1377,7 +1390,7 @@ starg_include_ir_ipv6_ipv4() + echo "-------------------------------------------------------------" + + starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ +- $valid_src $invalid_src "mausezahn -6" ++ $grp_dmac $valid_src $invalid_src "mausezahn -6" + } + + starg_include_ir_ipv4_ipv6() +@@ -1388,6 +1401,7 @@ starg_include_ir_ipv4_ipv6() + local vtep2_ip=2001:db8:2000::1 + local plen=128 + local grp=239.1.1.1 ++ local grp_dmac=01:00:5e:01:01:01 + local valid_src=192.0.2.129 + local invalid_src=192.0.2.145 + +@@ -1396,7 +1410,7 @@ starg_include_ir_ipv4_ipv6() + echo "-------------------------------------------------------------" + + starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ +- $valid_src $invalid_src "mausezahn" ++ $grp_dmac $valid_src $invalid_src "mausezahn" + } + + starg_include_ir_ipv6_ipv6() +@@ -1407,6 +1421,7 @@ starg_include_ir_ipv6_ipv6() + local vtep2_ip=2001:db8:2000::1 + local plen=128 + local grp=ff0e::1 ++ local grp_dmac=33:33:00:00:00:01 + local valid_src=2001:db8:100::1 + local invalid_src=2001:db8:200::1 + +@@ -1415,7 +1430,7 @@ starg_include_ir_ipv6_ipv6() + echo "-------------------------------------------------------------" + + starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ +- $valid_src $invalid_src "mausezahn -6" ++ $grp_dmac $valid_src $invalid_src "mausezahn -6" + } + + starg_exclude_p2mp_common() +@@ -1425,6 +1440,7 @@ starg_exclude_p2mp_common() + local mcast_grp=$1; shift + local plen=$1; shift + local grp=$1; shift ++ local grp_dmac=$1; shift + local valid_src=$1; shift + local invalid_src=$1; shift + local mz=$1; shift +@@ -1442,12 +1458,12 @@ starg_exclude_p2mp_common() + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $invalid_src dst $mcast_grp src_vni 10010 via veth0" + + # Check that invalid source is not forwarded. +- run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" ++ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 0 + log_test $? 0 "Block excluded source" + + # Check that valid source is forwarded. +- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" ++ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 
0 "Forward valid source" + +@@ -1455,7 +1471,7 @@ starg_exclude_p2mp_common() + run_cmd "ip -n $ns2 address del $mcast_grp/$plen dev veth0" + + # Check that valid source is not received anymore. +- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" ++ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Receive of valid source after removal from group" + } +@@ -1467,6 +1483,7 @@ starg_exclude_p2mp_ipv4_ipv4() + local mcast_grp=238.1.1.1 + local plen=32 + local grp=239.1.1.1 ++ local grp_dmac=01:00:5e:01:01:01 + local valid_src=192.0.2.129 + local invalid_src=192.0.2.145 + +@@ -1474,7 +1491,7 @@ starg_exclude_p2mp_ipv4_ipv4() + echo "Data path: (*, G) EXCLUDE - P2MP - IPv4 overlay / IPv4 underlay" + echo "---------------------------------------------------------------" + +- starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ ++ starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \ + $valid_src $invalid_src "mausezahn" + } + +@@ -1485,6 +1502,7 @@ starg_exclude_p2mp_ipv6_ipv4() + local mcast_grp=238.1.1.1 + local plen=32 + local grp=ff0e::1 ++ local grp_dmac=33:33:00:00:00:01 + local valid_src=2001:db8:100::1 + local invalid_src=2001:db8:200::1 + +@@ -1492,7 +1510,7 @@ starg_exclude_p2mp_ipv6_ipv4() + echo "Data path: (*, G) EXCLUDE - P2MP - IPv6 overlay / IPv4 underlay" + echo "---------------------------------------------------------------" + +- starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ ++ starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \ + $valid_src $invalid_src "mausezahn -6" + } + +@@ -1503,6 +1521,7 @@ starg_exclude_p2mp_ipv4_ipv6() + local mcast_grp=ff0e::2 + local plen=128 + local grp=239.1.1.1 ++ local grp_dmac=01:00:5e:01:01:01 + local valid_src=192.0.2.129 + local invalid_src=192.0.2.145 + +@@ -1510,7 +1529,7 @@ starg_exclude_p2mp_ipv4_ipv6() + echo "Data path: (*, G) EXCLUDE - P2MP - IPv4 overlay / IPv6 underlay" + echo "---------------------------------------------------------------" + +- starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ ++ starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \ + $valid_src $invalid_src "mausezahn" + } + +@@ -1521,6 +1540,7 @@ starg_exclude_p2mp_ipv6_ipv6() + local mcast_grp=ff0e::2 + local plen=128 + local grp=ff0e::1 ++ local grp_dmac=33:33:00:00:00:01 + local valid_src=2001:db8:100::1 + local invalid_src=2001:db8:200::1 + +@@ -1528,7 +1548,7 @@ starg_exclude_p2mp_ipv6_ipv6() + echo "Data path: (*, G) EXCLUDE - P2MP - IPv6 overlay / IPv6 underlay" + echo "---------------------------------------------------------------" + +- starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ ++ starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \ + $valid_src $invalid_src "mausezahn -6" + } + +@@ -1539,6 +1559,7 @@ starg_include_p2mp_common() + local mcast_grp=$1; shift + local plen=$1; shift + local grp=$1; shift ++ local grp_dmac=$1; shift + local valid_src=$1; shift + local invalid_src=$1; shift + local mz=$1; shift +@@ -1556,12 +1577,12 @@ starg_include_p2mp_common() + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode include source_list $valid_src dst $mcast_grp src_vni 10010 via veth0" + + # Check that invalid source is not forwarded. 
+- run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" ++ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 0 + log_test $? 0 "Block excluded source" + + # Check that valid source is forwarded. +- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" ++ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Forward valid source" + +@@ -1569,7 +1590,7 @@ starg_include_p2mp_common() + run_cmd "ip -n $ns2 address del $mcast_grp/$plen dev veth0" + + # Check that valid source is not received anymore. +- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" ++ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Receive of valid source after removal from group" + } +@@ -1581,6 +1602,7 @@ starg_include_p2mp_ipv4_ipv4() + local mcast_grp=238.1.1.1 + local plen=32 + local grp=239.1.1.1 ++ local grp_dmac=01:00:5e:01:01:01 + local valid_src=192.0.2.129 + local invalid_src=192.0.2.145 + +@@ -1588,7 +1610,7 @@ starg_include_p2mp_ipv4_ipv4() + echo "Data path: (*, G) INCLUDE - P2MP - IPv4 overlay / IPv4 underlay" + echo "---------------------------------------------------------------" + +- starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ ++ starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \ + $valid_src $invalid_src "mausezahn" + } + +@@ -1599,6 +1621,7 @@ starg_include_p2mp_ipv6_ipv4() + local mcast_grp=238.1.1.1 + local plen=32 + local grp=ff0e::1 ++ local grp_dmac=33:33:00:00:00:01 + local valid_src=2001:db8:100::1 + local invalid_src=2001:db8:200::1 + +@@ -1606,7 +1629,7 @@ starg_include_p2mp_ipv6_ipv4() + echo "Data path: (*, G) INCLUDE - P2MP - IPv6 overlay / IPv4 underlay" + echo "---------------------------------------------------------------" + +- starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ ++ starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \ + $valid_src $invalid_src "mausezahn -6" + } + +@@ -1617,6 +1640,7 @@ starg_include_p2mp_ipv4_ipv6() + local mcast_grp=ff0e::2 + local plen=128 + local grp=239.1.1.1 ++ local grp_dmac=01:00:5e:01:01:01 + local valid_src=192.0.2.129 + local invalid_src=192.0.2.145 + +@@ -1624,7 +1648,7 @@ starg_include_p2mp_ipv4_ipv6() + echo "Data path: (*, G) INCLUDE - P2MP - IPv4 overlay / IPv6 underlay" + echo "---------------------------------------------------------------" + +- starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ ++ starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \ + $valid_src $invalid_src "mausezahn" + } + +@@ -1635,6 +1659,7 @@ starg_include_p2mp_ipv6_ipv6() + local mcast_grp=ff0e::2 + local plen=128 + local grp=ff0e::1 ++ local grp_dmac=33:33:00:00:00:01 + local valid_src=2001:db8:100::1 + local invalid_src=2001:db8:200::1 + +@@ -1642,7 +1667,7 @@ starg_include_p2mp_ipv6_ipv6() + echo "Data path: (*, G) INCLUDE - P2MP - IPv6 overlay / IPv6 underlay" + echo "---------------------------------------------------------------" + +- starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ ++ starg_include_p2mp_common $ns1 $ns2 
$mcast_grp $plen $grp $grp_dmac \ + $valid_src $invalid_src "mausezahn -6" + } + +@@ -1654,6 +1679,7 @@ egress_vni_translation_common() + local plen=$1; shift + local proto=$1; shift + local grp=$1; shift ++ local grp_dmac=$1; shift + local src=$1; shift + local mz=$1; shift + +@@ -1689,20 +1715,20 @@ egress_vni_translation_common() + # Make sure that packets sent from the first VTEP over VLAN 10 are + # received by the SVI corresponding to the L3VNI (14000 / VLAN 4000) on + # the second VTEP, since it is configured as PVID. +- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" ++ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev br0.4000 ingress" 101 1 + log_test $? 0 "Egress VNI translation - PVID configured" + + # Remove PVID flag from VLAN 4000 on the second VTEP and make sure + # packets are no longer received by the SVI interface. + run_cmd "bridge -n $ns2 vlan add vid 4000 dev vx0" +- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" ++ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev br0.4000 ingress" 101 1 + log_test $? 0 "Egress VNI translation - no PVID configured" + + # Reconfigure the PVID and make sure packets are received again. + run_cmd "bridge -n $ns2 vlan add vid 4000 dev vx0 pvid" +- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" ++ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev br0.4000 ingress" 101 2 + log_test $? 
0 "Egress VNI translation - PVID reconfigured" + } +@@ -1715,6 +1741,7 @@ egress_vni_translation_ipv4_ipv4() + local plen=32 + local proto="ipv4" + local grp=239.1.1.1 ++ local grp_dmac=01:00:5e:01:01:01 + local src=192.0.2.129 + + echo +@@ -1722,7 +1749,7 @@ egress_vni_translation_ipv4_ipv4() + echo "----------------------------------------------------------------" + + egress_vni_translation_common $ns1 $ns2 $mcast_grp $plen $proto $grp \ +- $src "mausezahn" ++ $grp_dmac $src "mausezahn" + } + + egress_vni_translation_ipv6_ipv4() +@@ -1733,6 +1760,7 @@ egress_vni_translation_ipv6_ipv4() + local plen=32 + local proto="ipv6" + local grp=ff0e::1 ++ local grp_dmac=33:33:00:00:00:01 + local src=2001:db8:100::1 + + echo +@@ -1740,7 +1768,7 @@ egress_vni_translation_ipv6_ipv4() + echo "----------------------------------------------------------------" + + egress_vni_translation_common $ns1 $ns2 $mcast_grp $plen $proto $grp \ +- $src "mausezahn -6" ++ $grp_dmac $src "mausezahn -6" + } + + egress_vni_translation_ipv4_ipv6() +@@ -1751,6 +1779,7 @@ egress_vni_translation_ipv4_ipv6() + local plen=128 + local proto="ipv4" + local grp=239.1.1.1 ++ local grp_dmac=01:00:5e:01:01:01 + local src=192.0.2.129 + + echo +@@ -1758,7 +1787,7 @@ egress_vni_translation_ipv4_ipv6() + echo "----------------------------------------------------------------" + + egress_vni_translation_common $ns1 $ns2 $mcast_grp $plen $proto $grp \ +- $src "mausezahn" ++ $grp_dmac $src "mausezahn" + } + + egress_vni_translation_ipv6_ipv6() +@@ -1769,6 +1798,7 @@ egress_vni_translation_ipv6_ipv6() + local plen=128 + local proto="ipv6" + local grp=ff0e::1 ++ local grp_dmac=33:33:00:00:00:01 + local src=2001:db8:100::1 + + echo +@@ -1776,7 +1806,7 @@ egress_vni_translation_ipv6_ipv6() + echo "----------------------------------------------------------------" + + egress_vni_translation_common $ns1 $ns2 $mcast_grp $plen $proto $grp \ +- $src "mausezahn -6" ++ $grp_dmac $src "mausezahn -6" + } + + all_zeros_mdb_common() +@@ -1789,12 +1819,18 @@ all_zeros_mdb_common() + local vtep4_ip=$1; shift + local plen=$1; shift + local ipv4_grp=239.1.1.1 ++ local ipv4_grp_dmac=01:00:5e:01:01:01 + local ipv4_unreg_grp=239.2.2.2 ++ local ipv4_unreg_grp_dmac=01:00:5e:02:02:02 + local ipv4_ll_grp=224.0.0.100 ++ local ipv4_ll_grp_dmac=01:00:5e:00:00:64 + local ipv4_src=192.0.2.129 + local ipv6_grp=ff0e::1 ++ local ipv6_grp_dmac=33:33:00:00:00:01 + local ipv6_unreg_grp=ff0e::2 ++ local ipv6_unreg_grp_dmac=33:33:00:00:00:02 + local ipv6_ll_grp=ff02::1 ++ local ipv6_ll_grp_dmac=33:33:00:00:00:01 + local ipv6_src=2001:db8:100::1 + + # Install all-zeros (catchall) MDB entries for IPv4 and IPv6 traffic +@@ -1830,7 +1866,7 @@ all_zeros_mdb_common() + + # Send registered IPv4 multicast and make sure it only arrives to the + # first VTEP. +- run_cmd "ip netns exec $ns1 mausezahn br0.10 -A $ipv4_src -B $ipv4_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" ++ run_cmd "ip netns exec $ns1 mausezahn br0.10 -a own -b $ipv4_grp_dmac -A $ipv4_src -B $ipv4_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Registered IPv4 multicast - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 0 +@@ -1838,7 +1874,7 @@ all_zeros_mdb_common() + + # Send unregistered IPv4 multicast that is not link-local and make sure + # it arrives to the first and second VTEPs. 
+- run_cmd "ip netns exec $ns1 mausezahn br0.10 -A $ipv4_src -B $ipv4_unreg_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" ++ run_cmd "ip netns exec $ns1 mausezahn br0.10 -a own -b $ipv4_unreg_grp_dmac -A $ipv4_src -B $ipv4_unreg_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 2 + log_test $? 0 "Unregistered IPv4 multicast - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 1 +@@ -1846,7 +1882,7 @@ all_zeros_mdb_common() + + # Send IPv4 link-local multicast traffic and make sure it does not + # arrive to any VTEP. +- run_cmd "ip netns exec $ns1 mausezahn br0.10 -A $ipv4_src -B $ipv4_ll_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" ++ run_cmd "ip netns exec $ns1 mausezahn br0.10 -a own -b $ipv4_ll_grp_dmac -A $ipv4_src -B $ipv4_ll_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 2 + log_test $? 0 "Link-local IPv4 multicast - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 1 +@@ -1881,7 +1917,7 @@ all_zeros_mdb_common() + + # Send registered IPv6 multicast and make sure it only arrives to the + # third VTEP. +- run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -A $ipv6_src -B $ipv6_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" ++ run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -a own -b $ipv6_grp_dmac -A $ipv6_src -B $ipv6_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 103 1 + log_test $? 0 "Registered IPv6 multicast - third VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 104 0 +@@ -1889,7 +1925,7 @@ all_zeros_mdb_common() + + # Send unregistered IPv6 multicast that is not link-local and make sure + # it arrives to the third and fourth VTEPs. +- run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -A $ipv6_src -B $ipv6_unreg_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" ++ run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -a own -b $ipv6_unreg_grp_dmac -A $ipv6_src -B $ipv6_unreg_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 103 2 + log_test $? 0 "Unregistered IPv6 multicast - third VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 104 1 +@@ -1897,7 +1933,7 @@ all_zeros_mdb_common() + + # Send IPv6 link-local multicast traffic and make sure it does not + # arrive to any VTEP. +- run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -A $ipv6_src -B $ipv6_ll_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" ++ run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -a own -b $ipv6_ll_grp_dmac -A $ipv6_src -B $ipv6_ll_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 103 2 + log_test $? 0 "Link-local IPv6 multicast - third VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 104 1 +@@ -1972,6 +2008,7 @@ mdb_fdb_common() + local plen=$1; shift + local proto=$1; shift + local grp=$1; shift ++ local grp_dmac=$1; shift + local src=$1; shift + local mz=$1; shift + +@@ -1995,7 +2032,7 @@ mdb_fdb_common() + + # Send IP multicast traffic and make sure it is forwarded by the MDB + # and only arrives to the first VTEP. +- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" ++ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 
0 "IP multicast - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 0 +@@ -2012,7 +2049,7 @@ mdb_fdb_common() + # Remove the MDB entry and make sure that IP multicast is now forwarded + # by the FDB to the second VTEP. + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep1_ip src_vni 10010" +- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" ++ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "IP multicast after removal - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 2 +@@ -2028,14 +2065,15 @@ mdb_fdb_ipv4_ipv4() + local plen=32 + local proto="ipv4" + local grp=239.1.1.1 ++ local grp_dmac=01:00:5e:01:01:01 + local src=192.0.2.129 + + echo + echo "Data path: MDB with FDB - IPv4 overlay / IPv4 underlay" + echo "------------------------------------------------------" + +- mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \ +- "mausezahn" ++ mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp \ ++ $grp_dmac $src "mausezahn" + } + + mdb_fdb_ipv6_ipv4() +@@ -2047,14 +2085,15 @@ mdb_fdb_ipv6_ipv4() + local plen=32 + local proto="ipv6" + local grp=ff0e::1 ++ local grp_dmac=33:33:00:00:00:01 + local src=2001:db8:100::1 + + echo + echo "Data path: MDB with FDB - IPv6 overlay / IPv4 underlay" + echo "------------------------------------------------------" + +- mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \ +- "mausezahn -6" ++ mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp \ ++ $grp_dmac $src "mausezahn -6" + } + + mdb_fdb_ipv4_ipv6() +@@ -2066,14 +2105,15 @@ mdb_fdb_ipv4_ipv6() + local plen=128 + local proto="ipv4" + local grp=239.1.1.1 ++ local grp_dmac=01:00:5e:01:01:01 + local src=192.0.2.129 + + echo + echo "Data path: MDB with FDB - IPv4 overlay / IPv6 underlay" + echo "------------------------------------------------------" + +- mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \ +- "mausezahn" ++ mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp \ ++ $grp_dmac $src "mausezahn" + } + + mdb_fdb_ipv6_ipv6() +@@ -2085,14 +2125,15 @@ mdb_fdb_ipv6_ipv6() + local plen=128 + local proto="ipv6" + local grp=ff0e::1 ++ local grp_dmac=33:33:00:00:00:01 + local src=2001:db8:100::1 + + echo + echo "Data path: MDB with FDB - IPv6 overlay / IPv6 underlay" + echo "------------------------------------------------------" + +- mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \ +- "mausezahn -6" ++ mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp \ ++ $grp_dmac $src "mausezahn -6" + } + + mdb_grp1_loop() +@@ -2127,7 +2168,9 @@ mdb_torture_common() + local vtep1_ip=$1; shift + local vtep2_ip=$1; shift + local grp1=$1; shift ++ local grp1_dmac=$1; shift + local grp2=$1; shift ++ local grp2_dmac=$1; shift + local src=$1; shift + local mz=$1; shift + local pid1 +@@ -2152,9 +2195,9 @@ mdb_torture_common() + pid1=$! + mdb_grp2_loop $ns1 $vtep1_ip $vtep2_ip $grp2 & + pid2=$! +- ip netns exec $ns1 $mz br0.10 -A $src -B $grp1 -t udp sp=12345,dp=54321 -p 100 -c 0 -q & ++ ip netns exec $ns1 $mz br0.10 -a own -b $grp1_dmac -A $src -B $grp1 -t udp sp=12345,dp=54321 -p 100 -c 0 -q & + pid3=$! 
+- ip netns exec $ns1 $mz br0.10 -A $src -B $grp2 -t udp sp=12345,dp=54321 -p 100 -c 0 -q & ++ ip netns exec $ns1 $mz br0.10 -a own -b $grp2_dmac -A $src -B $grp2 -t udp sp=12345,dp=54321 -p 100 -c 0 -q & + pid4=$! + + sleep 30 +@@ -2170,15 +2213,17 @@ mdb_torture_ipv4_ipv4() + local vtep1_ip=198.51.100.100 + local vtep2_ip=198.51.100.200 + local grp1=239.1.1.1 ++ local grp1_dmac=01:00:5e:01:01:01 + local grp2=239.2.2.2 ++ local grp2_dmac=01:00:5e:02:02:02 + local src=192.0.2.129 + + echo + echo "Data path: MDB torture test - IPv4 overlay / IPv4 underlay" + echo "----------------------------------------------------------" + +- mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \ +- "mausezahn" ++ mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp1_dmac $grp2 \ ++ $grp2_dmac $src "mausezahn" + } + + mdb_torture_ipv6_ipv4() +@@ -2187,15 +2232,17 @@ mdb_torture_ipv6_ipv4() + local vtep1_ip=198.51.100.100 + local vtep2_ip=198.51.100.200 + local grp1=ff0e::1 ++ local grp1_dmac=33:33:00:00:00:01 + local grp2=ff0e::2 ++ local grp2_dmac=33:33:00:00:00:02 + local src=2001:db8:100::1 + + echo + echo "Data path: MDB torture test - IPv6 overlay / IPv4 underlay" + echo "----------------------------------------------------------" + +- mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \ +- "mausezahn -6" ++ mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp1_dmac $grp2 \ ++ $grp2_dmac $src "mausezahn -6" + } + + mdb_torture_ipv4_ipv6() +@@ -2204,15 +2251,17 @@ mdb_torture_ipv4_ipv6() + local vtep1_ip=2001:db8:1000::1 + local vtep2_ip=2001:db8:2000::1 + local grp1=239.1.1.1 ++ local grp1_dmac=01:00:5e:01:01:01 + local grp2=239.2.2.2 ++ local grp2_dmac=01:00:5e:02:02:02 + local src=192.0.2.129 + + echo + echo "Data path: MDB torture test - IPv4 overlay / IPv6 underlay" + echo "----------------------------------------------------------" + +- mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \ +- "mausezahn" ++ mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp1_dmac $grp2 \ ++ $grp2_dmac $src "mausezahn" + } + + mdb_torture_ipv6_ipv6() +@@ -2221,15 +2270,17 @@ mdb_torture_ipv6_ipv6() + local vtep1_ip=2001:db8:1000::1 + local vtep2_ip=2001:db8:2000::1 + local grp1=ff0e::1 ++ local grp1_dmac=33:33:00:00:00:01 + local grp2=ff0e::2 ++ local grp2_dmac=33:33:00:00:00:02 + local src=2001:db8:100::1 + + echo + echo "Data path: MDB torture test - IPv6 overlay / IPv6 underlay" + echo "----------------------------------------------------------" + +- mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \ +- "mausezahn -6" ++ mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp1_dmac $grp2 \ ++ $grp2_dmac $src "mausezahn -6" + } + + ################################################################################ +diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh +index 9cd5e885e91f7..f4549e6894dd9 100755 +--- a/tools/testing/selftests/net/udpgro_fwd.sh ++++ b/tools/testing/selftests/net/udpgro_fwd.sh +@@ -241,7 +241,7 @@ for family in 4 6; do + + create_vxlan_pair + ip netns exec $NS_DST ethtool -K veth$DST rx-gro-list on +- run_test "GRO frag list over UDP tunnel" $OL_NET$DST 1 1 ++ run_test "GRO frag list over UDP tunnel" $OL_NET$DST 10 10 + cleanup + + # use NAT to circumvent GRO FWD check +@@ -254,13 +254,7 @@ for family in 4 6; do + # load arp cache before running the test to reduce the amount of + # stray traffic on top of the UDP tunnel + ip netns exec $NS_SRC $PING -q -c 1 $OL_NET$DST_NAT >/dev/null +- run_test 
"GRO fwd over UDP tunnel" $OL_NET$DST_NAT 1 1 $OL_NET$DST +- cleanup +- +- create_vxlan_pair +- run_bench "UDP tunnel fwd perf" $OL_NET$DST +- ip netns exec $NS_DST ethtool -K veth$DST rx-udp-gro-forwarding on +- run_bench "UDP tunnel GRO fwd perf" $OL_NET$DST ++ run_test "GRO fwd over UDP tunnel" $OL_NET$DST_NAT 10 10 $OL_NET$DST + cleanup + done + |