author | 2024-04-10 11:09:06 -0400
committer | 2024-04-10 11:09:06 -0400
commit | 55f07c32363e30cdd7d8619be719408a3999d536 (patch)
tree | 5a6ae7b94f222c31930b983aa3cc399ef645b16f
parent | Linux patch 6.6.25 (diff)
download | linux-patches-55f07c32363e30cdd7d8619be719408a3999d536.tar.gz, linux-patches-55f07c32363e30cdd7d8619be719408a3999d536.tar.bz2, linux-patches-55f07c32363e30cdd7d8619be719408a3999d536.zip
Linux patch 6.6.26 (tag: 6.6-32)
Signed-off-by: Mike Pagano <mpagano@gentoo.org>
-rw-r--r-- | 0000_README | 4 |
-rw-r--r-- | 1025_linux-6.6.26.patch | 15825 |
2 files changed, 15829 insertions(+), 0 deletions(-)
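For reference, the diffstat above describes an incremental patch that takes a vanilla 6.6.25 tree to 6.6.26 (the Makefile hunk below bumps SUBLEVEL to 26). A minimal sketch of applying it by hand follows; the directory layout and the use of GNU patch are illustrative assumptions and not part of the commit, since the gentoo-sources ebuild normally applies these patches automatically.

    # Illustrative only: apply this commit's incremental patch to a vanilla tree.
    # Paths are assumptions; adjust to wherever the 6.6.25 sources are unpacked.
    cd linux-6.6.25
    patch -p1 < ../1025_linux-6.6.26.patch   # tree now corresponds to 6.6.26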
diff --git a/0000_README b/0000_README index cfd8c138..7661b44e 100644 --- a/0000_README +++ b/0000_README @@ -143,6 +143,10 @@ Patch: 1024_linux-6.6.25.patch From: https://www.kernel.org Desc: Linux 6.6.25 +Patch: 1025_linux-6.6.26.patch +From: https://www.kernel.org +Desc: Linux 6.6.26 + Patch: 1510_fs-enable-link-security-restrictions-by-default.patch From: http://sources.debian.net/src/linux/3.16.7-ckt4-3/debian/patches/debian/fs-enable-link-security-restrictions-by-default.patch/ Desc: Enable link security restrictions by default. diff --git a/1025_linux-6.6.26.patch b/1025_linux-6.6.26.patch new file mode 100644 index 00000000..20a79d8e --- /dev/null +++ b/1025_linux-6.6.26.patch @@ -0,0 +1,15825 @@ +diff --git a/Documentation/admin-guide/hw-vuln/spectre.rst b/Documentation/admin-guide/hw-vuln/spectre.rst +index 32a8893e56177..9edb2860a3e19 100644 +--- a/Documentation/admin-guide/hw-vuln/spectre.rst ++++ b/Documentation/admin-guide/hw-vuln/spectre.rst +@@ -138,11 +138,10 @@ associated with the source address of the indirect branch. Specifically, + the BHB might be shared across privilege levels even in the presence of + Enhanced IBRS. + +-Currently the only known real-world BHB attack vector is via +-unprivileged eBPF. Therefore, it's highly recommended to not enable +-unprivileged eBPF, especially when eIBRS is used (without retpolines). +-For a full mitigation against BHB attacks, it's recommended to use +-retpolines (or eIBRS combined with retpolines). ++Previously the only known real-world BHB attack vector was via unprivileged ++eBPF. Further research has found attacks that don't require unprivileged eBPF. ++For a full mitigation against BHB attacks it is recommended to set BHI_DIS_S or ++use the BHB clearing sequence. + + Attack scenarios + ---------------- +@@ -430,6 +429,23 @@ The possible values in this file are: + 'PBRSB-eIBRS: Not affected' CPU is not affected by PBRSB + =========================== ======================================================= + ++ - Branch History Injection (BHI) protection status: ++ ++.. list-table:: ++ ++ * - BHI: Not affected ++ - System is not affected ++ * - BHI: Retpoline ++ - System is protected by retpoline ++ * - BHI: BHI_DIS_S ++ - System is protected by BHI_DIS_S ++ * - BHI: SW loop; KVM SW loop ++ - System is protected by software clearing sequence ++ * - BHI: Syscall hardening ++ - Syscalls are hardened against BHI ++ * - BHI: Syscall hardening; KVM: SW loop ++ - System is protected from userspace attacks by syscall hardening; KVM is protected by software clearing sequence ++ + Full mitigation might require a microcode update from the CPU + vendor. When the necessary microcode is not available, the kernel will + report vulnerability. +@@ -484,7 +500,11 @@ Spectre variant 2 + + Systems which support enhanced IBRS (eIBRS) enable IBRS protection once at + boot, by setting the IBRS bit, and they're automatically protected against +- Spectre v2 variant attacks. ++ some Spectre v2 variant attacks. The BHB can still influence the choice of ++ indirect branch predictor entry, and although branch predictor entries are ++ isolated between modes when eIBRS is enabled, the BHB itself is not isolated ++ between modes. Systems which support BHI_DIS_S will set it to protect against ++ BHI attacks. + + On Intel's enhanced IBRS systems, this includes cross-thread branch target + injections on SMT systems (STIBP). In other words, Intel eIBRS enables +@@ -638,6 +658,22 @@ kernel command line. + spectre_v2=off. 
Spectre variant 1 mitigations + cannot be disabled. + ++ spectre_bhi= ++ ++ [X86] Control mitigation of Branch History Injection ++ (BHI) vulnerability. Syscalls are hardened against BHI ++ regardless of this setting. This setting affects the deployment ++ of the HW BHI control and the SW BHB clearing sequence. ++ ++ on ++ unconditionally enable. ++ off ++ unconditionally disable. ++ auto ++ enable if hardware mitigation ++ control(BHI_DIS_S) is available, otherwise ++ enable alternate mitigation in KVM. ++ + For spectre_v2_user see Documentation/admin-guide/kernel-parameters.txt + + Mitigation selection guide +diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt +index 7a36124dde5e5..61199466c0437 100644 +--- a/Documentation/admin-guide/kernel-parameters.txt ++++ b/Documentation/admin-guide/kernel-parameters.txt +@@ -5920,6 +5920,18 @@ + sonypi.*= [HW] Sony Programmable I/O Control Device driver + See Documentation/admin-guide/laptops/sonypi.rst + ++ spectre_bhi= [X86] Control mitigation of Branch History Injection ++ (BHI) vulnerability. Syscalls are hardened against BHI ++ reglardless of this setting. This setting affects the ++ deployment of the HW BHI control and the SW BHB ++ clearing sequence. ++ ++ on - unconditionally enable. ++ off - unconditionally disable. ++ auto - (default) enable hardware mitigation ++ (BHI_DIS_S) if available, otherwise enable ++ alternate mitigation in KVM. ++ + spectre_v2= [X86] Control mitigation of Spectre variant 2 + (indirect branch speculation) vulnerability. + The default operation protects the kernel from +diff --git a/Makefile b/Makefile +index 022af2a9a6d9b..77ad41bd298e0 100644 +--- a/Makefile ++++ b/Makefile +@@ -1,7 +1,7 @@ + # SPDX-License-Identifier: GPL-2.0 + VERSION = 6 + PATCHLEVEL = 6 +-SUBLEVEL = 25 ++SUBLEVEL = 26 + EXTRAVERSION = + NAME = Hurr durr I'ma ninja sloth + +diff --git a/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi b/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi +index 5a33e16a8b677..c2f5e9f6679d6 100644 +--- a/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi ++++ b/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi +@@ -970,6 +970,8 @@ bluetooth: bluetooth { + vddrf-supply = <&pp1300_l2c>; + vddch0-supply = <&pp3300_l10c>; + max-speed = <3200000>; ++ ++ qcom,local-bd-address-broken; + }; + }; + +diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c +index c94c0f8c9a737..d95416b93a9dd 100644 +--- a/arch/arm64/kernel/ptrace.c ++++ b/arch/arm64/kernel/ptrace.c +@@ -728,7 +728,6 @@ static void sve_init_header_from_task(struct user_sve_header *header, + { + unsigned int vq; + bool active; +- bool fpsimd_only; + enum vec_type task_type; + + memset(header, 0, sizeof(*header)); +@@ -744,12 +743,10 @@ static void sve_init_header_from_task(struct user_sve_header *header, + case ARM64_VEC_SVE: + if (test_tsk_thread_flag(target, TIF_SVE_VL_INHERIT)) + header->flags |= SVE_PT_VL_INHERIT; +- fpsimd_only = !test_tsk_thread_flag(target, TIF_SVE); + break; + case ARM64_VEC_SME: + if (test_tsk_thread_flag(target, TIF_SME_VL_INHERIT)) + header->flags |= SVE_PT_VL_INHERIT; +- fpsimd_only = false; + break; + default: + WARN_ON_ONCE(1); +@@ -757,7 +754,7 @@ static void sve_init_header_from_task(struct user_sve_header *header, + } + + if (active) { +- if (fpsimd_only) { ++ if (target->thread.fp_type == FP_STATE_FPSIMD) { + header->flags |= SVE_PT_REGS_FPSIMD; + } else { + header->flags |= SVE_PT_REGS_SVE; +diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c +index 
f155b8c9e98c7..15aa9bad1c280 100644 +--- a/arch/arm64/kvm/hyp/pgtable.c ++++ b/arch/arm64/kvm/hyp/pgtable.c +@@ -805,12 +805,15 @@ static bool stage2_try_break_pte(const struct kvm_pgtable_visit_ctx *ctx, + * Perform the appropriate TLB invalidation based on the + * evicted pte value (if any). + */ +- if (kvm_pte_table(ctx->old, ctx->level)) +- kvm_tlb_flush_vmid_range(mmu, ctx->addr, +- kvm_granule_size(ctx->level)); +- else if (kvm_pte_valid(ctx->old)) ++ if (kvm_pte_table(ctx->old, ctx->level)) { ++ u64 size = kvm_granule_size(ctx->level); ++ u64 addr = ALIGN_DOWN(ctx->addr, size); ++ ++ kvm_tlb_flush_vmid_range(mmu, addr, size); ++ } else if (kvm_pte_valid(ctx->old)) { + kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, + ctx->addr, ctx->level); ++ } + } + + if (stage2_pte_is_counted(ctx->old)) +diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c +index 150d1c6543f7f..29196dce9b91d 100644 +--- a/arch/arm64/net/bpf_jit_comp.c ++++ b/arch/arm64/net/bpf_jit_comp.c +@@ -876,7 +876,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, + emit(A64_UXTH(is64, dst, dst), ctx); + break; + case 32: +- emit(A64_REV32(is64, dst, dst), ctx); ++ emit(A64_REV32(0, dst, dst), ctx); + /* upper 32 bits already cleared */ + break; + case 64: +@@ -1189,7 +1189,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, + } else { + emit_a64_mov_i(1, tmp, off, ctx); + if (sign_extend) +- emit(A64_LDRSW(dst, src_adj, off_adj), ctx); ++ emit(A64_LDRSW(dst, src, tmp), ctx); + else + emit(A64_LDR32(dst, src, tmp), ctx); + } +diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c +index 926bec775f41c..9822366dc186e 100644 +--- a/arch/powerpc/mm/book3s64/pgtable.c ++++ b/arch/powerpc/mm/book3s64/pgtable.c +@@ -130,7 +130,7 @@ void set_pud_at(struct mm_struct *mm, unsigned long addr, + + WARN_ON(pte_hw_valid(pud_pte(*pudp))); + assert_spin_locked(pud_lockptr(mm, pudp)); +- WARN_ON(!(pud_large(pud))); ++ WARN_ON(!(pud_leaf(pud))); + #endif + trace_hugepage_set_pud(addr, pud_val(pud)); + return set_pte_at(mm, addr, pudp_ptep(pudp), pud_pte(pud)); +diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h +index ec0cab9fbddd0..72ec1d9bd3f31 100644 +--- a/arch/riscv/include/asm/uaccess.h ++++ b/arch/riscv/include/asm/uaccess.h +@@ -319,7 +319,7 @@ unsigned long __must_check clear_user(void __user *to, unsigned long n) + + #define __get_kernel_nofault(dst, src, type, err_label) \ + do { \ +- long __kr_err; \ ++ long __kr_err = 0; \ + \ + __get_user_nocheck(*((type *)(dst)), (type *)(src), __kr_err); \ + if (unlikely(__kr_err)) \ +@@ -328,7 +328,7 @@ do { \ + + #define __put_kernel_nofault(dst, src, type, err_label) \ + do { \ +- long __kr_err; \ ++ long __kr_err = 0; \ + \ + __put_user_nocheck(*((type *)(src)), (type *)(dst), __kr_err); \ + if (unlikely(__kr_err)) \ +diff --git a/arch/riscv/include/uapi/asm/auxvec.h b/arch/riscv/include/uapi/asm/auxvec.h +index 10aaa83db89ef..95050ebe9ad00 100644 +--- a/arch/riscv/include/uapi/asm/auxvec.h ++++ b/arch/riscv/include/uapi/asm/auxvec.h +@@ -34,7 +34,7 @@ + #define AT_L3_CACHEGEOMETRY 47 + + /* entries in ARCH_DLINFO */ +-#define AT_VECTOR_SIZE_ARCH 9 ++#define AT_VECTOR_SIZE_ARCH 10 + #define AT_MINSIGSTKSZ 51 + + #endif /* _UAPI_ASM_RISCV_AUXVEC_H */ +diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c +index 37e87fdcf6a00..30e12b310cab7 100644 +--- a/arch/riscv/kernel/patch.c ++++ b/arch/riscv/kernel/patch.c +@@ -80,6 +80,8 @@ static int 
__patch_insn_set(void *addr, u8 c, size_t len) + */ + lockdep_assert_held(&text_mutex); + ++ preempt_disable(); ++ + if (across_pages) + patch_map(addr + PAGE_SIZE, FIX_TEXT_POKE1); + +@@ -92,6 +94,8 @@ static int __patch_insn_set(void *addr, u8 c, size_t len) + if (across_pages) + patch_unmap(FIX_TEXT_POKE1); + ++ preempt_enable(); ++ + return 0; + } + NOKPROBE_SYMBOL(__patch_insn_set); +@@ -122,6 +126,8 @@ static int __patch_insn_write(void *addr, const void *insn, size_t len) + if (!riscv_patch_in_stop_machine) + lockdep_assert_held(&text_mutex); + ++ preempt_disable(); ++ + if (across_pages) + patch_map(addr + PAGE_SIZE, FIX_TEXT_POKE1); + +@@ -134,6 +140,8 @@ static int __patch_insn_write(void *addr, const void *insn, size_t len) + if (across_pages) + patch_unmap(FIX_TEXT_POKE1); + ++ preempt_enable(); ++ + return ret; + } + NOKPROBE_SYMBOL(__patch_insn_write); +diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c +index e32d737e039fd..83e223318822a 100644 +--- a/arch/riscv/kernel/process.c ++++ b/arch/riscv/kernel/process.c +@@ -26,8 +26,6 @@ + #include <asm/cpuidle.h> + #include <asm/vector.h> + +-register unsigned long gp_in_global __asm__("gp"); +- + #if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_STACKPROTECTOR_PER_TASK) + #include <linux/stackprotector.h> + unsigned long __stack_chk_guard __read_mostly; +@@ -186,7 +184,6 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) + if (unlikely(args->fn)) { + /* Kernel thread */ + memset(childregs, 0, sizeof(struct pt_regs)); +- childregs->gp = gp_in_global; + /* Supervisor/Machine, irqs on: */ + childregs->status = SR_PP | SR_PIE; + +diff --git a/arch/riscv/kvm/aia_aplic.c b/arch/riscv/kvm/aia_aplic.c +index 39e72aa016a4c..b467ba5ed9100 100644 +--- a/arch/riscv/kvm/aia_aplic.c ++++ b/arch/riscv/kvm/aia_aplic.c +@@ -137,11 +137,21 @@ static void aplic_write_pending(struct aplic *aplic, u32 irq, bool pending) + raw_spin_lock_irqsave(&irqd->lock, flags); + + sm = irqd->sourcecfg & APLIC_SOURCECFG_SM_MASK; +- if (!pending && +- ((sm == APLIC_SOURCECFG_SM_LEVEL_HIGH) || +- (sm == APLIC_SOURCECFG_SM_LEVEL_LOW))) ++ if (sm == APLIC_SOURCECFG_SM_INACTIVE) + goto skip_write_pending; + ++ if (sm == APLIC_SOURCECFG_SM_LEVEL_HIGH || ++ sm == APLIC_SOURCECFG_SM_LEVEL_LOW) { ++ if (!pending) ++ goto skip_write_pending; ++ if ((irqd->state & APLIC_IRQ_STATE_INPUT) && ++ sm == APLIC_SOURCECFG_SM_LEVEL_LOW) ++ goto skip_write_pending; ++ if (!(irqd->state & APLIC_IRQ_STATE_INPUT) && ++ sm == APLIC_SOURCECFG_SM_LEVEL_HIGH) ++ goto skip_write_pending; ++ } ++ + if (pending) + irqd->state |= APLIC_IRQ_STATE_PENDING; + else +@@ -187,16 +197,31 @@ static void aplic_write_enabled(struct aplic *aplic, u32 irq, bool enabled) + + static bool aplic_read_input(struct aplic *aplic, u32 irq) + { +- bool ret; +- unsigned long flags; ++ u32 sourcecfg, sm, raw_input, irq_inverted; + struct aplic_irq *irqd; ++ unsigned long flags; ++ bool ret = false; + + if (!irq || aplic->nr_irqs <= irq) + return false; + irqd = &aplic->irqs[irq]; + + raw_spin_lock_irqsave(&irqd->lock, flags); +- ret = (irqd->state & APLIC_IRQ_STATE_INPUT) ? true : false; ++ ++ sourcecfg = irqd->sourcecfg; ++ if (sourcecfg & APLIC_SOURCECFG_D) ++ goto skip; ++ ++ sm = sourcecfg & APLIC_SOURCECFG_SM_MASK; ++ if (sm == APLIC_SOURCECFG_SM_INACTIVE) ++ goto skip; ++ ++ raw_input = (irqd->state & APLIC_IRQ_STATE_INPUT) ? 1 : 0; ++ irq_inverted = (sm == APLIC_SOURCECFG_SM_LEVEL_LOW || ++ sm == APLIC_SOURCECFG_SM_EDGE_FALL) ? 
1 : 0; ++ ret = !!(raw_input ^ irq_inverted); ++ ++skip: + raw_spin_unlock_irqrestore(&irqd->lock, flags); + + return ret; +diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c +index 442a74f113cbf..14e1a73ffcfe6 100644 +--- a/arch/s390/boot/vmem.c ++++ b/arch/s390/boot/vmem.c +@@ -360,7 +360,7 @@ static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long e + } + pmd = boot_crst_alloc(_SEGMENT_ENTRY_EMPTY); + pud_populate(&init_mm, pud, pmd); +- } else if (pud_large(*pud)) { ++ } else if (pud_leaf(*pud)) { + continue; + } + pgtable_pmd_populate(pud, addr, next, mode); +diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h +index fb3ee7758b765..38290b0078c56 100644 +--- a/arch/s390/include/asm/pgtable.h ++++ b/arch/s390/include/asm/pgtable.h +@@ -729,7 +729,7 @@ static inline int pud_bad(pud_t pud) + { + unsigned long type = pud_val(pud) & _REGION_ENTRY_TYPE_MASK; + +- if (type > _REGION_ENTRY_TYPE_R3 || pud_large(pud)) ++ if (type > _REGION_ENTRY_TYPE_R3 || pud_leaf(pud)) + return 1; + if (type < _REGION_ENTRY_TYPE_R3) + return 0; +@@ -1396,7 +1396,7 @@ static inline unsigned long pud_deref(pud_t pud) + unsigned long origin_mask; + + origin_mask = _REGION_ENTRY_ORIGIN; +- if (pud_large(pud)) ++ if (pud_leaf(pud)) + origin_mask = _REGION3_ENTRY_ORIGIN_LARGE; + return (unsigned long)__va(pud_val(pud) & origin_mask); + } +diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S +index 49a11f6dd7ae9..26c08ee877407 100644 +--- a/arch/s390/kernel/entry.S ++++ b/arch/s390/kernel/entry.S +@@ -653,6 +653,7 @@ SYM_DATA_START_LOCAL(daton_psw) + SYM_DATA_END(daton_psw) + + .section .rodata, "a" ++ .balign 8 + #define SYSCALL(esame,emu) .quad __s390x_ ## esame + SYM_DATA_START(sys_call_table) + #include "asm/syscall_table.h" +diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c +index 157e0a8d5157d..d17bb1ef63f41 100644 +--- a/arch/s390/mm/gmap.c ++++ b/arch/s390/mm/gmap.c +@@ -596,7 +596,7 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) + pud = pud_offset(p4d, vmaddr); + VM_BUG_ON(pud_none(*pud)); + /* large puds cannot yet be handled */ +- if (pud_large(*pud)) ++ if (pud_leaf(*pud)) + return -EFAULT; + pmd = pmd_offset(pud, vmaddr); + VM_BUG_ON(pmd_none(*pmd)); +diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c +index 297a6d897d5a0..5f64f3d0fafbb 100644 +--- a/arch/s390/mm/hugetlbpage.c ++++ b/arch/s390/mm/hugetlbpage.c +@@ -224,7 +224,7 @@ pte_t *huge_pte_offset(struct mm_struct *mm, + if (p4d_present(*p4dp)) { + pudp = pud_offset(p4dp, addr); + if (pud_present(*pudp)) { +- if (pud_large(*pudp)) ++ if (pud_leaf(*pudp)) + return (pte_t *) pudp; + pmdp = pmd_offset(pudp, addr); + } +@@ -240,7 +240,7 @@ int pmd_huge(pmd_t pmd) + + int pud_huge(pud_t pud) + { +- return pud_large(pud); ++ return pud_leaf(pud); + } + + bool __init arch_hugetlb_valid_size(unsigned long size) +diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c +index b87e96c64b61d..441f654d048d2 100644 +--- a/arch/s390/mm/pageattr.c ++++ b/arch/s390/mm/pageattr.c +@@ -274,7 +274,7 @@ static int walk_pud_level(p4d_t *p4d, unsigned long addr, unsigned long end, + if (pud_none(*pudp)) + return -EINVAL; + next = pud_addr_end(addr, end); +- if (pud_large(*pudp)) { ++ if (pud_leaf(*pudp)) { + need_split = !!(flags & SET_MEMORY_4K); + need_split |= !!(addr & ~PUD_MASK); + need_split |= !!(addr + PUD_SIZE > next); +diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c +index 5cb92941540b3..5e349869590a8 100644 +--- 
a/arch/s390/mm/pgtable.c ++++ b/arch/s390/mm/pgtable.c +@@ -479,7 +479,7 @@ static int pmd_lookup(struct mm_struct *mm, unsigned long addr, pmd_t **pmdp) + return -ENOENT; + + /* Large PUDs are not supported yet. */ +- if (pud_large(*pud)) ++ if (pud_leaf(*pud)) + return -EFAULT; + + *pmdp = pmd_offset(pud, addr); +diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c +index 6d276103c6d58..2d3f65da56eea 100644 +--- a/arch/s390/mm/vmem.c ++++ b/arch/s390/mm/vmem.c +@@ -322,7 +322,7 @@ static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end, + if (!add) { + if (pud_none(*pud)) + continue; +- if (pud_large(*pud)) { ++ if (pud_leaf(*pud)) { + if (IS_ALIGNED(addr, PUD_SIZE) && + IS_ALIGNED(next, PUD_SIZE)) { + pud_clear(pud); +@@ -343,7 +343,7 @@ static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end, + if (!pmd) + goto out; + pud_populate(&init_mm, pud, pmd); +- } else if (pud_large(*pud)) { ++ } else if (pud_leaf(*pud)) { + continue; + } + ret = modify_pmd_table(pud, addr, next, add, direct); +@@ -586,7 +586,7 @@ pte_t *vmem_get_alloc_pte(unsigned long addr, bool alloc) + if (!pmd) + goto out; + pud_populate(&init_mm, pud, pmd); +- } else if (WARN_ON_ONCE(pud_large(*pud))) { ++ } else if (WARN_ON_ONCE(pud_leaf(*pud))) { + goto out; + } + pmd = pmd_offset(pud, addr); +diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c +index e507692e51e71..8af02176f68bf 100644 +--- a/arch/s390/net/bpf_jit_comp.c ++++ b/arch/s390/net/bpf_jit_comp.c +@@ -516,11 +516,12 @@ static void bpf_skip(struct bpf_jit *jit, int size) + * PLT for hotpatchable calls. The calling convention is the same as for the + * ftrace hotpatch trampolines: %r0 is return address, %r1 is clobbered. + */ +-extern const char bpf_plt[]; +-extern const char bpf_plt_ret[]; +-extern const char bpf_plt_target[]; +-extern const char bpf_plt_end[]; +-#define BPF_PLT_SIZE 32 ++struct bpf_plt { ++ char code[16]; ++ void *ret; ++ void *target; ++} __packed; ++extern const struct bpf_plt bpf_plt; + asm( + ".pushsection .rodata\n" + " .balign 8\n" +@@ -531,15 +532,14 @@ asm( + " .balign 8\n" + "bpf_plt_ret: .quad 0\n" + "bpf_plt_target: .quad 0\n" +- "bpf_plt_end:\n" + " .popsection\n" + ); + +-static void bpf_jit_plt(void *plt, void *ret, void *target) ++static void bpf_jit_plt(struct bpf_plt *plt, void *ret, void *target) + { +- memcpy(plt, bpf_plt, BPF_PLT_SIZE); +- *(void **)((char *)plt + (bpf_plt_ret - bpf_plt)) = ret; +- *(void **)((char *)plt + (bpf_plt_target - bpf_plt)) = target ?: ret; ++ memcpy(plt, &bpf_plt, sizeof(*plt)); ++ plt->ret = ret; ++ plt->target = target; + } + + /* +@@ -662,9 +662,9 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth) + jit->prg = ALIGN(jit->prg, 8); + jit->prologue_plt = jit->prg; + if (jit->prg_buf) +- bpf_jit_plt(jit->prg_buf + jit->prg, ++ bpf_jit_plt((struct bpf_plt *)(jit->prg_buf + jit->prg), + jit->prg_buf + jit->prologue_plt_ret, NULL); +- jit->prg += BPF_PLT_SIZE; ++ jit->prg += sizeof(struct bpf_plt); + } + + static int get_probe_mem_regno(const u8 *insn) +@@ -1901,9 +1901,6 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) + struct bpf_jit jit; + int pass; + +- if (WARN_ON_ONCE(bpf_plt_end - bpf_plt != BPF_PLT_SIZE)) +- return orig_fp; +- + if (!fp->jit_requested) + return orig_fp; + +@@ -2009,14 +2006,11 @@ bool bpf_jit_supports_far_kfunc_call(void) + int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, + void *old_addr, void *new_addr) + { ++ struct bpf_plt expected_plt, current_plt, new_plt, 
*plt; + struct { + u16 opc; + s32 disp; + } __packed insn; +- char expected_plt[BPF_PLT_SIZE]; +- char current_plt[BPF_PLT_SIZE]; +- char new_plt[BPF_PLT_SIZE]; +- char *plt; + char *ret; + int err; + +@@ -2035,18 +2029,18 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, + */ + } else { + /* Verify the PLT. */ +- plt = (char *)ip + (insn.disp << 1); +- err = copy_from_kernel_nofault(current_plt, plt, BPF_PLT_SIZE); ++ plt = ip + (insn.disp << 1); ++ err = copy_from_kernel_nofault(¤t_plt, plt, ++ sizeof(current_plt)); + if (err < 0) + return err; + ret = (char *)ip + 6; +- bpf_jit_plt(expected_plt, ret, old_addr); +- if (memcmp(current_plt, expected_plt, BPF_PLT_SIZE)) ++ bpf_jit_plt(&expected_plt, ret, old_addr); ++ if (memcmp(¤t_plt, &expected_plt, sizeof(current_plt))) + return -EINVAL; + /* Adjust the call address. */ +- bpf_jit_plt(new_plt, ret, new_addr); +- s390_kernel_write(plt + (bpf_plt_target - bpf_plt), +- new_plt + (bpf_plt_target - bpf_plt), ++ bpf_jit_plt(&new_plt, ret, new_addr); ++ s390_kernel_write(&plt->target, &new_plt.target, + sizeof(void *)); + } + +diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c +index f83017992eaae..d7db4e737218c 100644 +--- a/arch/sparc/mm/init_64.c ++++ b/arch/sparc/mm/init_64.c +@@ -1665,7 +1665,7 @@ bool kern_addr_valid(unsigned long addr) + if (pud_none(*pud)) + return false; + +- if (pud_large(*pud)) ++ if (pud_leaf(*pud)) + return pfn_valid(pud_pfn(*pud)); + + pmd = pmd_offset(pud, addr); +diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig +index 4b81e884a6147..b4e6859542a39 100644 +--- a/arch/x86/Kconfig ++++ b/arch/x86/Kconfig +@@ -2566,6 +2566,31 @@ config MITIGATION_RFDS + stored in floating point, vector and integer registers. + See also <file:Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst> + ++choice ++ prompt "Clear branch history" ++ depends on CPU_SUP_INTEL ++ default SPECTRE_BHI_ON ++ help ++ Enable BHI mitigations. BHI attacks are a form of Spectre V2 attacks ++ where the branch history buffer is poisoned to speculatively steer ++ indirect branches. ++ See <file:Documentation/admin-guide/hw-vuln/spectre.rst> ++ ++config SPECTRE_BHI_ON ++ bool "on" ++ help ++ Equivalent to setting spectre_bhi=on command line parameter. ++config SPECTRE_BHI_OFF ++ bool "off" ++ help ++ Equivalent to setting spectre_bhi=off command line parameter. ++config SPECTRE_BHI_AUTO ++ bool "auto" ++ help ++ Equivalent to setting spectre_bhi=auto command line parameter. 
++ ++endchoice ++ + endif + + config ARCH_HAS_ADD_PAGES +diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile +index 71fc531b95b4e..583c11664c63b 100644 +--- a/arch/x86/boot/compressed/Makefile ++++ b/arch/x86/boot/compressed/Makefile +@@ -84,7 +84,7 @@ LDFLAGS_vmlinux += -T + hostprogs := mkpiggy + HOST_EXTRACFLAGS += -I$(srctree)/tools/include + +-sed-voffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(_text\|__bss_start\|_end\)$$/\#define VO_\2 _AC(0x\1,UL)/p' ++sed-voffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(_text\|__start_rodata\|__bss_start\|_end\)$$/\#define VO_\2 _AC(0x\1,UL)/p' + + quiet_cmd_voffset = VOFFSET $@ + cmd_voffset = $(NM) $< | sed -n $(sed-voffset) > $@ +diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c +index f711f2a85862e..b5ecbd32a46fa 100644 +--- a/arch/x86/boot/compressed/misc.c ++++ b/arch/x86/boot/compressed/misc.c +@@ -330,6 +330,7 @@ static size_t parse_elf(void *output) + return ehdr.e_entry - LOAD_PHYSICAL_ADDR; + } + ++const unsigned long kernel_text_size = VO___start_rodata - VO__text; + const unsigned long kernel_total_size = VO__end - VO__text; + + static u8 boot_heap[BOOT_HEAP_SIZE] __aligned(4); +@@ -357,6 +358,19 @@ unsigned long decompress_kernel(unsigned char *outbuf, unsigned long virt_addr, + return entry; + } + ++/* ++ * Set the memory encryption xloadflag based on the mem_encrypt= command line ++ * parameter, if provided. ++ */ ++static void parse_mem_encrypt(struct setup_header *hdr) ++{ ++ int on = cmdline_find_option_bool("mem_encrypt=on"); ++ int off = cmdline_find_option_bool("mem_encrypt=off"); ++ ++ if (on > off) ++ hdr->xloadflags |= XLF_MEM_ENCRYPTION; ++} ++ + /* + * The compressed kernel image (ZO), has been moved so that its position + * is against the end of the buffer used to hold the uncompressed kernel +@@ -387,6 +401,8 @@ asmlinkage __visible void *extract_kernel(void *rmode, unsigned char *output) + /* Clear flags intended for solely in-kernel use. */ + boot_params->hdr.loadflags &= ~KASLR_FLAG; + ++ parse_mem_encrypt(&boot_params->hdr); ++ + sanitize_boot_params(boot_params); + + if (boot_params->screen_info.orig_video_mode == 7) { +diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c +index 80d76aea1f7bf..0a49218a516a2 100644 +--- a/arch/x86/boot/compressed/sev.c ++++ b/arch/x86/boot/compressed/sev.c +@@ -116,6 +116,9 @@ static bool fault_in_kernel_space(unsigned long address) + #undef __init + #define __init + ++#undef __head ++#define __head ++ + #define __BOOT_COMPRESSED + + /* Basic instruction decoding support needed */ +diff --git a/arch/x86/coco/core.c b/arch/x86/coco/core.c +index d07be9d05cd03..ddd4efdc79d66 100644 +--- a/arch/x86/coco/core.c ++++ b/arch/x86/coco/core.c +@@ -3,13 +3,17 @@ + * Confidential Computing Platform Capability checks + * + * Copyright (C) 2021 Advanced Micro Devices, Inc. ++ * Copyright (C) 2024 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + * + * Author: Tom Lendacky <thomas.lendacky@amd.com> + */ + + #include <linux/export.h> + #include <linux/cc_platform.h> ++#include <linux/string.h> ++#include <linux/random.h> + ++#include <asm/archrandom.h> + #include <asm/coco.h> + #include <asm/processor.h> + +@@ -148,3 +152,40 @@ u64 cc_mkdec(u64 val) + } + } + EXPORT_SYMBOL_GPL(cc_mkdec); ++ ++__init void cc_random_init(void) ++{ ++ /* ++ * The seed is 32 bytes (in units of longs), which is 256 bits, which ++ * is the security level that the RNG is targeting. 
++ */ ++ unsigned long rng_seed[32 / sizeof(long)]; ++ size_t i, longs; ++ ++ if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) ++ return; ++ ++ /* ++ * Since the CoCo threat model includes the host, the only reliable ++ * source of entropy that can be neither observed nor manipulated is ++ * RDRAND. Usually, RDRAND failure is considered tolerable, but since ++ * CoCo guests have no other unobservable source of entropy, it's ++ * important to at least ensure the RNG gets some initial random seeds. ++ */ ++ for (i = 0; i < ARRAY_SIZE(rng_seed); i += longs) { ++ longs = arch_get_random_longs(&rng_seed[i], ARRAY_SIZE(rng_seed) - i); ++ ++ /* ++ * A zero return value means that the guest doesn't have RDRAND ++ * or the CPU is physically broken, and in both cases that ++ * means most crypto inside of the CoCo instance will be ++ * broken, defeating the purpose of CoCo in the first place. So ++ * just panic here because it's absolutely unsafe to continue ++ * executing. ++ */ ++ if (longs == 0) ++ panic("RDRAND is defective."); ++ } ++ add_device_randomness(rng_seed, sizeof(rng_seed)); ++ memzero_explicit(rng_seed, sizeof(rng_seed)); ++} +diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c +index 9c0b26ae51069..e72dac092245a 100644 +--- a/arch/x86/entry/common.c ++++ b/arch/x86/entry/common.c +@@ -48,7 +48,7 @@ static __always_inline bool do_syscall_x64(struct pt_regs *regs, int nr) + + if (likely(unr < NR_syscalls)) { + unr = array_index_nospec(unr, NR_syscalls); +- regs->ax = sys_call_table[unr](regs); ++ regs->ax = x64_sys_call(regs, unr); + return true; + } + return false; +@@ -65,7 +65,7 @@ static __always_inline bool do_syscall_x32(struct pt_regs *regs, int nr) + + if (IS_ENABLED(CONFIG_X86_X32_ABI) && likely(xnr < X32_NR_syscalls)) { + xnr = array_index_nospec(xnr, X32_NR_syscalls); +- regs->ax = x32_sys_call_table[xnr](regs); ++ regs->ax = x32_sys_call(regs, xnr); + return true; + } + return false; +@@ -114,7 +114,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs, int nr) + + if (likely(unr < IA32_NR_syscalls)) { + unr = array_index_nospec(unr, IA32_NR_syscalls); +- regs->ax = ia32_sys_call_table[unr](regs); ++ regs->ax = ia32_sys_call(regs, unr); + } else if (nr != -1) { + regs->ax = __ia32_sys_ni_syscall(regs); + } +@@ -141,7 +141,7 @@ static __always_inline bool int80_is_external(void) + } + + /** +- * int80_emulation - 32-bit legacy syscall entry ++ * do_int80_emulation - 32-bit legacy syscall C entry from asm + * + * This entry point can be used by 32-bit and 64-bit programs to perform + * 32-bit system calls. 
Instances of INT $0x80 can be found inline in +@@ -159,7 +159,7 @@ static __always_inline bool int80_is_external(void) + * eax: system call number + * ebx, ecx, edx, esi, edi, ebp: arg1 - arg 6 + */ +-DEFINE_IDTENTRY_RAW(int80_emulation) ++__visible noinstr void do_int80_emulation(struct pt_regs *regs) + { + int nr; + +diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S +index 9f97a8bd11e81..5d96561c0d6ad 100644 +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -116,6 +116,7 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL) + /* clobbers %rax, make sure it is after saving the syscall nr */ + IBRS_ENTER + UNTRAIN_RET ++ CLEAR_BRANCH_HISTORY + + call do_syscall_64 /* returns with IRQs disabled */ + +@@ -1549,3 +1550,63 @@ SYM_CODE_START_NOALIGN(rewind_stack_and_make_dead) + call make_task_dead + SYM_CODE_END(rewind_stack_and_make_dead) + .popsection ++ ++/* ++ * This sequence executes branches in order to remove user branch information ++ * from the branch history tracker in the Branch Predictor, therefore removing ++ * user influence on subsequent BTB lookups. ++ * ++ * It should be used on parts prior to Alder Lake. Newer parts should use the ++ * BHI_DIS_S hardware control instead. If a pre-Alder Lake part is being ++ * virtualized on newer hardware the VMM should protect against BHI attacks by ++ * setting BHI_DIS_S for the guests. ++ * ++ * CALLs/RETs are necessary to prevent Loop Stream Detector(LSD) from engaging ++ * and not clearing the branch history. The call tree looks like: ++ * ++ * call 1 ++ * call 2 ++ * call 2 ++ * call 2 ++ * call 2 ++ * call 2 ++ * ret ++ * ret ++ * ret ++ * ret ++ * ret ++ * ret ++ * ++ * This means that the stack is non-constant and ORC can't unwind it with %rsp ++ * alone. Therefore we unconditionally set up the frame pointer, which allows ++ * ORC to unwind properly. ++ * ++ * The alignment is for performance and not for safety, and may be safely ++ * refactored in the future if needed. ++ */ ++SYM_FUNC_START(clear_bhb_loop) ++ push %rbp ++ mov %rsp, %rbp ++ movl $5, %ecx ++ ANNOTATE_INTRA_FUNCTION_CALL ++ call 1f ++ jmp 5f ++ .align 64, 0xcc ++ ANNOTATE_INTRA_FUNCTION_CALL ++1: call 2f ++ RET ++ .align 64, 0xcc ++2: movl $5, %eax ++3: jmp 4f ++ nop ++4: sub $1, %eax ++ jnz 3b ++ sub $1, %ecx ++ jnz 1b ++ RET ++5: lfence ++ pop %rbp ++ RET ++SYM_FUNC_END(clear_bhb_loop) ++EXPORT_SYMBOL_GPL(clear_bhb_loop) ++STACK_FRAME_NON_STANDARD(clear_bhb_loop) +diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S +index 306181e4fcb90..4c1dfc51c56e4 100644 +--- a/arch/x86/entry/entry_64_compat.S ++++ b/arch/x86/entry/entry_64_compat.S +@@ -92,6 +92,7 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL) + + IBRS_ENTER + UNTRAIN_RET ++ CLEAR_BRANCH_HISTORY + + /* + * SYSENTER doesn't filter flags, so we need to clear NT and AC +@@ -209,6 +210,7 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_after_hwframe, SYM_L_GLOBAL) + + IBRS_ENTER + UNTRAIN_RET ++ CLEAR_BRANCH_HISTORY + + movq %rsp, %rdi + call do_fast_syscall_32 +@@ -277,3 +279,17 @@ SYM_INNER_LABEL(entry_SYSRETL_compat_end, SYM_L_GLOBAL) + ANNOTATE_NOENDBR + int3 + SYM_CODE_END(entry_SYSCALL_compat) ++ ++/* ++ * int 0x80 is used by 32 bit mode as a system call entry. 
Normally idt entries ++ * point to C routines, however since this is a system call interface the branch ++ * history needs to be scrubbed to protect against BHI attacks, and that ++ * scrubbing needs to take place in assembly code prior to entering any C ++ * routines. ++ */ ++SYM_CODE_START(int80_emulation) ++ ANNOTATE_NOENDBR ++ UNWIND_HINT_FUNC ++ CLEAR_BRANCH_HISTORY ++ jmp do_int80_emulation ++SYM_CODE_END(int80_emulation) +diff --git a/arch/x86/entry/syscall_32.c b/arch/x86/entry/syscall_32.c +index 8cfc9bc73e7f8..c2235bae17ef6 100644 +--- a/arch/x86/entry/syscall_32.c ++++ b/arch/x86/entry/syscall_32.c +@@ -18,8 +18,25 @@ + #include <asm/syscalls_32.h> + #undef __SYSCALL + ++/* ++ * The sys_call_table[] is no longer used for system calls, but ++ * kernel/trace/trace_syscalls.c still wants to know the system ++ * call address. ++ */ ++#ifdef CONFIG_X86_32 + #define __SYSCALL(nr, sym) __ia32_##sym, +- +-__visible const sys_call_ptr_t ia32_sys_call_table[] = { ++const sys_call_ptr_t sys_call_table[] = { + #include <asm/syscalls_32.h> + }; ++#undef __SYSCALL ++#endif ++ ++#define __SYSCALL(nr, sym) case nr: return __ia32_##sym(regs); ++ ++long ia32_sys_call(const struct pt_regs *regs, unsigned int nr) ++{ ++ switch (nr) { ++ #include <asm/syscalls_32.h> ++ default: return __ia32_sys_ni_syscall(regs); ++ } ++}; +diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c +index be120eec1fc9f..33b3f09e6f151 100644 +--- a/arch/x86/entry/syscall_64.c ++++ b/arch/x86/entry/syscall_64.c +@@ -11,8 +11,23 @@ + #include <asm/syscalls_64.h> + #undef __SYSCALL + ++/* ++ * The sys_call_table[] is no longer used for system calls, but ++ * kernel/trace/trace_syscalls.c still wants to know the system ++ * call address. ++ */ + #define __SYSCALL(nr, sym) __x64_##sym, +- +-asmlinkage const sys_call_ptr_t sys_call_table[] = { ++const sys_call_ptr_t sys_call_table[] = { + #include <asm/syscalls_64.h> + }; ++#undef __SYSCALL ++ ++#define __SYSCALL(nr, sym) case nr: return __x64_##sym(regs); ++ ++long x64_sys_call(const struct pt_regs *regs, unsigned int nr) ++{ ++ switch (nr) { ++ #include <asm/syscalls_64.h> ++ default: return __x64_sys_ni_syscall(regs); ++ } ++}; +diff --git a/arch/x86/entry/syscall_x32.c b/arch/x86/entry/syscall_x32.c +index bdd0e03a1265d..03de4a9321318 100644 +--- a/arch/x86/entry/syscall_x32.c ++++ b/arch/x86/entry/syscall_x32.c +@@ -11,8 +11,12 @@ + #include <asm/syscalls_x32.h> + #undef __SYSCALL + +-#define __SYSCALL(nr, sym) __x64_##sym, ++#define __SYSCALL(nr, sym) case nr: return __x64_##sym(regs); + +-asmlinkage const sys_call_ptr_t x32_sys_call_table[] = { +-#include <asm/syscalls_x32.h> ++long x32_sys_call(const struct pt_regs *regs, unsigned int nr) ++{ ++ switch (nr) { ++ #include <asm/syscalls_x32.h> ++ default: return __x64_sys_ni_syscall(regs); ++ } + }; +diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c +index 5365d6acbf090..8ed10366c4a27 100644 +--- a/arch/x86/events/amd/core.c ++++ b/arch/x86/events/amd/core.c +@@ -250,7 +250,7 @@ static const u64 amd_perfmon_event_map[PERF_COUNT_HW_MAX] = + /* + * AMD Performance Monitor Family 17h and later: + */ +-static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] = ++static const u64 amd_zen1_perfmon_event_map[PERF_COUNT_HW_MAX] = + { + [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, + [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, +@@ -262,10 +262,24 @@ static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] = + [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x0187, + }; + ++static const u64 
amd_zen2_perfmon_event_map[PERF_COUNT_HW_MAX] = ++{ ++ [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, ++ [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, ++ [PERF_COUNT_HW_CACHE_REFERENCES] = 0xff60, ++ [PERF_COUNT_HW_CACHE_MISSES] = 0x0964, ++ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2, ++ [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3, ++ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00a9, ++}; ++ + static u64 amd_pmu_event_map(int hw_event) + { +- if (boot_cpu_data.x86 >= 0x17) +- return amd_f17h_perfmon_event_map[hw_event]; ++ if (cpu_feature_enabled(X86_FEATURE_ZEN2) || boot_cpu_data.x86 >= 0x19) ++ return amd_zen2_perfmon_event_map[hw_event]; ++ ++ if (cpu_feature_enabled(X86_FEATURE_ZEN1)) ++ return amd_zen1_perfmon_event_map[hw_event]; + + return amd_perfmon_event_map[hw_event]; + } +@@ -904,8 +918,8 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs) + if (!status) + goto done; + +- /* Read branch records before unfreezing */ +- if (status & GLOBAL_STATUS_LBRS_FROZEN) { ++ /* Read branch records */ ++ if (x86_pmu.lbr_nr) { + amd_pmu_lbr_read(); + status &= ~GLOBAL_STATUS_LBRS_FROZEN; + } +diff --git a/arch/x86/events/amd/lbr.c b/arch/x86/events/amd/lbr.c +index eb31f850841a8..110e34c59643a 100644 +--- a/arch/x86/events/amd/lbr.c ++++ b/arch/x86/events/amd/lbr.c +@@ -400,10 +400,12 @@ void amd_pmu_lbr_enable_all(void) + wrmsrl(MSR_AMD64_LBR_SELECT, lbr_select); + } + +- rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl); +- rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg); ++ if (cpu_feature_enabled(X86_FEATURE_AMD_LBR_PMC_FREEZE)) { ++ rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl); ++ wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); ++ } + +- wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); ++ rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg); + wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg | DBG_EXTN_CFG_LBRV2EN); + } + +@@ -416,10 +418,12 @@ void amd_pmu_lbr_disable_all(void) + return; + + rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg); +- rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl); +- + wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg & ~DBG_EXTN_CFG_LBRV2EN); +- wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); ++ ++ if (cpu_feature_enabled(X86_FEATURE_AMD_LBR_PMC_FREEZE)) { ++ rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl); ++ wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); ++ } + } + + __init int amd_pmu_lbr_init(void) +diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c +index eb8dd8b8a1e86..2b53f696c3c96 100644 +--- a/arch/x86/events/intel/ds.c ++++ b/arch/x86/events/intel/ds.c +@@ -1236,11 +1236,11 @@ pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc, + struct pmu *pmu = event->pmu; + + /* +- * Make sure we get updated with the first PEBS +- * event. It will trigger also during removal, but +- * that does not hurt: ++ * Make sure we get updated with the first PEBS event. ++ * During removal, ->pebs_data_cfg is still valid for ++ * the last PEBS event. Don't clear it. 
+ */ +- if (cpuc->n_pebs == 1) ++ if ((cpuc->n_pebs == 1) && add) + cpuc->pebs_data_cfg = PEBS_UPDATE_DS_SW; + + if (needed_cb != pebs_needs_sched_cb(cpuc)) { +diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h +index b1a98fa38828e..0e82074517f6b 100644 +--- a/arch/x86/include/asm/asm-prototypes.h ++++ b/arch/x86/include/asm/asm-prototypes.h +@@ -13,6 +13,7 @@ + #include <asm/preempt.h> + #include <asm/asm.h> + #include <asm/gsseg.h> ++#include <asm/nospec-branch.h> + + #ifndef CONFIG_X86_CMPXCHG64 + extern void cmpxchg8b_emu(void); +diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h +index b3a7cfb0d99e0..c945c893c52e0 100644 +--- a/arch/x86/include/asm/boot.h ++++ b/arch/x86/include/asm/boot.h +@@ -81,6 +81,7 @@ + + #ifndef __ASSEMBLY__ + extern unsigned int output_len; ++extern const unsigned long kernel_text_size; + extern const unsigned long kernel_total_size; + + unsigned long decompress_kernel(unsigned char *outbuf, unsigned long virt_addr, +diff --git a/arch/x86/include/asm/coco.h b/arch/x86/include/asm/coco.h +index 21940ef8d2904..de03537a01823 100644 +--- a/arch/x86/include/asm/coco.h ++++ b/arch/x86/include/asm/coco.h +@@ -22,6 +22,7 @@ static inline void cc_set_mask(u64 mask) + + u64 cc_mkenc(u64 val); + u64 cc_mkdec(u64 val); ++void cc_random_init(void); + #else + static inline u64 cc_mkenc(u64 val) + { +@@ -32,6 +33,7 @@ static inline u64 cc_mkdec(u64 val) + { + return val; + } ++static inline void cc_random_init(void) { } + #endif + + #endif /* _ASM_X86_COCO_H */ +diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h +index a1273698fc430..686e92d2663ee 100644 +--- a/arch/x86/include/asm/cpufeature.h ++++ b/arch/x86/include/asm/cpufeature.h +@@ -33,6 +33,8 @@ enum cpuid_leafs + CPUID_7_EDX, + CPUID_8000_001F_EAX, + CPUID_8000_0021_EAX, ++ CPUID_LNX_5, ++ NR_CPUID_WORDS, + }; + + #define X86_CAP_FMT_NUM "%d:%d" +@@ -91,8 +93,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; + CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 18, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 19, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 20, feature_bit) || \ ++ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 21, feature_bit) || \ + REQUIRED_MASK_CHECK || \ +- BUILD_BUG_ON_ZERO(NCAPINTS != 21)) ++ BUILD_BUG_ON_ZERO(NCAPINTS != 22)) + + #define DISABLED_MASK_BIT_SET(feature_bit) \ + ( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 0, feature_bit) || \ +@@ -116,8 +119,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; + CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 18, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 19, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 20, feature_bit) || \ ++ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 21, feature_bit) || \ + DISABLED_MASK_CHECK || \ +- BUILD_BUG_ON_ZERO(NCAPINTS != 21)) ++ BUILD_BUG_ON_ZERO(NCAPINTS != 22)) + + #define cpu_has(c, bit) \ + (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 
1 : \ +diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h +index bd33f6366c80d..8c1593dd2c317 100644 +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -13,7 +13,7 @@ + /* + * Defines x86 CPU feature bits + */ +-#define NCAPINTS 21 /* N 32-bit words worth of info */ ++#define NCAPINTS 22 /* N 32-bit words worth of info */ + #define NBUGINTS 2 /* N 32-bit bug flags */ + + /* +@@ -218,7 +218,7 @@ + #define X86_FEATURE_IBRS ( 7*32+25) /* Indirect Branch Restricted Speculation */ + #define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */ + #define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */ +-#define X86_FEATURE_ZEN (7*32+28) /* "" CPU based on Zen microarchitecture */ ++#define X86_FEATURE_ZEN ( 7*32+28) /* "" Generic flag for all Zen and newer */ + #define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* "" L1TF workaround PTE inversion */ + #define X86_FEATURE_IBRS_ENHANCED ( 7*32+30) /* Enhanced IBRS */ + #define X86_FEATURE_MSR_IA32_FEAT_CTL ( 7*32+31) /* "" MSR IA32_FEAT_CTL configured */ +@@ -312,6 +312,10 @@ + #define X86_FEATURE_SRSO_ALIAS (11*32+25) /* "" AMD BTB untrain RETs through aliasing */ + #define X86_FEATURE_IBPB_ON_VMEXIT (11*32+26) /* "" Issue an IBPB only on VMEXIT */ + #define X86_FEATURE_APIC_MSRS_FENCE (11*32+27) /* "" IA32_TSC_DEADLINE and X2APIC MSRs need fencing */ ++#define X86_FEATURE_ZEN2 (11*32+28) /* "" CPU based on Zen2 microarchitecture */ ++#define X86_FEATURE_ZEN3 (11*32+29) /* "" CPU based on Zen3 microarchitecture */ ++#define X86_FEATURE_ZEN4 (11*32+30) /* "" CPU based on Zen4 microarchitecture */ ++#define X86_FEATURE_ZEN1 (11*32+31) /* "" CPU based on Zen1 microarchitecture */ + + /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ + #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ +@@ -452,6 +456,18 @@ + #define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* "" MSR_PRED_CMD[IBPB] flushes all branch type predictions */ + #define X86_FEATURE_SRSO_NO (20*32+29) /* "" CPU is not affected by SRSO */ + ++/* ++ * Extended auxiliary flags: Linux defined - for features scattered in various ++ * CPUID levels like 0x80000022, etc and Linux defined features. ++ * ++ * Reuse free bits when adding new feature flags! 
++ */ ++#define X86_FEATURE_AMD_LBR_PMC_FREEZE (21*32+ 0) /* AMD LBR and PMC Freeze */ ++#define X86_FEATURE_CLEAR_BHB_LOOP (21*32+ 1) /* "" Clear branch history at syscall entry using SW loop */ ++#define X86_FEATURE_BHI_CTRL (21*32+ 2) /* "" BHI_DIS_S HW control available */ ++#define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* "" BHI_DIS_S HW control enabled */ ++#define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* "" Clear branch history at vmexit using SW loop */ ++ + /* + * BUG word(s) + */ +@@ -499,4 +515,5 @@ + #define X86_BUG_SRSO X86_BUG(1*32 + 0) /* AMD SRSO bug */ + #define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* AMD DIV0 speculation bug */ + #define X86_BUG_RFDS X86_BUG(1*32 + 2) /* CPU is vulnerable to Register File Data Sampling */ ++#define X86_BUG_BHI X86_BUG(1*32 + 3) /* CPU is affected by Branch History Injection */ + #endif /* _ASM_X86_CPUFEATURES_H */ +diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h +index 702d93fdd10e8..88fcf08458d9c 100644 +--- a/arch/x86/include/asm/disabled-features.h ++++ b/arch/x86/include/asm/disabled-features.h +@@ -143,6 +143,7 @@ + #define DISABLED_MASK18 (DISABLE_IBT) + #define DISABLED_MASK19 0 + #define DISABLED_MASK20 0 +-#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21) ++#define DISABLED_MASK21 0 ++#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 22) + + #endif /* _ASM_X86_DISABLED_FEATURES_H */ +diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h +index 5f1d3c421f686..cc9ccf61b6bd1 100644 +--- a/arch/x86/include/asm/init.h ++++ b/arch/x86/include/asm/init.h +@@ -2,6 +2,8 @@ + #ifndef _ASM_X86_INIT_H + #define _ASM_X86_INIT_H + ++#define __head __section(".head.text") ++ + struct x86_mapping_info { + void *(*alloc_pgt_page)(void *); /* allocate buf for page table */ + void *context; /* context for alloc_pgt_page */ +diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h +index f4f5269846295..76081a34fc231 100644 +--- a/arch/x86/include/asm/mem_encrypt.h ++++ b/arch/x86/include/asm/mem_encrypt.h +@@ -46,8 +46,8 @@ void __init sme_unmap_bootdata(char *real_mode_data); + void __init sme_early_init(void); + void __init sev_setup_arch(void); + +-void __init sme_encrypt_kernel(struct boot_params *bp); +-void __init sme_enable(struct boot_params *bp); ++void sme_encrypt_kernel(struct boot_params *bp); ++void sme_enable(struct boot_params *bp); + + int __init early_set_memory_decrypted(unsigned long vaddr, unsigned long size); + int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size); +@@ -81,8 +81,8 @@ static inline void __init sme_unmap_bootdata(char *real_mode_data) { } + static inline void __init sme_early_init(void) { } + static inline void __init sev_setup_arch(void) { } + +-static inline void __init sme_encrypt_kernel(struct boot_params *bp) { } +-static inline void __init sme_enable(struct boot_params *bp) { } ++static inline void sme_encrypt_kernel(struct boot_params *bp) { } ++static inline void sme_enable(struct boot_params *bp) { } + + static inline void sev_es_init_vc_handling(void) { } + +diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h +index c75cc5610be30..621bac6b74011 100644 +--- a/arch/x86/include/asm/msr-index.h ++++ b/arch/x86/include/asm/msr-index.h +@@ -50,10 +50,13 @@ + #define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ + #define SPEC_CTRL_RRSBA_DIS_S_SHIFT 6 /* Disable RRSBA behavior */ + #define SPEC_CTRL_RRSBA_DIS_S 
BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT) ++#define SPEC_CTRL_BHI_DIS_S_SHIFT 10 /* Disable Branch History Injection behavior */ ++#define SPEC_CTRL_BHI_DIS_S BIT(SPEC_CTRL_BHI_DIS_S_SHIFT) + + /* A mask for bits which the kernel toggles when controlling mitigations */ + #define SPEC_CTRL_MITIGATIONS_MASK (SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD \ +- | SPEC_CTRL_RRSBA_DIS_S) ++ | SPEC_CTRL_RRSBA_DIS_S \ ++ | SPEC_CTRL_BHI_DIS_S) + + #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ + #define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */ +@@ -152,6 +155,10 @@ + * are restricted to targets in + * kernel. + */ ++#define ARCH_CAP_BHI_NO BIT(20) /* ++ * CPU is not affected by Branch ++ * History Injection. ++ */ + #define ARCH_CAP_PBRSB_NO BIT(24) /* + * Not susceptible to Post-Barrier + * Return Stack Buffer Predictions. +diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h +index 8ae2cb30ade3d..a8781c8763b44 100644 +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -271,11 +271,20 @@ + .Lskip_rsb_\@: + .endm + ++/* ++ * The CALL to srso_alias_untrain_ret() must be patched in directly at ++ * the spot where untraining must be done, ie., srso_alias_untrain_ret() ++ * must be the target of a CALL instruction instead of indirectly ++ * jumping to a wrapper which then calls it. Therefore, this macro is ++ * called outside of __UNTRAIN_RET below, for the time being, before the ++ * kernel can support nested alternatives with arbitrary nesting. ++ */ ++.macro CALL_UNTRAIN_RET + #if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_SRSO) +-#define CALL_UNTRAIN_RET "call entry_untrain_ret" +-#else +-#define CALL_UNTRAIN_RET "" ++ ALTERNATIVE_2 "", "call entry_untrain_ret", X86_FEATURE_UNRET, \ ++ "call srso_alias_untrain_ret", X86_FEATURE_SRSO_ALIAS + #endif ++.endm + + /* + * Mitigate RETBleed for AMD/Hygon Zen uarch. Requires KERNEL CR3 because the +@@ -288,38 +297,24 @@ + * As such, this must be placed after every *SWITCH_TO_KERNEL_CR3 at a point + * where we have a stack but before any RET instruction. 
+ */ +-.macro UNTRAIN_RET +-#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) || \ +- defined(CONFIG_CALL_DEPTH_TRACKING) || defined(CONFIG_CPU_SRSO) ++.macro __UNTRAIN_RET ibpb_feature, call_depth_insns ++#if defined(CONFIG_RETHUNK) || defined(CONFIG_CPU_IBPB_ENTRY) + VALIDATE_UNRET_END +- ALTERNATIVE_3 "", \ +- CALL_UNTRAIN_RET, X86_FEATURE_UNRET, \ +- "call entry_ibpb", X86_FEATURE_ENTRY_IBPB, \ +- __stringify(RESET_CALL_DEPTH), X86_FEATURE_CALL_DEPTH ++ CALL_UNTRAIN_RET ++ ALTERNATIVE_2 "", \ ++ "call entry_ibpb", \ibpb_feature, \ ++ __stringify(\call_depth_insns), X86_FEATURE_CALL_DEPTH + #endif + .endm + +-.macro UNTRAIN_RET_VM +-#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) || \ +- defined(CONFIG_CALL_DEPTH_TRACKING) || defined(CONFIG_CPU_SRSO) +- VALIDATE_UNRET_END +- ALTERNATIVE_3 "", \ +- CALL_UNTRAIN_RET, X86_FEATURE_UNRET, \ +- "call entry_ibpb", X86_FEATURE_IBPB_ON_VMEXIT, \ +- __stringify(RESET_CALL_DEPTH), X86_FEATURE_CALL_DEPTH +-#endif +-.endm ++#define UNTRAIN_RET \ ++ __UNTRAIN_RET X86_FEATURE_ENTRY_IBPB, __stringify(RESET_CALL_DEPTH) + +-.macro UNTRAIN_RET_FROM_CALL +-#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) || \ +- defined(CONFIG_CALL_DEPTH_TRACKING) || defined(CONFIG_CPU_SRSO) +- VALIDATE_UNRET_END +- ALTERNATIVE_3 "", \ +- CALL_UNTRAIN_RET, X86_FEATURE_UNRET, \ +- "call entry_ibpb", X86_FEATURE_ENTRY_IBPB, \ +- __stringify(RESET_CALL_DEPTH_FROM_CALL), X86_FEATURE_CALL_DEPTH +-#endif +-.endm ++#define UNTRAIN_RET_VM \ ++ __UNTRAIN_RET X86_FEATURE_IBPB_ON_VMEXIT, __stringify(RESET_CALL_DEPTH) ++ ++#define UNTRAIN_RET_FROM_CALL \ ++ __UNTRAIN_RET X86_FEATURE_ENTRY_IBPB, __stringify(RESET_CALL_DEPTH_FROM_CALL) + + + .macro CALL_DEPTH_ACCOUNT +@@ -340,6 +335,19 @@ + ALTERNATIVE "", __stringify(verw _ASM_RIP(mds_verw_sel)), X86_FEATURE_CLEAR_CPU_BUF + .endm + ++#ifdef CONFIG_X86_64 ++.macro CLEAR_BRANCH_HISTORY ++ ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP ++.endm ++ ++.macro CLEAR_BRANCH_HISTORY_VMEXIT ++ ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT ++.endm ++#else ++#define CLEAR_BRANCH_HISTORY ++#define CLEAR_BRANCH_HISTORY_VMEXIT ++#endif ++ + #else /* __ASSEMBLY__ */ + + #define ANNOTATE_RETPOLINE_SAFE \ +@@ -359,6 +367,22 @@ extern void __x86_return_thunk(void); + static inline void __x86_return_thunk(void) {} + #endif + ++#ifdef CONFIG_CPU_UNRET_ENTRY ++extern void retbleed_return_thunk(void); ++#else ++static inline void retbleed_return_thunk(void) {} ++#endif ++ ++extern void srso_alias_untrain_ret(void); ++ ++#ifdef CONFIG_CPU_SRSO ++extern void srso_return_thunk(void); ++extern void srso_alias_return_thunk(void); ++#else ++static inline void srso_return_thunk(void) {} ++static inline void srso_alias_return_thunk(void) {} ++#endif ++ + extern void retbleed_return_thunk(void); + extern void srso_return_thunk(void); + extern void srso_alias_return_thunk(void); +@@ -370,6 +394,10 @@ extern void srso_alias_untrain_ret(void); + extern void entry_untrain_ret(void); + extern void entry_ibpb(void); + ++#ifdef CONFIG_X86_64 ++extern void clear_bhb_loop(void); ++#endif ++ + extern void (*x86_return_thunk)(void); + + #ifdef CONFIG_CALL_DEPTH_TRACKING +diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h +index 7ba1726b71c7b..e9187ddd3d1fd 100644 +--- a/arch/x86/include/asm/required-features.h ++++ b/arch/x86/include/asm/required-features.h +@@ -99,6 +99,7 @@ + #define REQUIRED_MASK18 0 + #define 
REQUIRED_MASK19 0 + #define REQUIRED_MASK20 0 +-#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21) ++#define REQUIRED_MASK21 0 ++#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 22) + + #endif /* _ASM_X86_REQUIRED_FEATURES_H */ +diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h +index 36f905797075e..75a5388d40681 100644 +--- a/arch/x86/include/asm/sev.h ++++ b/arch/x86/include/asm/sev.h +@@ -199,15 +199,15 @@ static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate) + struct snp_guest_request_ioctl; + + void setup_ghcb(void); +-void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, +- unsigned long npages); +-void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, +- unsigned long npages); ++void early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, ++ unsigned long npages); ++void early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, ++ unsigned long npages); + void snp_set_memory_shared(unsigned long vaddr, unsigned long npages); + void snp_set_memory_private(unsigned long vaddr, unsigned long npages); + void snp_set_wakeup_secondary_cpu(void); + bool snp_init(struct boot_params *bp); +-void __init __noreturn snp_abort(void); ++void __noreturn snp_abort(void); + void snp_dmi_setup(void); + int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio); + void snp_accept_memory(phys_addr_t start, phys_addr_t end); +diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h +index 4fb36fba4b5a1..03bb950eba690 100644 +--- a/arch/x86/include/asm/syscall.h ++++ b/arch/x86/include/asm/syscall.h +@@ -16,19 +16,17 @@ + #include <asm/thread_info.h> /* for TS_COMPAT */ + #include <asm/unistd.h> + ++/* This is used purely for kernel/trace/trace_syscalls.c */ + typedef long (*sys_call_ptr_t)(const struct pt_regs *); + extern const sys_call_ptr_t sys_call_table[]; + +-#if defined(CONFIG_X86_32) +-#define ia32_sys_call_table sys_call_table +-#else + /* + * These may not exist, but still put the prototypes in so we + * can use IS_ENABLED(). + */ +-extern const sys_call_ptr_t ia32_sys_call_table[]; +-extern const sys_call_ptr_t x32_sys_call_table[]; +-#endif ++extern long ia32_sys_call(const struct pt_regs *, unsigned int nr); ++extern long x32_sys_call(const struct pt_regs *, unsigned int nr); ++extern long x64_sys_call(const struct pt_regs *, unsigned int nr); + + /* + * Only the low 32 bits of orig_ax are meaningful, so we return int. 
+@@ -127,6 +125,7 @@ static inline int syscall_get_arch(struct task_struct *task) + } + + void do_syscall_64(struct pt_regs *regs, int nr); ++void do_int80_emulation(struct pt_regs *regs); + + #endif /* CONFIG_X86_32 */ + +diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h +index 01d19fc223463..eeea058cf6028 100644 +--- a/arch/x86/include/uapi/asm/bootparam.h ++++ b/arch/x86/include/uapi/asm/bootparam.h +@@ -38,6 +38,7 @@ + #define XLF_EFI_KEXEC (1<<4) + #define XLF_5LEVEL (1<<5) + #define XLF_5LEVEL_ENABLED (1<<6) ++#define XLF_MEM_ENCRYPTION (1<<7) + + #ifndef __ASSEMBLY__ + +diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c +index 031bca974fbf3..9fd91022d92d0 100644 +--- a/arch/x86/kernel/cpu/amd.c ++++ b/arch/x86/kernel/cpu/amd.c +@@ -66,20 +66,6 @@ static const int amd_erratum_400[] = + static const int amd_erratum_383[] = + AMD_OSVW_ERRATUM(3, AMD_MODEL_RANGE(0x10, 0, 0, 0xff, 0xf)); + +-/* #1054: Instructions Retired Performance Counter May Be Inaccurate */ +-static const int amd_erratum_1054[] = +- AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0, 0, 0x2f, 0xf)); +- +-static const int amd_zenbleed[] = +- AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0x30, 0x0, 0x4f, 0xf), +- AMD_MODEL_RANGE(0x17, 0x60, 0x0, 0x7f, 0xf), +- AMD_MODEL_RANGE(0x17, 0x90, 0x0, 0x91, 0xf), +- AMD_MODEL_RANGE(0x17, 0xa0, 0x0, 0xaf, 0xf)); +- +-static const int amd_div0[] = +- AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0x00, 0x0, 0x2f, 0xf), +- AMD_MODEL_RANGE(0x17, 0x50, 0x0, 0x5f, 0xf)); +- + static const int amd_erratum_1485[] = + AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x19, 0x10, 0x0, 0x1f, 0xf), + AMD_MODEL_RANGE(0x19, 0x60, 0x0, 0xaf, 0xf)); +@@ -620,6 +606,49 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) + } + + resctrl_cpu_detect(c); ++ ++ /* Figure out Zen generations: */ ++ switch (c->x86) { ++ case 0x17: { ++ switch (c->x86_model) { ++ case 0x00 ... 0x2f: ++ case 0x50 ... 0x5f: ++ setup_force_cpu_cap(X86_FEATURE_ZEN1); ++ break; ++ case 0x30 ... 0x4f: ++ case 0x60 ... 0x7f: ++ case 0x90 ... 0x91: ++ case 0xa0 ... 0xaf: ++ setup_force_cpu_cap(X86_FEATURE_ZEN2); ++ break; ++ default: ++ goto warn; ++ } ++ break; ++ } ++ case 0x19: { ++ switch (c->x86_model) { ++ case 0x00 ... 0x0f: ++ case 0x20 ... 0x5f: ++ setup_force_cpu_cap(X86_FEATURE_ZEN3); ++ break; ++ case 0x10 ... 0x1f: ++ case 0x60 ... 0xaf: ++ setup_force_cpu_cap(X86_FEATURE_ZEN4); ++ break; ++ default: ++ goto warn; ++ } ++ break; ++ } ++ default: ++ break; ++ } ++ ++ return; ++ ++warn: ++ WARN_ONCE(1, "Family 0x%x, model: 0x%x??\n", c->x86, c->x86_model); + } + + static void early_detect_mem_encrypt(struct cpuinfo_x86 *c) +@@ -945,6 +974,19 @@ static void init_amd_bd(struct cpuinfo_x86 *c) + clear_rdrand_cpuid_bit(c); + } + ++static void fix_erratum_1386(struct cpuinfo_x86 *c) ++{ ++ /* ++ * Work around Erratum 1386. The XSAVES instruction malfunctions in ++ * certain circumstances on Zen1/2 uarch, and not all parts have had ++ * updated microcode at the time of writing (March 2023). ++ * ++ * Affected parts all have no supervisor XSAVE states, meaning that ++ * the XSAVEC instruction (which works fine) is equivalent. ++ */ ++ clear_cpu_cap(c, X86_FEATURE_XSAVES); ++} ++ + void init_spectral_chicken(struct cpuinfo_x86 *c) + { + #ifdef CONFIG_CPU_UNRET_ENTRY +@@ -965,24 +1007,19 @@ void init_spectral_chicken(struct cpuinfo_x86 *c) + } + } + #endif +- /* +- * Work around Erratum 1386. 
The XSAVES instruction malfunctions in +- * certain circumstances on Zen1/2 uarch, and not all parts have had +- * updated microcode at the time of writing (March 2023). +- * +- * Affected parts all have no supervisor XSAVE states, meaning that +- * the XSAVEC instruction (which works fine) is equivalent. +- */ +- clear_cpu_cap(c, X86_FEATURE_XSAVES); + } + + static void init_amd_zn(struct cpuinfo_x86 *c) + { +- set_cpu_cap(c, X86_FEATURE_ZEN); +- ++ setup_force_cpu_cap(X86_FEATURE_ZEN); + #ifdef CONFIG_NUMA + node_reclaim_distance = 32; + #endif ++} ++ ++static void init_amd_zen1(struct cpuinfo_x86 *c) ++{ ++ fix_erratum_1386(c); + + /* Fix up CPUID bits, but only if not virtualised. */ + if (!cpu_has(c, X86_FEATURE_HYPERVISOR)) { +@@ -999,6 +1036,9 @@ static void init_amd_zn(struct cpuinfo_x86 *c) + if (c->x86 == 0x19 && !cpu_has(c, X86_FEATURE_BTC_NO)) + set_cpu_cap(c, X86_FEATURE_BTC_NO); + } ++ ++ pr_notice_once("AMD Zen1 DIV0 bug detected. Disable SMT for full protection.\n"); ++ setup_force_cpu_bug(X86_BUG_DIV0); + } + + static bool cpu_has_zenbleed_microcode(void) +@@ -1023,11 +1063,8 @@ static bool cpu_has_zenbleed_microcode(void) + return true; + } + +-static void zenbleed_check(struct cpuinfo_x86 *c) ++static void zen2_zenbleed_check(struct cpuinfo_x86 *c) + { +- if (!cpu_has_amd_erratum(c, amd_zenbleed)) +- return; +- + if (cpu_has(c, X86_FEATURE_HYPERVISOR)) + return; + +@@ -1042,6 +1079,20 @@ static void zenbleed_check(struct cpuinfo_x86 *c) + } + } + ++static void init_amd_zen2(struct cpuinfo_x86 *c) ++{ ++ fix_erratum_1386(c); ++ zen2_zenbleed_check(c); ++} ++ ++static void init_amd_zen3(struct cpuinfo_x86 *c) ++{ ++} ++ ++static void init_amd_zen4(struct cpuinfo_x86 *c) ++{ ++} ++ + static void init_amd(struct cpuinfo_x86 *c) + { + early_init_amd(c); +@@ -1080,6 +1131,15 @@ static void init_amd(struct cpuinfo_x86 *c) + case 0x19: init_amd_zn(c); break; + } + ++ if (boot_cpu_has(X86_FEATURE_ZEN1)) ++ init_amd_zen1(c); ++ else if (boot_cpu_has(X86_FEATURE_ZEN2)) ++ init_amd_zen2(c); ++ else if (boot_cpu_has(X86_FEATURE_ZEN3)) ++ init_amd_zen3(c); ++ else if (boot_cpu_has(X86_FEATURE_ZEN4)) ++ init_amd_zen4(c); ++ + /* + * Enable workaround for FXSAVE leak on CPUs + * without a XSaveErPtr feature +@@ -1131,7 +1191,7 @@ static void init_amd(struct cpuinfo_x86 *c) + * Counter May Be Inaccurate". + */ + if (cpu_has(c, X86_FEATURE_IRPERF) && +- !cpu_has_amd_erratum(c, amd_erratum_1054)) ++ (boot_cpu_has(X86_FEATURE_ZEN1) && c->x86_model > 0x2f)) + msr_set_bit(MSR_K7_HWCR, MSR_K7_HWCR_IRPERF_EN_BIT); + + check_null_seg_clears_base(c); +@@ -1147,13 +1207,6 @@ static void init_amd(struct cpuinfo_x86 *c) + cpu_has(c, X86_FEATURE_AUTOIBRS)) + WARN_ON_ONCE(msr_set_bit(MSR_EFER, _EFER_AUTOIBRS)); + +- zenbleed_check(c); +- +- if (cpu_has_amd_erratum(c, amd_div0)) { +- pr_notice_once("AMD Zen1 DIV0 bug detected. 
Disable SMT for full protection.\n"); +- setup_force_cpu_bug(X86_BUG_DIV0); +- } +- + if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && + cpu_has_amd_erratum(c, amd_erratum_1485)) + msr_set_bit(MSR_ZEN4_BP_CFG, MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT); +@@ -1313,7 +1366,7 @@ static void zenbleed_check_cpu(void *unused) + { + struct cpuinfo_x86 *c = &cpu_data(smp_processor_id()); + +- zenbleed_check(c); ++ zen2_zenbleed_check(c); + } + + void amd_check_microcode(void) +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index 3452f7271d074..3fc2301556271 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -63,7 +63,7 @@ EXPORT_SYMBOL_GPL(x86_pred_cmd); + + static DEFINE_MUTEX(spec_ctrl_mutex); + +-void (*x86_return_thunk)(void) __ro_after_init = &__x86_return_thunk; ++void (*x86_return_thunk)(void) __ro_after_init = __x86_return_thunk; + + /* Update SPEC_CTRL MSR and its cached copy unconditionally */ + static void update_spec_ctrl(u64 val) +@@ -1108,8 +1108,7 @@ static void __init retbleed_select_mitigation(void) + setup_force_cpu_cap(X86_FEATURE_RETHUNK); + setup_force_cpu_cap(X86_FEATURE_UNRET); + +- if (IS_ENABLED(CONFIG_RETHUNK)) +- x86_return_thunk = retbleed_return_thunk; ++ x86_return_thunk = retbleed_return_thunk; + + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && + boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) +@@ -1607,6 +1606,79 @@ static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_ + dump_stack(); + } + ++/* ++ * Set BHI_DIS_S to prevent indirect branches in kernel to be influenced by ++ * branch history in userspace. Not needed if BHI_NO is set. ++ */ ++static bool __init spec_ctrl_bhi_dis(void) ++{ ++ if (!boot_cpu_has(X86_FEATURE_BHI_CTRL)) ++ return false; ++ ++ x86_spec_ctrl_base |= SPEC_CTRL_BHI_DIS_S; ++ update_spec_ctrl(x86_spec_ctrl_base); ++ setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_HW); ++ ++ return true; ++} ++ ++enum bhi_mitigations { ++ BHI_MITIGATION_OFF, ++ BHI_MITIGATION_ON, ++ BHI_MITIGATION_AUTO, ++}; ++ ++static enum bhi_mitigations bhi_mitigation __ro_after_init = ++ IS_ENABLED(CONFIG_SPECTRE_BHI_ON) ? BHI_MITIGATION_ON : ++ IS_ENABLED(CONFIG_SPECTRE_BHI_OFF) ? 
BHI_MITIGATION_OFF : ++ BHI_MITIGATION_AUTO; ++ ++static int __init spectre_bhi_parse_cmdline(char *str) ++{ ++ if (!str) ++ return -EINVAL; ++ ++ if (!strcmp(str, "off")) ++ bhi_mitigation = BHI_MITIGATION_OFF; ++ else if (!strcmp(str, "on")) ++ bhi_mitigation = BHI_MITIGATION_ON; ++ else if (!strcmp(str, "auto")) ++ bhi_mitigation = BHI_MITIGATION_AUTO; ++ else ++ pr_err("Ignoring unknown spectre_bhi option (%s)", str); ++ ++ return 0; ++} ++early_param("spectre_bhi", spectre_bhi_parse_cmdline); ++ ++static void __init bhi_select_mitigation(void) ++{ ++ if (bhi_mitigation == BHI_MITIGATION_OFF) ++ return; ++ ++ /* Retpoline mitigates against BHI unless the CPU has RRSBA behavior */ ++ if (cpu_feature_enabled(X86_FEATURE_RETPOLINE) && ++ !(x86_read_arch_cap_msr() & ARCH_CAP_RRSBA)) ++ return; ++ ++ if (spec_ctrl_bhi_dis()) ++ return; ++ ++ if (!IS_ENABLED(CONFIG_X86_64)) ++ return; ++ ++ /* Mitigate KVM by default */ ++ setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT); ++ pr_info("Spectre BHI mitigation: SW BHB clearing on vm exit\n"); ++ ++ if (bhi_mitigation == BHI_MITIGATION_AUTO) ++ return; ++ ++ /* Mitigate syscalls when the mitigation is forced =on */ ++ setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP); ++ pr_info("Spectre BHI mitigation: SW BHB clearing on syscall\n"); ++} ++ + static void __init spectre_v2_select_mitigation(void) + { + enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline(); +@@ -1718,6 +1790,9 @@ static void __init spectre_v2_select_mitigation(void) + mode == SPECTRE_V2_RETPOLINE) + spec_ctrl_disable_kernel_rrsba(); + ++ if (boot_cpu_has(X86_BUG_BHI)) ++ bhi_select_mitigation(); ++ + spectre_v2_enabled = mode; + pr_info("%s\n", spectre_v2_strings[mode]); + +@@ -2695,15 +2770,15 @@ static char *stibp_state(void) + + switch (spectre_v2_user_stibp) { + case SPECTRE_V2_USER_NONE: +- return ", STIBP: disabled"; ++ return "; STIBP: disabled"; + case SPECTRE_V2_USER_STRICT: +- return ", STIBP: forced"; ++ return "; STIBP: forced"; + case SPECTRE_V2_USER_STRICT_PREFERRED: +- return ", STIBP: always-on"; ++ return "; STIBP: always-on"; + case SPECTRE_V2_USER_PRCTL: + case SPECTRE_V2_USER_SECCOMP: + if (static_key_enabled(&switch_to_cond_stibp)) +- return ", STIBP: conditional"; ++ return "; STIBP: conditional"; + } + return ""; + } +@@ -2712,10 +2787,10 @@ static char *ibpb_state(void) + { + if (boot_cpu_has(X86_FEATURE_IBPB)) { + if (static_key_enabled(&switch_mm_always_ibpb)) +- return ", IBPB: always-on"; ++ return "; IBPB: always-on"; + if (static_key_enabled(&switch_mm_cond_ibpb)) +- return ", IBPB: conditional"; +- return ", IBPB: disabled"; ++ return "; IBPB: conditional"; ++ return "; IBPB: disabled"; + } + return ""; + } +@@ -2725,14 +2800,31 @@ static char *pbrsb_eibrs_state(void) + if (boot_cpu_has_bug(X86_BUG_EIBRS_PBRSB)) { + if (boot_cpu_has(X86_FEATURE_RSB_VMEXIT_LITE) || + boot_cpu_has(X86_FEATURE_RSB_VMEXIT)) +- return ", PBRSB-eIBRS: SW sequence"; ++ return "; PBRSB-eIBRS: SW sequence"; + else +- return ", PBRSB-eIBRS: Vulnerable"; ++ return "; PBRSB-eIBRS: Vulnerable"; + } else { +- return ", PBRSB-eIBRS: Not affected"; ++ return "; PBRSB-eIBRS: Not affected"; + } + } + ++static const char * const spectre_bhi_state(void) ++{ ++ if (!boot_cpu_has_bug(X86_BUG_BHI)) ++ return "; BHI: Not affected"; ++ else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_HW)) ++ return "; BHI: BHI_DIS_S"; ++ else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP)) ++ return "; BHI: SW loop, KVM: SW loop"; ++ else if (boot_cpu_has(X86_FEATURE_RETPOLINE) && ++ 
!(x86_read_arch_cap_msr() & ARCH_CAP_RRSBA)) ++ return "; BHI: Retpoline"; ++ else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT)) ++ return "; BHI: Syscall hardening, KVM: SW loop"; ++ ++ return "; BHI: Vulnerable (Syscall hardening enabled)"; ++} ++ + static ssize_t spectre_v2_show_state(char *buf) + { + if (spectre_v2_enabled == SPECTRE_V2_LFENCE) +@@ -2745,13 +2837,15 @@ static ssize_t spectre_v2_show_state(char *buf) + spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE) + return sysfs_emit(buf, "Vulnerable: eIBRS+LFENCE with unprivileged eBPF and SMT\n"); + +- return sysfs_emit(buf, "%s%s%s%s%s%s%s\n", ++ return sysfs_emit(buf, "%s%s%s%s%s%s%s%s\n", + spectre_v2_strings[spectre_v2_enabled], + ibpb_state(), +- boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", ++ boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? "; IBRS_FW" : "", + stibp_state(), +- boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "", ++ boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? "; RSB filling" : "", + pbrsb_eibrs_state(), ++ spectre_bhi_state(), ++ /* this should always be at the end */ + spectre_v2_module_string()); + } + +diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c +index 73cfac3fc9c4c..fc4c9a7fb1e3d 100644 +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -1165,6 +1165,7 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) + #define NO_SPECTRE_V2 BIT(8) + #define NO_MMIO BIT(9) + #define NO_EIBRS_PBRSB BIT(10) ++#define NO_BHI BIT(11) + + #define VULNWL(vendor, family, model, whitelist) \ + X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, whitelist) +@@ -1227,18 +1228,18 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { + VULNWL_INTEL(ATOM_TREMONT_D, NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB), + + /* AMD Family 0xf - 0x12 */ +- VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), +- VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), +- VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), +- VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), ++ VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI), ++ VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI), ++ VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI), ++ VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI), + + /* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */ +- VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB), +- VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB), ++ VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB | NO_BHI), ++ VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB | NO_BHI), + + /* Zhaoxin Family 7 */ +- VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO), +- VULNWL(ZHAOXIN, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO), ++ VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO | NO_BHI), ++ VULNWL(ZHAOXIN, 7, X86_MODEL_ANY, 
NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO | NO_BHI), + {} + }; + +@@ -1475,6 +1476,13 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) + if (vulnerable_to_rfds(ia32_cap)) + setup_force_cpu_bug(X86_BUG_RFDS); + ++ /* When virtualized, eIBRS could be hidden, assume vulnerable */ ++ if (!(ia32_cap & ARCH_CAP_BHI_NO) && ++ !cpu_matches(cpu_vuln_whitelist, NO_BHI) && ++ (boot_cpu_has(X86_FEATURE_IBRS_ENHANCED) || ++ boot_cpu_has(X86_FEATURE_HYPERVISOR))) ++ setup_force_cpu_bug(X86_BUG_BHI); ++ + if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) + return; + +diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c +index 20ab11aec60b8..e103c227acd3a 100644 +--- a/arch/x86/kernel/cpu/mce/core.c ++++ b/arch/x86/kernel/cpu/mce/core.c +@@ -2468,12 +2468,14 @@ static ssize_t set_bank(struct device *s, struct device_attribute *attr, + return -EINVAL; + + b = &per_cpu(mce_banks_array, s->id)[bank]; +- + if (!b->init) + return -ENODEV; + + b->ctl = new; ++ ++ mutex_lock(&mce_sysfs_mutex); + mce_restart(); ++ mutex_unlock(&mce_sysfs_mutex); + + return size; + } +diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c +index 0dad49a09b7a9..af5aa2c754c22 100644 +--- a/arch/x86/kernel/cpu/scattered.c ++++ b/arch/x86/kernel/cpu/scattered.c +@@ -28,6 +28,7 @@ static const struct cpuid_bit cpuid_bits[] = { + { X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 }, + { X86_FEATURE_INTEL_PPIN, CPUID_EBX, 0, 0x00000007, 1 }, + { X86_FEATURE_RRSBA_CTRL, CPUID_EDX, 2, 0x00000007, 2 }, ++ { X86_FEATURE_BHI_CTRL, CPUID_EDX, 4, 0x00000007, 2 }, + { X86_FEATURE_CQM_LLC, CPUID_EDX, 1, 0x0000000f, 0 }, + { X86_FEATURE_CQM_OCCUP_LLC, CPUID_EDX, 0, 0x0000000f, 1 }, + { X86_FEATURE_CQM_MBM_TOTAL, CPUID_EDX, 1, 0x0000000f, 1 }, +@@ -49,6 +50,7 @@ static const struct cpuid_bit cpuid_bits[] = { + { X86_FEATURE_BMEC, CPUID_EBX, 3, 0x80000020, 0 }, + { X86_FEATURE_PERFMON_V2, CPUID_EAX, 0, 0x80000022, 0 }, + { X86_FEATURE_AMD_LBR_V2, CPUID_EAX, 1, 0x80000022, 0 }, ++ { X86_FEATURE_AMD_LBR_PMC_FREEZE, CPUID_EAX, 2, 0x80000022, 0 }, + { 0, 0, 0, 0, 0 } + }; + +diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c +index bbc21798df10e..c58213bce294e 100644 +--- a/arch/x86/kernel/head64.c ++++ b/arch/x86/kernel/head64.c +@@ -41,6 +41,7 @@ + #include <asm/trapnr.h> + #include <asm/sev.h> + #include <asm/tdx.h> ++#include <asm/init.h> + + /* + * Manage page tables very early on. +@@ -84,8 +85,6 @@ static struct desc_ptr startup_gdt_descr = { + .address = 0, + }; + +-#define __head __section(".head.text") +- + static void __head *fixup_pointer(void *ptr, unsigned long physaddr) + { + return ptr - (void *)_text + (void *)physaddr; +diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c +index 15c700d358700..b223922248e9f 100644 +--- a/arch/x86/kernel/mpparse.c ++++ b/arch/x86/kernel/mpparse.c +@@ -196,12 +196,12 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early) + if (!smp_check_mpc(mpc, oem, str)) + return 0; + +- if (early) { +- /* Initialize the lapic mapping */ +- if (!acpi_lapic) +- register_lapic_address(mpc->lapic); ++ /* Initialize the lapic mapping */ ++ if (!acpi_lapic) ++ register_lapic_address(mpc->lapic); ++ ++ if (early) + return 1; +- } + + /* Now process the configuration blocks. 
*/ + while (count < mpc->length) { +diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c +index e63a8d05ce298..eb129277dcdd6 100644 +--- a/arch/x86/kernel/setup.c ++++ b/arch/x86/kernel/setup.c +@@ -35,6 +35,7 @@ + #include <asm/bios_ebda.h> + #include <asm/bugs.h> + #include <asm/cacheinfo.h> ++#include <asm/coco.h> + #include <asm/cpu.h> + #include <asm/efi.h> + #include <asm/gart.h> +@@ -1120,6 +1121,7 @@ void __init setup_arch(char **cmdline_p) + * memory size. + */ + sev_setup_arch(); ++ cc_random_init(); + + efi_fake_memmap(); + efi_find_mirror(); +diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c +index 466fe09898ccd..acbec4de3ec31 100644 +--- a/arch/x86/kernel/sev-shared.c ++++ b/arch/x86/kernel/sev-shared.c +@@ -89,7 +89,8 @@ static bool __init sev_es_check_cpu_features(void) + return true; + } + +-static void __noreturn sev_es_terminate(unsigned int set, unsigned int reason) ++static void __head __noreturn ++sev_es_terminate(unsigned int set, unsigned int reason) + { + u64 val = GHCB_MSR_TERM_REQ; + +@@ -326,13 +327,7 @@ static int sev_cpuid_hv(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid + */ + static const struct snp_cpuid_table *snp_cpuid_get_table(void) + { +- void *ptr; +- +- asm ("lea cpuid_table_copy(%%rip), %0" +- : "=r" (ptr) +- : "p" (&cpuid_table_copy)); +- +- return ptr; ++ return &RIP_REL_REF(cpuid_table_copy); + } + + /* +@@ -391,7 +386,7 @@ static u32 snp_cpuid_calc_xsave_size(u64 xfeatures_en, bool compacted) + return xsave_size; + } + +-static bool ++static bool __head + snp_cpuid_get_validated_func(struct cpuid_leaf *leaf) + { + const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table(); +@@ -528,7 +523,8 @@ static int snp_cpuid_postprocess(struct ghcb *ghcb, struct es_em_ctxt *ctxt, + * Returns -EOPNOTSUPP if feature not enabled. Any other non-zero return value + * should be treated as fatal by caller. + */ +-static int snp_cpuid(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf) ++static int __head ++snp_cpuid(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf) + { + const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table(); + +@@ -570,7 +566,7 @@ static int snp_cpuid(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_le + * page yet, so it only supports the MSR based communication with the + * hypervisor and only the CPUID exit-code. + */ +-void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code) ++void __head do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code) + { + unsigned int subfn = lower_bits(regs->cx, 32); + unsigned int fn = lower_bits(regs->ax, 32); +@@ -1016,7 +1012,8 @@ struct cc_setup_data { + * Search for a Confidential Computing blob passed in as a setup_data entry + * via the Linux Boot Protocol. + */ +-static struct cc_blob_sev_info *find_cc_blob_setup_data(struct boot_params *bp) ++static __head ++struct cc_blob_sev_info *find_cc_blob_setup_data(struct boot_params *bp) + { + struct cc_setup_data *sd = NULL; + struct setup_data *hdr; +@@ -1043,7 +1040,7 @@ static struct cc_blob_sev_info *find_cc_blob_setup_data(struct boot_params *bp) + * mapping needs to be updated in sync with all the changes to virtual memory + * layout and related mapping facilities throughout the boot process. 
+ */ +-static void __init setup_cpuid_table(const struct cc_blob_sev_info *cc_info) ++static void __head setup_cpuid_table(const struct cc_blob_sev_info *cc_info) + { + const struct snp_cpuid_table *cpuid_table_fw, *cpuid_table; + int i; +diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c +index a8db68a063c46..9905dc0e0b096 100644 +--- a/arch/x86/kernel/sev.c ++++ b/arch/x86/kernel/sev.c +@@ -26,6 +26,7 @@ + #include <linux/dmi.h> + #include <uapi/linux/sev-guest.h> + ++#include <asm/init.h> + #include <asm/cpu_entry_area.h> + #include <asm/stacktrace.h> + #include <asm/sev.h> +@@ -683,8 +684,9 @@ static u64 __init get_jump_table_addr(void) + return ret; + } + +-static void early_set_pages_state(unsigned long vaddr, unsigned long paddr, +- unsigned long npages, enum psc_op op) ++static void __head ++early_set_pages_state(unsigned long vaddr, unsigned long paddr, ++ unsigned long npages, enum psc_op op) + { + unsigned long paddr_end; + u64 val; +@@ -740,7 +742,7 @@ static void early_set_pages_state(unsigned long vaddr, unsigned long paddr, + sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC); + } + +-void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, ++void __head early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, + unsigned long npages) + { + /* +@@ -2045,7 +2047,7 @@ bool __init handle_vc_boot_ghcb(struct pt_regs *regs) + * + * Scan for the blob in that order. + */ +-static __init struct cc_blob_sev_info *find_cc_blob(struct boot_params *bp) ++static __head struct cc_blob_sev_info *find_cc_blob(struct boot_params *bp) + { + struct cc_blob_sev_info *cc_info; + +@@ -2071,7 +2073,7 @@ static __init struct cc_blob_sev_info *find_cc_blob(struct boot_params *bp) + return cc_info; + } + +-bool __init snp_init(struct boot_params *bp) ++bool __head snp_init(struct boot_params *bp) + { + struct cc_blob_sev_info *cc_info; + +@@ -2093,7 +2095,7 @@ bool __init snp_init(struct boot_params *bp) + return true; + } + +-void __init __noreturn snp_abort(void) ++void __head __noreturn snp_abort(void) + { + sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED); + } +diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S +index f15fb71f280e2..54a5596adaa61 100644 +--- a/arch/x86/kernel/vmlinux.lds.S ++++ b/arch/x86/kernel/vmlinux.lds.S +@@ -139,10 +139,7 @@ SECTIONS + STATIC_CALL_TEXT + + ALIGN_ENTRY_TEXT_BEGIN +-#ifdef CONFIG_CPU_SRSO + *(.text..__x86.rethunk_untrain) +-#endif +- + ENTRY_TEXT + + #ifdef CONFIG_CPU_SRSO +@@ -520,12 +517,12 @@ INIT_PER_CPU(irq_stack_backing_store); + "fixed_percpu_data is not at start of per-cpu area"); + #endif + +-#ifdef CONFIG_RETHUNK ++#ifdef CONFIG_CPU_UNRET_ENTRY + . = ASSERT((retbleed_return_thunk & 0x3f) == 0, "retbleed_return_thunk not cacheline-aligned"); +-. = ASSERT((srso_safe_ret & 0x3f) == 0, "srso_safe_ret not cacheline-aligned"); + #endif + + #ifdef CONFIG_CPU_SRSO ++. = ASSERT((srso_safe_ret & 0x3f) == 0, "srso_safe_ret not cacheline-aligned"); + /* + * GNU ld cannot do XOR until 2.41. 
+ * https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=f6f78318fca803c4907fb8d7f6ded8295f1947b1 +diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c +index f7901cb4d2fa4..11c484d72eab2 100644 +--- a/arch/x86/kvm/mmu/mmu.c ++++ b/arch/x86/kvm/mmu/mmu.c +@@ -3120,7 +3120,7 @@ static int host_pfn_mapping_level(struct kvm *kvm, gfn_t gfn, + if (pud_none(pud) || !pud_present(pud)) + goto out; + +- if (pud_large(pud)) { ++ if (pud_leaf(pud)) { + level = PG_LEVEL_1G; + goto out; + } +diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h +index aadefcaa9561d..2f4e155080bad 100644 +--- a/arch/x86/kvm/reverse_cpuid.h ++++ b/arch/x86/kvm/reverse_cpuid.h +@@ -52,7 +52,7 @@ enum kvm_only_cpuid_leafs { + #define X86_FEATURE_IPRED_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 1) + #define KVM_X86_FEATURE_RRSBA_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 2) + #define X86_FEATURE_DDPD_U KVM_X86_FEATURE(CPUID_7_2_EDX, 3) +-#define X86_FEATURE_BHI_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 4) ++#define KVM_X86_FEATURE_BHI_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 4) + #define X86_FEATURE_MCDT_NO KVM_X86_FEATURE(CPUID_7_2_EDX, 5) + + /* CPUID level 0x80000007 (EDX). */ +@@ -102,10 +102,12 @@ static const struct cpuid_reg reverse_cpuid[] = { + */ + static __always_inline void reverse_cpuid_check(unsigned int x86_leaf) + { ++ BUILD_BUG_ON(NR_CPUID_WORDS != NCAPINTS); + BUILD_BUG_ON(x86_leaf == CPUID_LNX_1); + BUILD_BUG_ON(x86_leaf == CPUID_LNX_2); + BUILD_BUG_ON(x86_leaf == CPUID_LNX_3); + BUILD_BUG_ON(x86_leaf == CPUID_LNX_4); ++ BUILD_BUG_ON(x86_leaf == CPUID_LNX_5); + BUILD_BUG_ON(x86_leaf >= ARRAY_SIZE(reverse_cpuid)); + BUILD_BUG_ON(reverse_cpuid[x86_leaf].function == 0); + } +@@ -126,6 +128,7 @@ static __always_inline u32 __feature_translate(int x86_feature) + KVM_X86_TRANSLATE_FEATURE(CONSTANT_TSC); + KVM_X86_TRANSLATE_FEATURE(PERFMON_V2); + KVM_X86_TRANSLATE_FEATURE(RRSBA_CTRL); ++ KVM_X86_TRANSLATE_FEATURE(BHI_CTRL); + default: + return x86_feature; + } +diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c +index e86231c3b8a54..c5845f31c34dc 100644 +--- a/arch/x86/kvm/svm/sev.c ++++ b/arch/x86/kvm/svm/sev.c +@@ -84,9 +84,10 @@ struct enc_region { + }; + + /* Called with the sev_bitmap_lock held, or on shutdown */ +-static int sev_flush_asids(int min_asid, int max_asid) ++static int sev_flush_asids(unsigned int min_asid, unsigned int max_asid) + { +- int ret, asid, error = 0; ++ int ret, error = 0; ++ unsigned int asid; + + /* Check if there are any ASIDs to reclaim before performing a flush */ + asid = find_next_bit(sev_reclaim_asid_bitmap, nr_asids, min_asid); +@@ -116,7 +117,7 @@ static inline bool is_mirroring_enc_context(struct kvm *kvm) + } + + /* Must be called with the sev_bitmap_lock held */ +-static bool __sev_recycle_asids(int min_asid, int max_asid) ++static bool __sev_recycle_asids(unsigned int min_asid, unsigned int max_asid) + { + if (sev_flush_asids(min_asid, max_asid)) + return false; +@@ -143,8 +144,20 @@ static void sev_misc_cg_uncharge(struct kvm_sev_info *sev) + + static int sev_asid_new(struct kvm_sev_info *sev) + { +- int asid, min_asid, max_asid, ret; ++ /* ++ * SEV-enabled guests must use asid from min_sev_asid to max_sev_asid. ++ * SEV-ES-enabled guest can use from 1 to min_sev_asid - 1. ++ * Note: min ASID can end up larger than the max if basic SEV support is ++ * effectively disabled by disallowing use of ASIDs for SEV guests. ++ */ ++ unsigned int min_asid = sev->es_active ? 1 : min_sev_asid; ++ unsigned int max_asid = sev->es_active ? 
min_sev_asid - 1 : max_sev_asid; ++ unsigned int asid; + bool retry = true; ++ int ret; ++ ++ if (min_asid > max_asid) ++ return -ENOTTY; + + WARN_ON(sev->misc_cg); + sev->misc_cg = get_current_misc_cg(); +@@ -157,12 +170,6 @@ static int sev_asid_new(struct kvm_sev_info *sev) + + mutex_lock(&sev_bitmap_lock); + +- /* +- * SEV-enabled guests must use asid from min_sev_asid to max_sev_asid. +- * SEV-ES-enabled guest can use from 1 to min_sev_asid - 1. +- */ +- min_asid = sev->es_active ? 1 : min_sev_asid; +- max_asid = sev->es_active ? min_sev_asid - 1 : max_sev_asid; + again: + asid = find_next_zero_bit(sev_asid_bitmap, max_asid + 1, min_asid); + if (asid > max_asid) { +@@ -187,7 +194,7 @@ static int sev_asid_new(struct kvm_sev_info *sev) + return ret; + } + +-static int sev_get_asid(struct kvm *kvm) ++static unsigned int sev_get_asid(struct kvm *kvm) + { + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; + +@@ -284,8 +291,8 @@ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp) + + static int sev_bind_asid(struct kvm *kvm, unsigned int handle, int *error) + { ++ unsigned int asid = sev_get_asid(kvm); + struct sev_data_activate activate; +- int asid = sev_get_asid(kvm); + int ret; + + /* activate ASID on the given handle */ +@@ -2234,8 +2241,10 @@ void __init sev_hardware_setup(void) + goto out; + } + +- sev_asid_count = max_sev_asid - min_sev_asid + 1; +- WARN_ON_ONCE(misc_cg_set_capacity(MISC_CG_RES_SEV, sev_asid_count)); ++ if (min_sev_asid <= max_sev_asid) { ++ sev_asid_count = max_sev_asid - min_sev_asid + 1; ++ WARN_ON_ONCE(misc_cg_set_capacity(MISC_CG_RES_SEV, sev_asid_count)); ++ } + sev_supported = true; + + /* SEV-ES support requested? */ +@@ -2266,7 +2275,9 @@ void __init sev_hardware_setup(void) + out: + if (boot_cpu_has(X86_FEATURE_SEV)) + pr_info("SEV %s (ASIDs %u - %u)\n", +- sev_supported ? "enabled" : "disabled", ++ sev_supported ? min_sev_asid <= max_sev_asid ? "enabled" : ++ "unusable" : ++ "disabled", + min_sev_asid, max_sev_asid); + if (boot_cpu_has(X86_FEATURE_SEV_ES)) + pr_info("SEV-ES %s (ASIDs %u - %u)\n", +@@ -2314,7 +2325,7 @@ int sev_cpu_init(struct svm_cpu_data *sd) + */ + static void sev_flush_encrypted_page(struct kvm_vcpu *vcpu, void *va) + { +- int asid = to_kvm_svm(vcpu->kvm)->sev_info.asid; ++ unsigned int asid = sev_get_asid(vcpu->kvm); + + /* + * Note! 
The address must be a kernel address, as regular page walk +@@ -2632,7 +2643,7 @@ void sev_es_unmap_ghcb(struct vcpu_svm *svm) + void pre_sev_run(struct vcpu_svm *svm, int cpu) + { + struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, cpu); +- int asid = sev_get_asid(svm->vcpu.kvm); ++ unsigned int asid = sev_get_asid(svm->vcpu.kvm); + + /* Assign the asid allocated with this SEV guest */ + svm->asid = asid; +diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h +index 83843379813ee..b82e6ed4f0241 100644 +--- a/arch/x86/kvm/trace.h ++++ b/arch/x86/kvm/trace.h +@@ -732,13 +732,13 @@ TRACE_EVENT(kvm_nested_intr_vmexit, + * Tracepoint for nested #vmexit because of interrupt pending + */ + TRACE_EVENT(kvm_invlpga, +- TP_PROTO(__u64 rip, int asid, u64 address), ++ TP_PROTO(__u64 rip, unsigned int asid, u64 address), + TP_ARGS(rip, asid, address), + + TP_STRUCT__entry( +- __field( __u64, rip ) +- __field( int, asid ) +- __field( __u64, address ) ++ __field( __u64, rip ) ++ __field( unsigned int, asid ) ++ __field( __u64, address ) + ), + + TP_fast_assign( +@@ -747,7 +747,7 @@ TRACE_EVENT(kvm_invlpga, + __entry->address = address; + ), + +- TP_printk("rip: 0x%016llx asid: %d address: 0x%016llx", ++ TP_printk("rip: 0x%016llx asid: %u address: 0x%016llx", + __entry->rip, __entry->asid, __entry->address) + ); + +diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S +index 139960deb7362..9522d46567f81 100644 +--- a/arch/x86/kvm/vmx/vmenter.S ++++ b/arch/x86/kvm/vmx/vmenter.S +@@ -275,6 +275,8 @@ SYM_INNER_LABEL_ALIGN(vmx_vmexit, SYM_L_GLOBAL) + + call vmx_spec_ctrl_restore_host + ++ CLEAR_BRANCH_HISTORY_VMEXIT ++ + /* Put return value in AX */ + mov %_ASM_BX, %_ASM_AX + +diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c +index 4aafd007964fe..4ed8a7dc05369 100644 +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -1621,7 +1621,7 @@ static bool kvm_is_immutable_feature_msr(u32 msr) + ARCH_CAP_PSCHANGE_MC_NO | ARCH_CAP_TSX_CTRL_MSR | ARCH_CAP_TAA_NO | \ + ARCH_CAP_SBDR_SSDP_NO | ARCH_CAP_FBSDP_NO | ARCH_CAP_PSDP_NO | \ + ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO | ARCH_CAP_GDS_NO | \ +- ARCH_CAP_RFDS_NO | ARCH_CAP_RFDS_CLEAR) ++ ARCH_CAP_RFDS_NO | ARCH_CAP_RFDS_CLEAR | ARCH_CAP_BHI_NO) + + static u64 kvm_get_arch_capabilities(void) + { +diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile +index ea3a28e7b613c..f0dae4fb6d071 100644 +--- a/arch/x86/lib/Makefile ++++ b/arch/x86/lib/Makefile +@@ -14,19 +14,6 @@ ifdef CONFIG_KCSAN + CFLAGS_REMOVE_delay.o = $(CC_FLAGS_FTRACE) + endif + +-# Early boot use of cmdline; don't instrument it +-ifdef CONFIG_AMD_MEM_ENCRYPT +-KCOV_INSTRUMENT_cmdline.o := n +-KASAN_SANITIZE_cmdline.o := n +-KCSAN_SANITIZE_cmdline.o := n +- +-ifdef CONFIG_FUNCTION_TRACER +-CFLAGS_REMOVE_cmdline.o = -pg +-endif +- +-CFLAGS_cmdline.o := -fno-stack-protector -fno-jump-tables +-endif +- + inat_tables_script = $(srctree)/arch/x86/tools/gen-insn-attr-x86.awk + inat_tables_maps = $(srctree)/arch/x86/lib/x86-opcode-map.txt + quiet_cmd_inat_tables = GEN $@ +diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S +index cd86aeb5fdd3e..ffa51f392e17a 100644 +--- a/arch/x86/lib/retpoline.S ++++ b/arch/x86/lib/retpoline.S +@@ -126,12 +126,13 @@ SYM_CODE_END(__x86_indirect_jump_thunk_array) + #include <asm/GEN-for-each-reg.h> + #undef GEN + #endif +-/* +- * This function name is magical and is used by -mfunction-return=thunk-extern +- * for the compiler to generate JMPs to it. 
+- */ ++ + #ifdef CONFIG_RETHUNK + ++ .section .text..__x86.return_thunk ++ ++#ifdef CONFIG_CPU_SRSO ++ + /* + * srso_alias_untrain_ret() and srso_alias_safe_ret() are placed at + * special addresses: +@@ -147,9 +148,7 @@ SYM_CODE_END(__x86_indirect_jump_thunk_array) + * + * As a result, srso_alias_safe_ret() becomes a safe return. + */ +-#ifdef CONFIG_CPU_SRSO +- .section .text..__x86.rethunk_untrain +- ++ .pushsection .text..__x86.rethunk_untrain + SYM_START(srso_alias_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE) + UNWIND_HINT_FUNC + ANNOTATE_NOENDBR +@@ -158,17 +157,9 @@ SYM_START(srso_alias_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE) + jmp srso_alias_return_thunk + SYM_FUNC_END(srso_alias_untrain_ret) + __EXPORT_THUNK(srso_alias_untrain_ret) ++ .popsection + +- .section .text..__x86.rethunk_safe +-#else +-/* dummy definition for alternatives */ +-SYM_START(srso_alias_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE) +- ANNOTATE_UNRET_SAFE +- ret +- int3 +-SYM_FUNC_END(srso_alias_untrain_ret) +-#endif +- ++ .pushsection .text..__x86.rethunk_safe + SYM_START(srso_alias_safe_ret, SYM_L_GLOBAL, SYM_A_NONE) + lea 8(%_ASM_SP), %_ASM_SP + UNWIND_HINT_FUNC +@@ -177,14 +168,69 @@ SYM_START(srso_alias_safe_ret, SYM_L_GLOBAL, SYM_A_NONE) + int3 + SYM_FUNC_END(srso_alias_safe_ret) + +- .section .text..__x86.return_thunk +- +-SYM_CODE_START(srso_alias_return_thunk) ++SYM_CODE_START_NOALIGN(srso_alias_return_thunk) + UNWIND_HINT_FUNC + ANNOTATE_NOENDBR + call srso_alias_safe_ret + ud2 + SYM_CODE_END(srso_alias_return_thunk) ++ .popsection ++ ++/* ++ * SRSO untraining sequence for Zen1/2, similar to retbleed_untrain_ret() ++ * above. On kernel entry, srso_untrain_ret() is executed which is a ++ * ++ * movabs $0xccccc30824648d48,%rax ++ * ++ * and when the return thunk executes the inner label srso_safe_ret() ++ * later, it is a stack manipulation and a RET which is mispredicted and ++ * thus a "safe" one to use. ++ */ ++ .align 64 ++ .skip 64 - (srso_safe_ret - srso_untrain_ret), 0xcc ++SYM_START(srso_untrain_ret, SYM_L_LOCAL, SYM_A_NONE) ++ ANNOTATE_NOENDBR ++ .byte 0x48, 0xb8 ++ ++/* ++ * This forces the function return instruction to speculate into a trap ++ * (UD2 in srso_return_thunk() below). This RET will then mispredict ++ * and execution will continue at the return site read from the top of ++ * the stack. ++ */ ++SYM_INNER_LABEL(srso_safe_ret, SYM_L_GLOBAL) ++ lea 8(%_ASM_SP), %_ASM_SP ++ ret ++ int3 ++ int3 ++ /* end of movabs */ ++ lfence ++ call srso_safe_ret ++ ud2 ++SYM_CODE_END(srso_safe_ret) ++SYM_FUNC_END(srso_untrain_ret) ++ ++SYM_CODE_START(srso_return_thunk) ++ UNWIND_HINT_FUNC ++ ANNOTATE_NOENDBR ++ call srso_safe_ret ++ ud2 ++SYM_CODE_END(srso_return_thunk) ++ ++#define JMP_SRSO_UNTRAIN_RET "jmp srso_untrain_ret" ++#else /* !CONFIG_CPU_SRSO */ ++#define JMP_SRSO_UNTRAIN_RET "ud2" ++/* Dummy for the alternative in CALL_UNTRAIN_RET. */ ++SYM_CODE_START(srso_alias_untrain_ret) ++ ANNOTATE_UNRET_SAFE ++ ANNOTATE_NOENDBR ++ ret ++ int3 ++SYM_FUNC_END(srso_alias_untrain_ret) ++__EXPORT_THUNK(srso_alias_untrain_ret) ++#endif /* CONFIG_CPU_SRSO */ ++ ++#ifdef CONFIG_CPU_UNRET_ENTRY + + /* + * Some generic notes on the untraining sequences: +@@ -266,65 +312,19 @@ SYM_CODE_END(retbleed_return_thunk) + SYM_FUNC_END(retbleed_untrain_ret) + __EXPORT_THUNK(retbleed_untrain_ret) + +-/* +- * SRSO untraining sequence for Zen1/2, similar to retbleed_untrain_ret() +- * above. 
On kernel entry, srso_untrain_ret() is executed which is a +- * +- * movabs $0xccccc30824648d48,%rax +- * +- * and when the return thunk executes the inner label srso_safe_ret() +- * later, it is a stack manipulation and a RET which is mispredicted and +- * thus a "safe" one to use. +- */ +- .align 64 +- .skip 64 - (srso_safe_ret - srso_untrain_ret), 0xcc +-SYM_START(srso_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE) +- ANNOTATE_NOENDBR +- .byte 0x48, 0xb8 +- +-/* +- * This forces the function return instruction to speculate into a trap +- * (UD2 in srso_return_thunk() below). This RET will then mispredict +- * and execution will continue at the return site read from the top of +- * the stack. +- */ +-SYM_INNER_LABEL(srso_safe_ret, SYM_L_GLOBAL) +- lea 8(%_ASM_SP), %_ASM_SP +- ret +- int3 +- int3 +- /* end of movabs */ +- lfence +- call srso_safe_ret +- ud2 +-SYM_CODE_END(srso_safe_ret) +-SYM_FUNC_END(srso_untrain_ret) +-__EXPORT_THUNK(srso_untrain_ret) ++#define JMP_RETBLEED_UNTRAIN_RET "jmp retbleed_untrain_ret" ++#else /* !CONFIG_CPU_UNRET_ENTRY */ ++#define JMP_RETBLEED_UNTRAIN_RET "ud2" ++#endif /* CONFIG_CPU_UNRET_ENTRY */ + +-SYM_CODE_START(srso_return_thunk) +- UNWIND_HINT_FUNC +- ANNOTATE_NOENDBR +- call srso_safe_ret +- ud2 +-SYM_CODE_END(srso_return_thunk) ++#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_SRSO) + + SYM_FUNC_START(entry_untrain_ret) +- ALTERNATIVE_2 "jmp retbleed_untrain_ret", \ +- "jmp srso_untrain_ret", X86_FEATURE_SRSO, \ +- "jmp srso_alias_untrain_ret", X86_FEATURE_SRSO_ALIAS ++ ALTERNATIVE JMP_RETBLEED_UNTRAIN_RET, JMP_SRSO_UNTRAIN_RET, X86_FEATURE_SRSO + SYM_FUNC_END(entry_untrain_ret) + __EXPORT_THUNK(entry_untrain_ret) + +-SYM_CODE_START(__x86_return_thunk) +- UNWIND_HINT_FUNC +- ANNOTATE_NOENDBR +- ANNOTATE_UNRET_SAFE +- ret +- int3 +-SYM_CODE_END(__x86_return_thunk) +-EXPORT_SYMBOL(__x86_return_thunk) +- +-#endif /* CONFIG_RETHUNK */ ++#endif /* CONFIG_CPU_UNRET_ENTRY || CONFIG_CPU_SRSO */ + + #ifdef CONFIG_CALL_DEPTH_TRACKING + +@@ -359,3 +359,22 @@ SYM_FUNC_START(__x86_return_skl) + SYM_FUNC_END(__x86_return_skl) + + #endif /* CONFIG_CALL_DEPTH_TRACKING */ ++ ++/* ++ * This function name is magical and is used by -mfunction-return=thunk-extern ++ * for the compiler to generate JMPs to it. ++ * ++ * This code is only used during kernel boot or module init. All ++ * 'JMP __x86_return_thunk' sites are changed to something else by ++ * apply_returns(). 
++ */ ++SYM_CODE_START(__x86_return_thunk) ++ UNWIND_HINT_FUNC ++ ANNOTATE_NOENDBR ++ ANNOTATE_UNRET_SAFE ++ ret ++ int3 ++SYM_CODE_END(__x86_return_thunk) ++EXPORT_SYMBOL(__x86_return_thunk) ++ ++#endif /* CONFIG_RETHUNK */ +diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c +index a9d69ec994b75..e238517968836 100644 +--- a/arch/x86/mm/fault.c ++++ b/arch/x86/mm/fault.c +@@ -376,7 +376,7 @@ static void dump_pagetable(unsigned long address) + goto bad; + + pr_cont("PUD %lx ", pud_val(*pud)); +- if (!pud_present(*pud) || pud_large(*pud)) ++ if (!pud_present(*pud) || pud_leaf(*pud)) + goto out; + + pmd = pmd_offset(pud, address); +@@ -1037,7 +1037,7 @@ spurious_kernel_fault(unsigned long error_code, unsigned long address) + if (!pud_present(*pud)) + return 0; + +- if (pud_large(*pud)) ++ if (pud_leaf(*pud)) + return spurious_kernel_fault_check(error_code, (pte_t *) pud); + + pmd = pmd_offset(pud, address); +diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c +index f50cc210a9818..968d7005f4a72 100644 +--- a/arch/x86/mm/ident_map.c ++++ b/arch/x86/mm/ident_map.c +@@ -26,31 +26,18 @@ static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page, + for (; addr < end; addr = next) { + pud_t *pud = pud_page + pud_index(addr); + pmd_t *pmd; +- bool use_gbpage; + + next = (addr & PUD_MASK) + PUD_SIZE; + if (next > end) + next = end; + +- /* if this is already a gbpage, this portion is already mapped */ +- if (pud_large(*pud)) +- continue; +- +- /* Is using a gbpage allowed? */ +- use_gbpage = info->direct_gbpages; +- +- /* Don't use gbpage if it maps more than the requested region. */ +- /* at the begining: */ +- use_gbpage &= ((addr & ~PUD_MASK) == 0); +- /* ... or at the end: */ +- use_gbpage &= ((next & ~PUD_MASK) == 0); +- +- /* Never overwrite existing mappings */ +- use_gbpage &= !pud_present(*pud); +- +- if (use_gbpage) { ++ if (info->direct_gbpages) { + pud_t pudval; + ++ if (pud_present(*pud)) ++ continue; ++ ++ addr &= PUD_MASK; + pudval = __pud((addr - info->offset) | info->page_flag); + set_pud(pud, pudval); + continue; +diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c +index a190aae8ceaf7..19d209b412d7a 100644 +--- a/arch/x86/mm/init_64.c ++++ b/arch/x86/mm/init_64.c +@@ -617,7 +617,7 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end, + } + + if (!pud_none(*pud)) { +- if (!pud_large(*pud)) { ++ if (!pud_leaf(*pud)) { + pmd = pmd_offset(pud, 0); + paddr_last = phys_pmd_init(pmd, paddr, + paddr_end, +@@ -1163,7 +1163,7 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end, + if (!pud_present(*pud)) + continue; + +- if (pud_large(*pud) && ++ if (pud_leaf(*pud) && + IS_ALIGNED(addr, PUD_SIZE) && + IS_ALIGNED(next, PUD_SIZE)) { + spin_lock(&init_mm.page_table_lock); +diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c +index 0302491d799d1..fcf508c52bdc5 100644 +--- a/arch/x86/mm/kasan_init_64.c ++++ b/arch/x86/mm/kasan_init_64.c +@@ -115,7 +115,7 @@ static void __init kasan_populate_p4d(p4d_t *p4d, unsigned long addr, + pud = pud_offset(p4d, addr); + do { + next = pud_addr_end(addr, end); +- if (!pud_large(*pud)) ++ if (!pud_leaf(*pud)) + kasan_populate_pud(pud, addr, next, nid); + } while (pud++, addr = next, addr != end); + } +diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c +index 0166ab1780ccb..cc47a818a640a 100644 +--- a/arch/x86/mm/mem_encrypt_identity.c ++++ b/arch/x86/mm/mem_encrypt_identity.c +@@ -41,9 +41,9 @@ + #include 
<linux/mem_encrypt.h> + #include <linux/cc_platform.h> + ++#include <asm/init.h> + #include <asm/setup.h> + #include <asm/sections.h> +-#include <asm/cmdline.h> + #include <asm/coco.h> + #include <asm/sev.h> + +@@ -95,10 +95,7 @@ struct sme_populate_pgd_data { + */ + static char sme_workarea[2 * PMD_SIZE] __section(".init.scratch"); + +-static char sme_cmdline_arg[] __initdata = "mem_encrypt"; +-static char sme_cmdline_on[] __initdata = "on"; +- +-static void __init sme_clear_pgd(struct sme_populate_pgd_data *ppd) ++static void __head sme_clear_pgd(struct sme_populate_pgd_data *ppd) + { + unsigned long pgd_start, pgd_end, pgd_size; + pgd_t *pgd_p; +@@ -113,7 +110,7 @@ static void __init sme_clear_pgd(struct sme_populate_pgd_data *ppd) + memset(pgd_p, 0, pgd_size); + } + +-static pud_t __init *sme_prepare_pgd(struct sme_populate_pgd_data *ppd) ++static pud_t __head *sme_prepare_pgd(struct sme_populate_pgd_data *ppd) + { + pgd_t *pgd; + p4d_t *p4d; +@@ -144,13 +141,13 @@ static pud_t __init *sme_prepare_pgd(struct sme_populate_pgd_data *ppd) + set_pud(pud, __pud(PUD_FLAGS | __pa(pmd))); + } + +- if (pud_large(*pud)) ++ if (pud_leaf(*pud)) + return NULL; + + return pud; + } + +-static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd) ++static void __head sme_populate_pgd_large(struct sme_populate_pgd_data *ppd) + { + pud_t *pud; + pmd_t *pmd; +@@ -166,7 +163,7 @@ static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd) + set_pmd(pmd, __pmd(ppd->paddr | ppd->pmd_flags)); + } + +-static void __init sme_populate_pgd(struct sme_populate_pgd_data *ppd) ++static void __head sme_populate_pgd(struct sme_populate_pgd_data *ppd) + { + pud_t *pud; + pmd_t *pmd; +@@ -192,7 +189,7 @@ static void __init sme_populate_pgd(struct sme_populate_pgd_data *ppd) + set_pte(pte, __pte(ppd->paddr | ppd->pte_flags)); + } + +-static void __init __sme_map_range_pmd(struct sme_populate_pgd_data *ppd) ++static void __head __sme_map_range_pmd(struct sme_populate_pgd_data *ppd) + { + while (ppd->vaddr < ppd->vaddr_end) { + sme_populate_pgd_large(ppd); +@@ -202,7 +199,7 @@ static void __init __sme_map_range_pmd(struct sme_populate_pgd_data *ppd) + } + } + +-static void __init __sme_map_range_pte(struct sme_populate_pgd_data *ppd) ++static void __head __sme_map_range_pte(struct sme_populate_pgd_data *ppd) + { + while (ppd->vaddr < ppd->vaddr_end) { + sme_populate_pgd(ppd); +@@ -212,7 +209,7 @@ static void __init __sme_map_range_pte(struct sme_populate_pgd_data *ppd) + } + } + +-static void __init __sme_map_range(struct sme_populate_pgd_data *ppd, ++static void __head __sme_map_range(struct sme_populate_pgd_data *ppd, + pmdval_t pmd_flags, pteval_t pte_flags) + { + unsigned long vaddr_end; +@@ -236,22 +233,22 @@ static void __init __sme_map_range(struct sme_populate_pgd_data *ppd, + __sme_map_range_pte(ppd); + } + +-static void __init sme_map_range_encrypted(struct sme_populate_pgd_data *ppd) ++static void __head sme_map_range_encrypted(struct sme_populate_pgd_data *ppd) + { + __sme_map_range(ppd, PMD_FLAGS_ENC, PTE_FLAGS_ENC); + } + +-static void __init sme_map_range_decrypted(struct sme_populate_pgd_data *ppd) ++static void __head sme_map_range_decrypted(struct sme_populate_pgd_data *ppd) + { + __sme_map_range(ppd, PMD_FLAGS_DEC, PTE_FLAGS_DEC); + } + +-static void __init sme_map_range_decrypted_wp(struct sme_populate_pgd_data *ppd) ++static void __head sme_map_range_decrypted_wp(struct sme_populate_pgd_data *ppd) + { + __sme_map_range(ppd, PMD_FLAGS_DEC_WP, PTE_FLAGS_DEC_WP); + } + 
+-static unsigned long __init sme_pgtable_calc(unsigned long len) ++static unsigned long __head sme_pgtable_calc(unsigned long len) + { + unsigned long entries = 0, tables = 0; + +@@ -288,7 +285,7 @@ static unsigned long __init sme_pgtable_calc(unsigned long len) + return entries + tables; + } + +-void __init sme_encrypt_kernel(struct boot_params *bp) ++void __head sme_encrypt_kernel(struct boot_params *bp) + { + unsigned long workarea_start, workarea_end, workarea_len; + unsigned long execute_start, execute_end, execute_len; +@@ -323,9 +320,8 @@ void __init sme_encrypt_kernel(struct boot_params *bp) + * memory from being cached. + */ + +- /* Physical addresses gives us the identity mapped virtual addresses */ +- kernel_start = __pa_symbol(_text); +- kernel_end = ALIGN(__pa_symbol(_end), PMD_SIZE); ++ kernel_start = (unsigned long)RIP_REL_REF(_text); ++ kernel_end = ALIGN((unsigned long)RIP_REL_REF(_end), PMD_SIZE); + kernel_len = kernel_end - kernel_start; + + initrd_start = 0; +@@ -342,14 +338,6 @@ void __init sme_encrypt_kernel(struct boot_params *bp) + } + #endif + +- /* +- * We're running identity mapped, so we must obtain the address to the +- * SME encryption workarea using rip-relative addressing. +- */ +- asm ("lea sme_workarea(%%rip), %0" +- : "=r" (workarea_start) +- : "p" (sme_workarea)); +- + /* + * Calculate required number of workarea bytes needed: + * executable encryption area size: +@@ -359,7 +347,7 @@ void __init sme_encrypt_kernel(struct boot_params *bp) + * pagetable structures for the encryption of the kernel + * pagetable structures for workarea (in case not currently mapped) + */ +- execute_start = workarea_start; ++ execute_start = workarea_start = (unsigned long)RIP_REL_REF(sme_workarea); + execute_end = execute_start + (PAGE_SIZE * 2) + PMD_SIZE; + execute_len = execute_end - execute_start; + +@@ -502,13 +490,11 @@ void __init sme_encrypt_kernel(struct boot_params *bp) + native_write_cr3(__native_read_cr3()); + } + +-void __init sme_enable(struct boot_params *bp) ++void __head sme_enable(struct boot_params *bp) + { +- const char *cmdline_ptr, *cmdline_arg, *cmdline_on; + unsigned int eax, ebx, ecx, edx; + unsigned long feature_mask; + unsigned long me_mask; +- char buffer[16]; + bool snp; + u64 msr; + +@@ -551,6 +537,9 @@ void __init sme_enable(struct boot_params *bp) + + /* Check if memory encryption is enabled */ + if (feature_mask == AMD_SME_BIT) { ++ if (!(bp->hdr.xloadflags & XLF_MEM_ENCRYPTION)) ++ return; ++ + /* + * No SME if Hypervisor bit is set. This check is here to + * prevent a guest from trying to enable SME. For running as a +@@ -570,31 +559,8 @@ void __init sme_enable(struct boot_params *bp) + msr = __rdmsr(MSR_AMD64_SYSCFG); + if (!(msr & MSR_AMD64_SYSCFG_MEM_ENCRYPT)) + return; +- } else { +- /* SEV state cannot be controlled by a command line option */ +- goto out; + } + +- /* +- * Fixups have not been applied to phys_base yet and we're running +- * identity mapped, so we must obtain the address to the SME command +- * line argument data using rip-relative addressing. 
+- */ +- asm ("lea sme_cmdline_arg(%%rip), %0" +- : "=r" (cmdline_arg) +- : "p" (sme_cmdline_arg)); +- asm ("lea sme_cmdline_on(%%rip), %0" +- : "=r" (cmdline_on) +- : "p" (sme_cmdline_on)); +- +- cmdline_ptr = (const char *)((u64)bp->hdr.cmd_line_ptr | +- ((u64)bp->ext_cmd_line_ptr << 32)); +- +- if (cmdline_find_option(cmdline_ptr, cmdline_arg, buffer, sizeof(buffer)) < 0 || +- strncmp(buffer, cmdline_on, sizeof(buffer))) +- return; +- +-out: + RIP_REL_REF(sme_me_mask) = me_mask; + physical_mask &= ~me_mask; + cc_vendor = CC_VENDOR_AMD; +diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c +index de10800cd4dd4..e7b9ac63bb02a 100644 +--- a/arch/x86/mm/pat/memtype.c ++++ b/arch/x86/mm/pat/memtype.c +@@ -950,6 +950,38 @@ static void free_pfn_range(u64 paddr, unsigned long size) + memtype_free(paddr, paddr + size); + } + ++static int get_pat_info(struct vm_area_struct *vma, resource_size_t *paddr, ++ pgprot_t *pgprot) ++{ ++ unsigned long prot; ++ ++ VM_WARN_ON_ONCE(!(vma->vm_flags & VM_PAT)); ++ ++ /* ++ * We need the starting PFN and cachemode used for track_pfn_remap() ++ * that covered the whole VMA. For most mappings, we can obtain that ++ * information from the page tables. For COW mappings, we might now ++ * suddenly have anon folios mapped and follow_phys() will fail. ++ * ++ * Fallback to using vma->vm_pgoff, see remap_pfn_range_notrack(), to ++ * detect the PFN. If we need the cachemode as well, we're out of luck ++ * for now and have to fail fork(). ++ */ ++ if (!follow_phys(vma, vma->vm_start, 0, &prot, paddr)) { ++ if (pgprot) ++ *pgprot = __pgprot(prot); ++ return 0; ++ } ++ if (is_cow_mapping(vma->vm_flags)) { ++ if (pgprot) ++ return -EINVAL; ++ *paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT; ++ return 0; ++ } ++ WARN_ON_ONCE(1); ++ return -EINVAL; ++} ++ + /* + * track_pfn_copy is called when vma that is covering the pfnmap gets + * copied through copy_page_range(). +@@ -960,20 +992,13 @@ static void free_pfn_range(u64 paddr, unsigned long size) + int track_pfn_copy(struct vm_area_struct *vma) + { + resource_size_t paddr; +- unsigned long prot; + unsigned long vma_size = vma->vm_end - vma->vm_start; + pgprot_t pgprot; + + if (vma->vm_flags & VM_PAT) { +- /* +- * reserve the whole chunk covered by vma. We need the +- * starting address and protection from pte. +- */ +- if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) { +- WARN_ON_ONCE(1); ++ if (get_pat_info(vma, &paddr, &pgprot)) + return -EINVAL; +- } +- pgprot = __pgprot(prot); ++ /* reserve the whole chunk covered by vma. 
*/ + return reserve_pfn_range(paddr, vma_size, &pgprot, 1); + } + +@@ -1048,7 +1073,6 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn, + unsigned long size, bool mm_wr_locked) + { + resource_size_t paddr; +- unsigned long prot; + + if (vma && !(vma->vm_flags & VM_PAT)) + return; +@@ -1056,11 +1080,8 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn, + /* free the chunk starting from pfn or the whole chunk */ + paddr = (resource_size_t)pfn << PAGE_SHIFT; + if (!paddr && !size) { +- if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) { +- WARN_ON_ONCE(1); ++ if (get_pat_info(vma, &paddr, NULL)) + return; +- } +- + size = vma->vm_end - vma->vm_start; + } + free_pfn_range(paddr, size); +diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c +index bda9f129835e9..f3c4c756fe1ee 100644 +--- a/arch/x86/mm/pat/set_memory.c ++++ b/arch/x86/mm/pat/set_memory.c +@@ -684,7 +684,7 @@ pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address, + return NULL; + + *level = PG_LEVEL_1G; +- if (pud_large(*pud) || !pud_present(*pud)) ++ if (pud_leaf(*pud) || !pud_present(*pud)) + return (pte_t *)pud; + + pmd = pmd_offset(pud, address); +@@ -743,7 +743,7 @@ pmd_t *lookup_pmd_address(unsigned long address) + return NULL; + + pud = pud_offset(p4d, address); +- if (pud_none(*pud) || pud_large(*pud) || !pud_present(*pud)) ++ if (pud_none(*pud) || pud_leaf(*pud) || !pud_present(*pud)) + return NULL; + + return pmd_offset(pud, address); +@@ -1274,7 +1274,7 @@ static void unmap_pud_range(p4d_t *p4d, unsigned long start, unsigned long end) + */ + while (end - start >= PUD_SIZE) { + +- if (pud_large(*pud)) ++ if (pud_leaf(*pud)) + pud_clear(pud); + else + unmap_pmd_range(pud, start, start + PUD_SIZE); +diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c +index 9deadf517f14a..8e1ef5345b7a8 100644 +--- a/arch/x86/mm/pgtable.c ++++ b/arch/x86/mm/pgtable.c +@@ -774,7 +774,7 @@ int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot) + */ + int pud_clear_huge(pud_t *pud) + { +- if (pud_large(*pud)) { ++ if (pud_leaf(*pud)) { + pud_clear(pud); + return 1; + } +diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c +index 78414c6d1b5ed..51b6b78e6b175 100644 +--- a/arch/x86/mm/pti.c ++++ b/arch/x86/mm/pti.c +@@ -217,7 +217,7 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address) + + pud = pud_offset(p4d, address); + /* The user page tables do not use large mappings: */ +- if (pud_large(*pud)) { ++ if (pud_leaf(*pud)) { + WARN_ON(1); + return NULL; + } +diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c +index 955133077c105..a6a4d3ca8ddc6 100644 +--- a/arch/x86/net/bpf_jit_comp.c ++++ b/arch/x86/net/bpf_jit_comp.c +@@ -344,7 +344,7 @@ static int emit_call(u8 **pprog, void *func, void *ip) + static int emit_rsb_call(u8 **pprog, void *func, void *ip) + { + OPTIMIZER_HIDE_VAR(func); +- x86_call_depth_emit_accounting(pprog, func); ++ ip += x86_call_depth_emit_accounting(pprog, func); + return emit_patch(pprog, func, ip, 0xE8); + } + +diff --git a/arch/x86/power/hibernate.c b/arch/x86/power/hibernate.c +index 6f955eb1e1631..d8af46e677503 100644 +--- a/arch/x86/power/hibernate.c ++++ b/arch/x86/power/hibernate.c +@@ -170,7 +170,7 @@ int relocate_restore_code(void) + goto out; + } + pud = pud_offset(p4d, relocated_restore_code); +- if (pud_large(*pud)) { ++ if (pud_leaf(*pud)) { + set_pud(pud, __pud(pud_val(*pud) & ~_PAGE_NX)); + goto out; + } +diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c +index 
b6830554ff690..9d4a9311e819b 100644 +--- a/arch/x86/xen/mmu_pv.c ++++ b/arch/x86/xen/mmu_pv.c +@@ -1082,7 +1082,7 @@ static void __init xen_cleanmfnmap_pud(pud_t *pud, bool unpin) + pmd_t *pmd_tbl; + int i; + +- if (pud_large(*pud)) { ++ if (pud_leaf(*pud)) { + pa = pud_val(*pud) & PHYSICAL_PAGE_MASK; + xen_free_ro_pages(pa, PUD_SIZE); + return; +@@ -1863,7 +1863,7 @@ static phys_addr_t __init xen_early_virt_to_phys(unsigned long vaddr) + if (!pud_present(pud)) + return 0; + pa = pud_val(pud) & PTE_PFN_MASK; +- if (pud_large(pud)) ++ if (pud_leaf(pud)) + return pa + (vaddr & ~PUD_MASK); + + pmd = native_make_pmd(xen_read_phys_ulong(pa + pmd_index(vaddr) * +diff --git a/drivers/acpi/acpica/dbnames.c b/drivers/acpi/acpica/dbnames.c +index b91155ea9c343..c9131259f717b 100644 +--- a/drivers/acpi/acpica/dbnames.c ++++ b/drivers/acpi/acpica/dbnames.c +@@ -550,8 +550,12 @@ acpi_db_walk_for_fields(acpi_handle obj_handle, + ACPI_FREE(buffer.pointer); + + buffer.length = ACPI_ALLOCATE_LOCAL_BUFFER; +- acpi_evaluate_object(obj_handle, NULL, NULL, &buffer); +- ++ status = acpi_evaluate_object(obj_handle, NULL, NULL, &buffer); ++ if (ACPI_FAILURE(status)) { ++ acpi_os_printf("Could Not evaluate object %p\n", ++ obj_handle); ++ return (AE_OK); ++ } + /* + * Since this is a field unit, surround the output in braces + */ +diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c +index 45e48d653c60b..80a45e11fb5b6 100644 +--- a/drivers/ata/sata_mv.c ++++ b/drivers/ata/sata_mv.c +@@ -787,37 +787,6 @@ static const struct ata_port_info mv_port_info[] = { + }, + }; + +-static const struct pci_device_id mv_pci_tbl[] = { +- { PCI_VDEVICE(MARVELL, 0x5040), chip_504x }, +- { PCI_VDEVICE(MARVELL, 0x5041), chip_504x }, +- { PCI_VDEVICE(MARVELL, 0x5080), chip_5080 }, +- { PCI_VDEVICE(MARVELL, 0x5081), chip_508x }, +- /* RocketRAID 1720/174x have different identifiers */ +- { PCI_VDEVICE(TTI, 0x1720), chip_6042 }, +- { PCI_VDEVICE(TTI, 0x1740), chip_6042 }, +- { PCI_VDEVICE(TTI, 0x1742), chip_6042 }, +- +- { PCI_VDEVICE(MARVELL, 0x6040), chip_604x }, +- { PCI_VDEVICE(MARVELL, 0x6041), chip_604x }, +- { PCI_VDEVICE(MARVELL, 0x6042), chip_6042 }, +- { PCI_VDEVICE(MARVELL, 0x6080), chip_608x }, +- { PCI_VDEVICE(MARVELL, 0x6081), chip_608x }, +- +- { PCI_VDEVICE(ADAPTEC2, 0x0241), chip_604x }, +- +- /* Adaptec 1430SA */ +- { PCI_VDEVICE(ADAPTEC2, 0x0243), chip_7042 }, +- +- /* Marvell 7042 support */ +- { PCI_VDEVICE(MARVELL, 0x7042), chip_7042 }, +- +- /* Highpoint RocketRAID PCIe series */ +- { PCI_VDEVICE(TTI, 0x2300), chip_7042 }, +- { PCI_VDEVICE(TTI, 0x2310), chip_7042 }, +- +- { } /* terminate list */ +-}; +- + static const struct mv_hw_ops mv5xxx_ops = { + .phy_errata = mv5_phy_errata, + .enable_leds = mv5_enable_leds, +@@ -4300,6 +4269,36 @@ static int mv_pci_init_one(struct pci_dev *pdev, + static int mv_pci_device_resume(struct pci_dev *pdev); + #endif + ++static const struct pci_device_id mv_pci_tbl[] = { ++ { PCI_VDEVICE(MARVELL, 0x5040), chip_504x }, ++ { PCI_VDEVICE(MARVELL, 0x5041), chip_504x }, ++ { PCI_VDEVICE(MARVELL, 0x5080), chip_5080 }, ++ { PCI_VDEVICE(MARVELL, 0x5081), chip_508x }, ++ /* RocketRAID 1720/174x have different identifiers */ ++ { PCI_VDEVICE(TTI, 0x1720), chip_6042 }, ++ { PCI_VDEVICE(TTI, 0x1740), chip_6042 }, ++ { PCI_VDEVICE(TTI, 0x1742), chip_6042 }, ++ ++ { PCI_VDEVICE(MARVELL, 0x6040), chip_604x }, ++ { PCI_VDEVICE(MARVELL, 0x6041), chip_604x }, ++ { PCI_VDEVICE(MARVELL, 0x6042), chip_6042 }, ++ { PCI_VDEVICE(MARVELL, 0x6080), chip_608x }, ++ { PCI_VDEVICE(MARVELL, 0x6081), 
chip_608x }, ++ ++ { PCI_VDEVICE(ADAPTEC2, 0x0241), chip_604x }, ++ ++ /* Adaptec 1430SA */ ++ { PCI_VDEVICE(ADAPTEC2, 0x0243), chip_7042 }, ++ ++ /* Marvell 7042 support */ ++ { PCI_VDEVICE(MARVELL, 0x7042), chip_7042 }, ++ ++ /* Highpoint RocketRAID PCIe series */ ++ { PCI_VDEVICE(TTI, 0x2300), chip_7042 }, ++ { PCI_VDEVICE(TTI, 0x2310), chip_7042 }, ++ ++ { } /* terminate list */ ++}; + + static struct pci_driver mv_pci_driver = { + .name = DRV_NAME, +@@ -4312,6 +4311,7 @@ static struct pci_driver mv_pci_driver = { + #endif + + }; ++MODULE_DEVICE_TABLE(pci, mv_pci_tbl); + + /** + * mv_print_info - Dump key info to kernel log for perusal. +@@ -4484,7 +4484,6 @@ static void __exit mv_exit(void) + MODULE_AUTHOR("Brett Russ"); + MODULE_DESCRIPTION("SCSI low-level driver for Marvell SATA controllers"); + MODULE_LICENSE("GPL v2"); +-MODULE_DEVICE_TABLE(pci, mv_pci_tbl); + MODULE_VERSION(DRV_VERSION); + MODULE_ALIAS("platform:" DRV_NAME); + +diff --git a/drivers/ata/sata_sx4.c b/drivers/ata/sata_sx4.c +index b51d7a9d0d90c..a482741eb181f 100644 +--- a/drivers/ata/sata_sx4.c ++++ b/drivers/ata/sata_sx4.c +@@ -957,8 +957,7 @@ static void pdc20621_get_from_dimm(struct ata_host *host, void *psource, + + offset -= (idx * window_size); + idx++; +- dist = ((long) (window_size - (offset + size))) >= 0 ? size : +- (long) (window_size - offset); ++ dist = min(size, window_size - offset); + memcpy_fromio(psource, dimm_mmio + offset / 4, dist); + + psource += dist; +@@ -1005,8 +1004,7 @@ static void pdc20621_put_to_dimm(struct ata_host *host, void *psource, + readl(mmio + PDC_DIMM_WINDOW_CTLR); + offset -= (idx * window_size); + idx++; +- dist = ((long)(s32)(window_size - (offset + size))) >= 0 ? size : +- (long) (window_size - offset); ++ dist = min(size, window_size - offset); + memcpy_toio(dimm_mmio + offset / 4, psource, dist); + writel(0x01, mmio + PDC_GENERAL_CTLR); + readl(mmio + PDC_GENERAL_CTLR); +diff --git a/drivers/base/core.c b/drivers/base/core.c +index 2cc0ab8541680..0214288765c8c 100644 +--- a/drivers/base/core.c ++++ b/drivers/base/core.c +@@ -44,6 +44,7 @@ static bool fw_devlink_is_permissive(void); + static void __fw_devlink_link_to_consumers(struct device *dev); + static bool fw_devlink_drv_reg_done; + static bool fw_devlink_best_effort; ++static struct workqueue_struct *device_link_wq; + + /** + * __fwnode_link_add - Create a link between two fwnode_handles. +@@ -531,12 +532,26 @@ static void devlink_dev_release(struct device *dev) + /* + * It may take a while to complete this work because of the SRCU + * synchronization in device_link_release_fn() and if the consumer or +- * supplier devices get deleted when it runs, so put it into the "long" +- * workqueue. ++ * supplier devices get deleted when it runs, so put it into the ++ * dedicated workqueue. + */ +- queue_work(system_long_wq, &link->rm_work); ++ queue_work(device_link_wq, &link->rm_work); + } + ++/** ++ * device_link_wait_removal - Wait for ongoing devlink removal jobs to terminate ++ */ ++void device_link_wait_removal(void) ++{ ++ /* ++ * devlink removal jobs are queued in the dedicated work queue. ++ * To be sure that all removal jobs are terminated, ensure that any ++ * scheduled work has run to completion. 
++ */ ++ flush_workqueue(device_link_wq); ++} ++EXPORT_SYMBOL_GPL(device_link_wait_removal); ++ + static struct class devlink_class = { + .name = "devlink", + .dev_groups = devlink_groups, +@@ -4090,9 +4105,14 @@ int __init devices_init(void) + sysfs_dev_char_kobj = kobject_create_and_add("char", dev_kobj); + if (!sysfs_dev_char_kobj) + goto char_kobj_err; ++ device_link_wq = alloc_workqueue("device_link_wq", 0, 0); ++ if (!device_link_wq) ++ goto wq_err; + + return 0; + ++ wq_err: ++ kobject_put(sysfs_dev_char_kobj); + char_kobj_err: + kobject_put(sysfs_dev_block_kobj); + block_kobj_err: +diff --git a/drivers/base/regmap/regcache-maple.c b/drivers/base/regmap/regcache-maple.c +index 41edd6a430eb4..55999a50ccc0b 100644 +--- a/drivers/base/regmap/regcache-maple.c ++++ b/drivers/base/regmap/regcache-maple.c +@@ -112,7 +112,7 @@ static int regcache_maple_drop(struct regmap *map, unsigned int min, + unsigned long *entry, *lower, *upper; + unsigned long lower_index, lower_last; + unsigned long upper_index, upper_last; +- int ret; ++ int ret = 0; + + lower = NULL; + upper = NULL; +@@ -145,7 +145,7 @@ static int regcache_maple_drop(struct regmap *map, unsigned int min, + upper_index = max + 1; + upper_last = mas.last; + +- upper = kmemdup(&entry[max + 1], ++ upper = kmemdup(&entry[max - mas.index + 1], + ((mas.last - max) * + sizeof(unsigned long)), + map->alloc_flags); +@@ -244,7 +244,7 @@ static int regcache_maple_sync(struct regmap *map, unsigned int min, + unsigned long lmin = min; + unsigned long lmax = max; + unsigned int r, v, sync_start; +- int ret; ++ int ret = 0; + bool sync_needed = false; + + map->cache_bypass = true; +diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c +index 0211f704a358b..5277090c6d6d7 100644 +--- a/drivers/bluetooth/btqca.c ++++ b/drivers/bluetooth/btqca.c +@@ -758,11 +758,15 @@ EXPORT_SYMBOL_GPL(qca_uart_setup); + + int qca_set_bdaddr(struct hci_dev *hdev, const bdaddr_t *bdaddr) + { ++ bdaddr_t bdaddr_swapped; + struct sk_buff *skb; + int err; + +- skb = __hci_cmd_sync_ev(hdev, EDL_WRITE_BD_ADDR_OPCODE, 6, bdaddr, +- HCI_EV_VENDOR, HCI_INIT_TIMEOUT); ++ baswap(&bdaddr_swapped, bdaddr); ++ ++ skb = __hci_cmd_sync_ev(hdev, EDL_WRITE_BD_ADDR_OPCODE, 6, ++ &bdaddr_swapped, HCI_EV_VENDOR, ++ HCI_INIT_TIMEOUT); + if (IS_ERR(skb)) { + err = PTR_ERR(skb); + bt_dev_err(hdev, "QCA Change address cmd failed (%d)", err); +diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c +index f2d4985e036e4..8861b8017fbdf 100644 +--- a/drivers/bluetooth/hci_qca.c ++++ b/drivers/bluetooth/hci_qca.c +@@ -7,7 +7,6 @@ + * + * Copyright (C) 2007 Texas Instruments, Inc. + * Copyright (c) 2010, 2012, 2018 The Linux Foundation. All rights reserved. +- * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. + * + * Acknowledgements: + * This file is based on hci_ll.c, which was... 
+@@ -226,6 +225,7 @@ struct qca_serdev { + struct qca_power *bt_power; + u32 init_speed; + u32 oper_speed; ++ bool bdaddr_property_broken; + const char *firmware_name; + }; + +@@ -1825,6 +1825,7 @@ static int qca_setup(struct hci_uart *hu) + const char *firmware_name = qca_get_firmware_name(hu); + int ret; + struct qca_btsoc_version ver; ++ struct qca_serdev *qcadev; + const char *soc_name; + + ret = qca_check_speeds(hu); +@@ -1882,16 +1883,11 @@ static int qca_setup(struct hci_uart *hu) + case QCA_WCN6750: + case QCA_WCN6855: + case QCA_WCN7850: ++ set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks); + +- /* Set BDA quirk bit for reading BDA value from fwnode property +- * only if that property exist in DT. +- */ +- if (fwnode_property_present(dev_fwnode(hdev->dev.parent), "local-bd-address")) { +- set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks); +- bt_dev_info(hdev, "setting quirk bit to read BDA from fwnode later"); +- } else { +- bt_dev_dbg(hdev, "local-bd-address` is not present in the devicetree so not setting quirk bit for BDA"); +- } ++ qcadev = serdev_device_get_drvdata(hu->serdev); ++ if (qcadev->bdaddr_property_broken) ++ set_bit(HCI_QUIRK_BDADDR_PROPERTY_BROKEN, &hdev->quirks); + + hci_set_aosp_capable(hdev); + +@@ -2264,6 +2260,9 @@ static int qca_serdev_probe(struct serdev_device *serdev) + if (!qcadev->oper_speed) + BT_DBG("UART will pick default operating speed"); + ++ qcadev->bdaddr_property_broken = device_property_read_bool(&serdev->dev, ++ "qcom,local-bd-address-broken"); ++ + if (data) + qcadev->btsoc_type = data->soc_type; + else +diff --git a/drivers/dma-buf/st-dma-fence-chain.c b/drivers/dma-buf/st-dma-fence-chain.c +index c0979c8049b5a..661de4add4c72 100644 +--- a/drivers/dma-buf/st-dma-fence-chain.c ++++ b/drivers/dma-buf/st-dma-fence-chain.c +@@ -84,11 +84,11 @@ static int sanitycheck(void *arg) + return -ENOMEM; + + chain = mock_chain(NULL, f, 1); +- if (!chain) ++ if (chain) ++ dma_fence_enable_sw_signaling(chain); ++ else + err = -ENOMEM; + +- dma_fence_enable_sw_signaling(chain); +- + dma_fence_signal(f); + dma_fence_put(f); + +diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c +index bfa30625f5d03..3dc2f9aaf08db 100644 +--- a/drivers/firmware/efi/libstub/efi-stub-helper.c ++++ b/drivers/firmware/efi/libstub/efi-stub-helper.c +@@ -24,6 +24,8 @@ static bool efi_noinitrd; + static bool efi_nosoftreserve; + static bool efi_disable_pci_dma = IS_ENABLED(CONFIG_EFI_DISABLE_PCI_DMA); + ++int efi_mem_encrypt; ++ + bool __pure __efi_soft_reserve_enabled(void) + { + return !efi_nosoftreserve; +@@ -75,6 +77,12 @@ efi_status_t efi_parse_options(char const *cmdline) + efi_noinitrd = true; + } else if (IS_ENABLED(CONFIG_X86_64) && !strcmp(param, "no5lvl")) { + efi_no5lvl = true; ++ } else if (IS_ENABLED(CONFIG_ARCH_HAS_MEM_ENCRYPT) && ++ !strcmp(param, "mem_encrypt") && val) { ++ if (parse_option_str(val, "on")) ++ efi_mem_encrypt = 1; ++ else if (parse_option_str(val, "off")) ++ efi_mem_encrypt = -1; + } else if (!strcmp(param, "efi") && val) { + efi_nochunk = parse_option_str(val, "nochunk"); + efi_novamap |= parse_option_str(val, "novamap"); +diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h +index c04b82ea40f21..fc18fd649ed77 100644 +--- a/drivers/firmware/efi/libstub/efistub.h ++++ b/drivers/firmware/efi/libstub/efistub.h +@@ -37,8 +37,8 @@ extern bool efi_no5lvl; + extern bool efi_nochunk; + extern bool efi_nokaslr; + extern int efi_loglevel; ++extern int 
efi_mem_encrypt; + extern bool efi_novamap; +- + extern const efi_system_table_t *efi_system_table; + + typedef union efi_dxe_services_table efi_dxe_services_table_t; +diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c +index 8307950fe3ced..e4ae3db727efa 100644 +--- a/drivers/firmware/efi/libstub/x86-stub.c ++++ b/drivers/firmware/efi/libstub/x86-stub.c +@@ -238,6 +238,15 @@ efi_status_t efi_adjust_memory_range_protection(unsigned long start, + rounded_end = roundup(start + size, EFI_PAGE_SIZE); + + if (memattr != NULL) { ++ status = efi_call_proto(memattr, set_memory_attributes, ++ rounded_start, ++ rounded_end - rounded_start, ++ EFI_MEMORY_RO); ++ if (status != EFI_SUCCESS) { ++ efi_warn("Failed to set EFI_MEMORY_RO attribute\n"); ++ return status; ++ } ++ + status = efi_call_proto(memattr, clear_memory_attributes, + rounded_start, + rounded_end - rounded_start, +@@ -816,7 +825,7 @@ static efi_status_t efi_decompress_kernel(unsigned long *kernel_entry) + + *kernel_entry = addr + entry; + +- return efi_adjust_memory_range_protection(addr, kernel_total_size); ++ return efi_adjust_memory_range_protection(addr, kernel_text_size); + } + + static void __noreturn enter_kernel(unsigned long kernel_addr, +@@ -888,6 +897,9 @@ void __noreturn efi_stub_entry(efi_handle_t handle, + } + } + ++ if (efi_mem_encrypt > 0) ++ hdr->xloadflags |= XLF_MEM_ENCRYPTION; ++ + status = efi_decompress_kernel(&kernel_entry); + if (status != EFI_SUCCESS) { + efi_err("Failed to decompress kernel\n"); +diff --git a/drivers/gpio/gpiolib-cdev.c b/drivers/gpio/gpiolib-cdev.c +index 4f3e66ece7f78..84125e55de101 100644 +--- a/drivers/gpio/gpiolib-cdev.c ++++ b/drivers/gpio/gpiolib-cdev.c +@@ -655,6 +655,25 @@ static u32 line_event_id(int level) + GPIO_V2_LINE_EVENT_FALLING_EDGE; + } + ++static inline char *make_irq_label(const char *orig) ++{ ++ char *new; ++ ++ if (!orig) ++ return NULL; ++ ++ new = kstrdup_and_replace(orig, '/', ':', GFP_KERNEL); ++ if (!new) ++ return ERR_PTR(-ENOMEM); ++ ++ return new; ++} ++ ++static inline void free_irq_label(const char *label) ++{ ++ kfree(label); ++} ++ + #ifdef CONFIG_HTE + + static enum hte_return process_hw_ts_thread(void *p) +@@ -942,6 +961,7 @@ static int debounce_setup(struct line *line, unsigned int debounce_period_us) + { + unsigned long irqflags; + int ret, level, irq; ++ char *label; + + /* try hardware */ + ret = gpiod_set_debounce(line->desc, debounce_period_us); +@@ -964,11 +984,17 @@ static int debounce_setup(struct line *line, unsigned int debounce_period_us) + if (irq < 0) + return -ENXIO; + ++ label = make_irq_label(line->req->label); ++ if (IS_ERR(label)) ++ return -ENOMEM; ++ + irqflags = IRQF_TRIGGER_FALLING | IRQF_TRIGGER_RISING; + ret = request_irq(irq, debounce_irq_handler, irqflags, +- line->req->label, line); +- if (ret) ++ label, line); ++ if (ret) { ++ free_irq_label(label); + return ret; ++ } + line->irq = irq; + } else { + ret = hte_edge_setup(line, GPIO_V2_LINE_FLAG_EDGE_BOTH); +@@ -1013,7 +1039,7 @@ static u32 gpio_v2_line_config_debounce_period(struct gpio_v2_line_config *lc, + static void edge_detector_stop(struct line *line) + { + if (line->irq) { +- free_irq(line->irq, line); ++ free_irq_label(free_irq(line->irq, line)); + line->irq = 0; + } + +@@ -1038,6 +1064,7 @@ static int edge_detector_setup(struct line *line, + unsigned long irqflags = 0; + u64 eflags; + int irq, ret; ++ char *label; + + eflags = edflags & GPIO_V2_LINE_EDGE_FLAGS; + if (eflags && !kfifo_initialized(&line->req->events)) { +@@ 
-1074,11 +1101,17 @@ static int edge_detector_setup(struct line *line, + IRQF_TRIGGER_RISING : IRQF_TRIGGER_FALLING; + irqflags |= IRQF_ONESHOT; + ++ label = make_irq_label(line->req->label); ++ if (IS_ERR(label)) ++ return PTR_ERR(label); ++ + /* Request a thread to read the events */ + ret = request_threaded_irq(irq, edge_irq_handler, edge_irq_thread, +- irqflags, line->req->label, line); +- if (ret) ++ irqflags, label, line); ++ if (ret) { ++ free_irq_label(label); + return ret; ++ } + + line->irq = irq; + return 0; +@@ -1943,7 +1976,7 @@ static void lineevent_free(struct lineevent_state *le) + blocking_notifier_chain_unregister(&le->gdev->device_notifier, + &le->device_unregistered_nb); + if (le->irq) +- free_irq(le->irq, le); ++ free_irq_label(free_irq(le->irq, le)); + if (le->desc) + gpiod_free(le->desc); + kfree(le->label); +@@ -2091,6 +2124,7 @@ static int lineevent_create(struct gpio_device *gdev, void __user *ip) + int fd; + int ret; + int irq, irqflags = 0; ++ char *label; + + if (copy_from_user(&eventreq, ip, sizeof(eventreq))) + return -EFAULT; +@@ -2175,15 +2209,23 @@ static int lineevent_create(struct gpio_device *gdev, void __user *ip) + if (ret) + goto out_free_le; + ++ label = make_irq_label(le->label); ++ if (IS_ERR(label)) { ++ ret = PTR_ERR(label); ++ goto out_free_le; ++ } ++ + /* Request a thread to read the events */ + ret = request_threaded_irq(irq, + lineevent_irq_handler, + lineevent_irq_thread, + irqflags, +- le->label, ++ label, + le); +- if (ret) ++ if (ret) { ++ free_irq_label(label); + goto out_free_le; ++ } + + le->irq = irq; + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h +index 85efd686e538d..d59e8536192ca 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h +@@ -1369,6 +1369,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, + void amdgpu_driver_release_kms(struct drm_device *dev); + + int amdgpu_device_ip_suspend(struct amdgpu_device *adev); ++int amdgpu_device_prepare(struct drm_device *dev); + int amdgpu_device_suspend(struct drm_device *dev, bool fbcon); + int amdgpu_device_resume(struct drm_device *dev, bool fbcon); + u32 amdgpu_get_vblank_counter_kms(struct drm_crtc *crtc); +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +index 79261bec26542..062d78818da16 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +@@ -1549,6 +1549,7 @@ static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, + } else { + pr_info("switched off\n"); + dev->switch_power_state = DRM_SWITCH_POWER_CHANGING; ++ amdgpu_device_prepare(dev); + amdgpu_device_suspend(dev, true); + amdgpu_device_cache_pci_state(pdev); + /* Shut down the device */ +@@ -4094,6 +4095,43 @@ static int amdgpu_device_evict_resources(struct amdgpu_device *adev) + /* + * Suspend & resume. + */ ++/** ++ * amdgpu_device_prepare - prepare for device suspend ++ * ++ * @dev: drm dev pointer ++ * ++ * Prepare to put the hw in the suspend state (all asics). ++ * Returns 0 for success or an error on failure. ++ * Called at driver suspend. 
++ */ ++int amdgpu_device_prepare(struct drm_device *dev) ++{ ++ struct amdgpu_device *adev = drm_to_adev(dev); ++ int i, r; ++ ++ if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) ++ return 0; ++ ++ /* Evict the majority of BOs before starting suspend sequence */ ++ r = amdgpu_device_evict_resources(adev); ++ if (r) ++ return r; ++ ++ flush_delayed_work(&adev->gfx.gfx_off_delay_work); ++ ++ for (i = 0; i < adev->num_ip_blocks; i++) { ++ if (!adev->ip_blocks[i].status.valid) ++ continue; ++ if (!adev->ip_blocks[i].version->funcs->prepare_suspend) ++ continue; ++ r = adev->ip_blocks[i].version->funcs->prepare_suspend((void *)adev); ++ if (r) ++ return r; ++ } ++ ++ return 0; ++} ++ + /** + * amdgpu_device_suspend - initiate device suspend + * +@@ -4114,11 +4152,6 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) + + adev->in_suspend = true; + +- /* Evict the majority of BOs before grabbing the full access */ +- r = amdgpu_device_evict_resources(adev); +- if (r) +- return r; +- + if (amdgpu_sriov_vf(adev)) { + amdgpu_virt_fini_data_exchange(adev); + r = amdgpu_virt_request_full_gpu(adev, false); +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +index 3204c3a42f2a3..f9bc38d20ce3e 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +@@ -2386,8 +2386,9 @@ static int amdgpu_pmops_prepare(struct device *dev) + /* Return a positive number here so + * DPM_FLAG_SMART_SUSPEND works properly + */ +- if (amdgpu_device_supports_boco(drm_dev)) +- return pm_runtime_suspended(dev); ++ if (amdgpu_device_supports_boco(drm_dev) && ++ pm_runtime_suspended(dev)) ++ return 1; + + /* if we will not support s3 or s2i for the device + * then skip suspend +@@ -2396,7 +2397,7 @@ static int amdgpu_pmops_prepare(struct device *dev) + !amdgpu_acpi_is_s3_active(adev)) + return 1; + +- return 0; ++ return amdgpu_device_prepare(drm_dev); + } + + static void amdgpu_pmops_complete(struct device *dev) +@@ -2598,6 +2599,9 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev) + if (amdgpu_device_supports_boco(drm_dev)) + adev->mp1_state = PP_MP1_STATE_UNLOAD; + ++ ret = amdgpu_device_prepare(drm_dev); ++ if (ret) ++ return ret; + ret = amdgpu_device_suspend(drm_dev, false); + if (ret) { + adev->in_runpm = false; +diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +index 251dd800a2a66..7b5c1498941dd 100644 +--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c ++++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +@@ -1179,9 +1179,10 @@ void dce110_disable_stream(struct pipe_ctx *pipe_ctx) + dto_params.timing = &pipe_ctx->stream->timing; + dp_hpo_inst = pipe_ctx->stream_res.hpo_dp_stream_enc->inst; + if (dccg) { +- dccg->funcs->set_dtbclk_dto(dccg, &dto_params); + dccg->funcs->disable_symclk32_se(dccg, dp_hpo_inst); + dccg->funcs->set_dpstreamclk(dccg, REFCLK, tg->inst, dp_hpo_inst); ++ if (dccg && dccg->funcs->set_dtbclk_dto) ++ dccg->funcs->set_dtbclk_dto(dccg, &dto_params); + } + } else if (dccg && dccg->funcs->disable_symclk_se) { + dccg->funcs->disable_symclk_se(dccg, stream_enc->stream_enc_inst, +diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c +index 1e3803739ae61..12af2859002f7 100644 +--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c ++++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c +@@ -2728,18 +2728,17 @@ 
void dcn20_enable_stream(struct pipe_ctx *pipe_ctx) + } + + if (dc->link_srv->dp_is_128b_132b_signal(pipe_ctx)) { +- dp_hpo_inst = pipe_ctx->stream_res.hpo_dp_stream_enc->inst; +- dccg->funcs->set_dpstreamclk(dccg, DTBCLK0, tg->inst, dp_hpo_inst); +- +- phyd32clk = get_phyd32clk_src(link); +- dccg->funcs->enable_symclk32_se(dccg, dp_hpo_inst, phyd32clk); +- + dto_params.otg_inst = tg->inst; + dto_params.pixclk_khz = pipe_ctx->stream->timing.pix_clk_100hz / 10; + dto_params.num_odm_segments = get_odm_segment_count(pipe_ctx); + dto_params.timing = &pipe_ctx->stream->timing; + dto_params.ref_dtbclk_khz = dc->clk_mgr->funcs->get_dtb_ref_clk_frequency(dc->clk_mgr); + dccg->funcs->set_dtbclk_dto(dccg, &dto_params); ++ dp_hpo_inst = pipe_ctx->stream_res.hpo_dp_stream_enc->inst; ++ dccg->funcs->set_dpstreamclk(dccg, DTBCLK0, tg->inst, dp_hpo_inst); ++ ++ phyd32clk = get_phyd32clk_src(link); ++ dccg->funcs->enable_symclk32_se(dccg, dp_hpo_inst, phyd32clk); + } else { + } + if (hws->funcs.calculate_dccg_k1_k2_values && dc->res_pool->dccg->funcs->set_pixel_rate_div) { +diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h +index abe829bbd54af..a9880fc531955 100644 +--- a/drivers/gpu/drm/amd/include/amd_shared.h ++++ b/drivers/gpu/drm/amd/include/amd_shared.h +@@ -295,6 +295,7 @@ struct amd_ip_funcs { + int (*hw_init)(void *handle); + int (*hw_fini)(void *handle); + void (*late_fini)(void *handle); ++ int (*prepare_suspend)(void *handle); + int (*suspend)(void *handle); + int (*resume)(void *handle); + bool (*is_idle)(void *handle); +diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c +index 7352bde299d54..03bd3c7bd0dc2 100644 +--- a/drivers/gpu/drm/drm_prime.c ++++ b/drivers/gpu/drm/drm_prime.c +@@ -582,7 +582,12 @@ int drm_gem_map_attach(struct dma_buf *dma_buf, + { + struct drm_gem_object *obj = dma_buf->priv; + +- if (!obj->funcs->get_sg_table) ++ /* ++ * drm_gem_map_dma_buf() requires obj->get_sg_table(), but drivers ++ * that implement their own ->map_dma_buf() do not. 
++ */ ++ if (dma_buf->ops->map_dma_buf == drm_gem_map_dma_buf && ++ !obj->funcs->get_sg_table) + return -ENOSYS; + + return drm_gem_pin(obj); +diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile +index 79f65eff6bb2a..23400313d8a64 100644 +--- a/drivers/gpu/drm/i915/Makefile ++++ b/drivers/gpu/drm/i915/Makefile +@@ -104,6 +104,7 @@ gt-y += \ + gt/intel_ggtt_fencing.o \ + gt/intel_gt.o \ + gt/intel_gt_buffer_pool.o \ ++ gt/intel_gt_ccs_mode.o \ + gt/intel_gt_clock_utils.o \ + gt/intel_gt_debugfs.o \ + gt/intel_gt_engines_debugfs.o \ +diff --git a/drivers/gpu/drm/i915/display/intel_cursor.c b/drivers/gpu/drm/i915/display/intel_cursor.c +index b342fad180ca5..61df6cd3f3778 100644 +--- a/drivers/gpu/drm/i915/display/intel_cursor.c ++++ b/drivers/gpu/drm/i915/display/intel_cursor.c +@@ -23,6 +23,8 @@ + #include "intel_psr.h" + #include "skl_watermark.h" + ++#include "gem/i915_gem_object.h" ++ + /* Cursor formats */ + static const u32 intel_cursor_formats[] = { + DRM_FORMAT_ARGB8888, +@@ -32,12 +34,10 @@ static u32 intel_cursor_base(const struct intel_plane_state *plane_state) + { + struct drm_i915_private *dev_priv = + to_i915(plane_state->uapi.plane->dev); +- const struct drm_framebuffer *fb = plane_state->hw.fb; +- const struct drm_i915_gem_object *obj = intel_fb_obj(fb); + u32 base; + + if (DISPLAY_INFO(dev_priv)->cursor_needs_physical) +- base = sg_dma_address(obj->mm.pages->sgl); ++ base = plane_state->phys_dma_addr; + else + base = intel_plane_ggtt_offset(plane_state); + +diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h +index 7fc92b1474cc4..8b0dc2b75da4a 100644 +--- a/drivers/gpu/drm/i915/display/intel_display_types.h ++++ b/drivers/gpu/drm/i915/display/intel_display_types.h +@@ -701,6 +701,7 @@ struct intel_plane_state { + #define PLANE_HAS_FENCE BIT(0) + + struct intel_fb_view view; ++ u32 phys_dma_addr; /* for cursor_needs_physical */ + + /* Plane pxp decryption state */ + bool decrypt; +diff --git a/drivers/gpu/drm/i915/display/intel_fb_pin.c b/drivers/gpu/drm/i915/display/intel_fb_pin.c +index fffd568070d41..a131656757f2b 100644 +--- a/drivers/gpu/drm/i915/display/intel_fb_pin.c ++++ b/drivers/gpu/drm/i915/display/intel_fb_pin.c +@@ -254,6 +254,16 @@ int intel_plane_pin_fb(struct intel_plane_state *plane_state) + return PTR_ERR(vma); + + plane_state->ggtt_vma = vma; ++ ++ /* ++ * Pre-populate the dma address before we enter the vblank ++ * evade critical section as i915_gem_object_get_dma_address() ++ * will trigger might_sleep() even if it won't actually sleep, ++ * which is the case when the fb has already been pinned. 
++ */ ++ if (phys_cursor) ++ plane_state->phys_dma_addr = ++ i915_gem_object_get_dma_address(intel_fb_obj(fb), 0); + } else { + struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb); + +diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c +index ffc15d278a39d..d557ecd4e1ebe 100644 +--- a/drivers/gpu/drm/i915/display/skl_universal_plane.c ++++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c +@@ -20,6 +20,7 @@ + #include "skl_scaler.h" + #include "skl_universal_plane.h" + #include "skl_watermark.h" ++#include "gt/intel_gt.h" + #include "pxp/intel_pxp.h" + + static const u32 skl_plane_formats[] = { +@@ -2169,8 +2170,8 @@ static bool skl_plane_has_rc_ccs(struct drm_i915_private *i915, + enum pipe pipe, enum plane_id plane_id) + { + /* Wa_14017240301 */ +- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || +- IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0)) ++ if (IS_GFX_GT_IP_STEP(to_gt(i915), IP_VER(12, 70), STEP_A0, STEP_B0) || ++ IS_GFX_GT_IP_STEP(to_gt(i915), IP_VER(12, 71), STEP_A0, STEP_B0)) + return false; + + /* Wa_22011186057 */ +diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c b/drivers/gpu/drm/i915/gem/i915_gem_create.c +index d24c0ce8805c7..19156ba4b9ef4 100644 +--- a/drivers/gpu/drm/i915/gem/i915_gem_create.c ++++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c +@@ -405,8 +405,8 @@ static int ext_set_pat(struct i915_user_extension __user *base, void *data) + BUILD_BUG_ON(sizeof(struct drm_i915_gem_create_ext_set_pat) != + offsetofend(struct drm_i915_gem_create_ext_set_pat, rsvd)); + +- /* Limiting the extension only to Meteor Lake */ +- if (!IS_METEORLAKE(i915)) ++ /* Limiting the extension only to Xe_LPG and beyond */ ++ if (GRAPHICS_VER_FULL(i915) < IP_VER(12, 70)) + return -ENODEV; + + if (copy_from_user(&ext, base, sizeof(ext))) +diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c +index 7ad36198aab2a..cddf8c16e9a72 100644 +--- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c ++++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c +@@ -4,9 +4,9 @@ + */ + + #include "gen8_engine_cs.h" +-#include "i915_drv.h" + #include "intel_engine_regs.h" + #include "intel_gpu_commands.h" ++#include "intel_gt.h" + #include "intel_lrc.h" + #include "intel_ring.h" + +@@ -226,8 +226,8 @@ u32 *gen12_emit_aux_table_inv(struct intel_engine_cs *engine, u32 *cs) + static int mtl_dummy_pipe_control(struct i915_request *rq) + { + /* Wa_14016712196 */ +- if (IS_MTL_GRAPHICS_STEP(rq->i915, M, STEP_A0, STEP_B0) || +- IS_MTL_GRAPHICS_STEP(rq->i915, P, STEP_A0, STEP_B0)) { ++ if (IS_GFX_GT_IP_RANGE(rq->engine->gt, IP_VER(12, 70), IP_VER(12, 74)) || ++ IS_DG2(rq->i915)) { + u32 *cs; + + /* dummy PIPE_CONTROL + depth flush */ +@@ -808,6 +808,7 @@ u32 *gen12_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs) + u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs) + { + struct drm_i915_private *i915 = rq->i915; ++ struct intel_gt *gt = rq->engine->gt; + u32 flags = (PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_TLB_INVALIDATE | + PIPE_CONTROL_TILE_CACHE_FLUSH | +@@ -818,8 +819,7 @@ u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs) + PIPE_CONTROL_FLUSH_ENABLE); + + /* Wa_14016712196 */ +- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || +- IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0)) ++ if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74)) || IS_DG2(i915)) + /* dummy PIPE_CONTROL + depth flush */ + cs = gen12_emit_pipe_control(cs, 0, + 
PIPE_CONTROL_DEPTH_CACHE_FLUSH, 0); +diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c +index e85d70a62123f..765387639dabb 100644 +--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c ++++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c +@@ -912,6 +912,23 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt) + info->engine_mask &= ~BIT(GSC0); + } + ++ /* ++ * Do not create the command streamer for CCS slices beyond the first. ++ * All the workload submitted to the first engine will be shared among ++ * all the slices. ++ * ++ * Once the user will be allowed to customize the CCS mode, then this ++ * check needs to be removed. ++ */ ++ if (IS_DG2(gt->i915)) { ++ u8 first_ccs = __ffs(CCS_MASK(gt)); ++ ++ /* Mask off all the CCS engine */ ++ info->engine_mask &= ~GENMASK(CCS3, CCS0); ++ /* Put back in the first CCS engine */ ++ info->engine_mask |= BIT(_CCS(first_ccs)); ++ } ++ + return info->engine_mask; + } + +@@ -1616,9 +1633,7 @@ static int __intel_engine_stop_cs(struct intel_engine_cs *engine, + * Wa_22011802037: Prior to doing a reset, ensure CS is + * stopped, set ring stop bit and prefetch disable bit to halt CS + */ +- if (IS_MTL_GRAPHICS_STEP(engine->i915, M, STEP_A0, STEP_B0) || +- (GRAPHICS_VER(engine->i915) >= 11 && +- GRAPHICS_VER_FULL(engine->i915) < IP_VER(12, 70))) ++ if (intel_engine_reset_needs_wa_22011802037(engine->gt)) + intel_uncore_write_fw(uncore, RING_MODE_GEN7(engine->mmio_base), + _MASKED_BIT_ENABLE(GEN12_GFX_PREFETCH_DISABLE)); + +diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c +index a95615b345cd7..5a3a5b29d1507 100644 +--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c ++++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c +@@ -21,7 +21,7 @@ static void intel_gsc_idle_msg_enable(struct intel_engine_cs *engine) + { + struct drm_i915_private *i915 = engine->i915; + +- if (IS_METEORLAKE(i915) && engine->id == GSC0) { ++ if (MEDIA_VER(i915) >= 13 && engine->id == GSC0) { + intel_uncore_write(engine->gt->uncore, + RC_PSMI_CTRL_GSCCS, + _MASKED_BIT_DISABLE(IDLE_MSG_DISABLE)); +diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +index 5a720e2523126..42e09f1589205 100644 +--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c ++++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +@@ -3001,9 +3001,7 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine) + * Wa_22011802037: In addition to stopping the cs, we need + * to wait for any pending mi force wakeups + */ +- if (IS_MTL_GRAPHICS_STEP(engine->i915, M, STEP_A0, STEP_B0) || +- (GRAPHICS_VER(engine->i915) >= 11 && +- GRAPHICS_VER_FULL(engine->i915) < IP_VER(12, 70))) ++ if (intel_engine_reset_needs_wa_22011802037(engine->gt)) + intel_engine_wait_for_pending_mi_fw(engine); + + engine->execlists.reset_ccid = active_ccid(engine); +diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h +index 6c34547b58b59..6e63b46682f76 100644 +--- a/drivers/gpu/drm/i915/gt/intel_gt.h ++++ b/drivers/gpu/drm/i915/gt/intel_gt.h +@@ -14,6 +14,37 @@ + struct drm_i915_private; + struct drm_printer; + ++/* ++ * Check that the GT is a graphics GT and has an IP version within the ++ * specified range (inclusive). 
++ */ ++#define IS_GFX_GT_IP_RANGE(gt, from, until) ( \ ++ BUILD_BUG_ON_ZERO((from) < IP_VER(2, 0)) + \ ++ BUILD_BUG_ON_ZERO((until) < (from)) + \ ++ ((gt)->type != GT_MEDIA && \ ++ GRAPHICS_VER_FULL((gt)->i915) >= (from) && \ ++ GRAPHICS_VER_FULL((gt)->i915) <= (until))) ++ ++/* ++ * Check that the GT is a graphics GT with a specific IP version and has ++ * a stepping in the range [from, until). The lower stepping bound is ++ * inclusive, the upper bound is exclusive. The most common use-case of this ++ * macro is for checking bounds for workarounds, which usually have a stepping ++ * ("from") at which the hardware issue is first present and another stepping ++ * ("until") at which a hardware fix is present and the software workaround is ++ * no longer necessary. E.g., ++ * ++ * IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ++ * IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_B1, STEP_FOREVER) ++ * ++ * "STEP_FOREVER" can be passed as "until" for workarounds that have no upper ++ * stepping bound for the specified IP version. ++ */ ++#define IS_GFX_GT_IP_STEP(gt, ipver, from, until) ( \ ++ BUILD_BUG_ON_ZERO((until) <= (from)) + \ ++ (IS_GFX_GT_IP_RANGE((gt), (ipver), (ipver)) && \ ++ IS_GRAPHICS_STEP((gt)->i915, (from), (until)))) ++ + #define GT_TRACE(gt, fmt, ...) do { \ + const struct intel_gt *gt__ __maybe_unused = (gt); \ + GEM_TRACE("%s " fmt, dev_name(gt__->i915->drm.dev), \ +diff --git a/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c +new file mode 100644 +index 0000000000000..044219c5960a5 +--- /dev/null ++++ b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c +@@ -0,0 +1,39 @@ ++// SPDX-License-Identifier: MIT ++/* ++ * Copyright © 2024 Intel Corporation ++ */ ++ ++#include "i915_drv.h" ++#include "intel_gt.h" ++#include "intel_gt_ccs_mode.h" ++#include "intel_gt_regs.h" ++ ++void intel_gt_apply_ccs_mode(struct intel_gt *gt) ++{ ++ int cslice; ++ u32 mode = 0; ++ int first_ccs = __ffs(CCS_MASK(gt)); ++ ++ if (!IS_DG2(gt->i915)) ++ return; ++ ++ /* Build the value for the fixed CCS load balancing */ ++ for (cslice = 0; cslice < I915_MAX_CCS; cslice++) { ++ if (CCS_MASK(gt) & BIT(cslice)) ++ /* ++ * If available, assign the cslice ++ * to the first available engine... ++ */ ++ mode |= XEHP_CCS_MODE_CSLICE(cslice, first_ccs); ++ ++ else ++ /* ++ * ... 
otherwise, mark the cslice as ++ * unavailable if no CCS dispatches here ++ */ ++ mode |= XEHP_CCS_MODE_CSLICE(cslice, ++ XEHP_CCS_MODE_CSLICE_MASK); ++ } ++ ++ intel_uncore_write(gt->uncore, XEHP_CCS_MODE, mode); ++} +diff --git a/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h +new file mode 100644 +index 0000000000000..9e5549caeb269 +--- /dev/null ++++ b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h +@@ -0,0 +1,13 @@ ++/* SPDX-License-Identifier: MIT */ ++/* ++ * Copyright © 2024 Intel Corporation ++ */ ++ ++#ifndef __INTEL_GT_CCS_MODE_H__ ++#define __INTEL_GT_CCS_MODE_H__ ++ ++struct intel_gt; ++ ++void intel_gt_apply_ccs_mode(struct intel_gt *gt); ++ ++#endif /* __INTEL_GT_CCS_MODE_H__ */ +diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c +index 2c0f1f3e28ff8..c6dec485aefbe 100644 +--- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c ++++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c +@@ -3,8 +3,7 @@ + * Copyright © 2022 Intel Corporation + */ + +-#include "i915_drv.h" +- ++#include "intel_gt.h" + #include "intel_gt_mcr.h" + #include "intel_gt_print.h" + #include "intel_gt_regs.h" +@@ -166,8 +165,8 @@ void intel_gt_mcr_init(struct intel_gt *gt) + gt->steering_table[OADDRM] = xelpmp_oaddrm_steering_table; + } else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) { + /* Wa_14016747170 */ +- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || +- IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0)) ++ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || ++ IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) + fuse = REG_FIELD_GET(MTL_GT_L3_EXC_MASK, + intel_uncore_read(gt->uncore, + MTL_GT_ACTIVITY_FACTOR)); +diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h +index 2cdfb2f713d02..64acab146b52f 100644 +--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h ++++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h +@@ -1468,8 +1468,14 @@ + #define ECOBITS_PPGTT_CACHE4B (0 << 8) + + #define GEN12_RCU_MODE _MMIO(0x14800) ++#define XEHP_RCU_MODE_FIXED_SLICE_CCS_MODE REG_BIT(1) + #define GEN12_RCU_MODE_CCS_ENABLE REG_BIT(0) + ++#define XEHP_CCS_MODE _MMIO(0x14804) ++#define XEHP_CCS_MODE_CSLICE_MASK REG_GENMASK(2, 0) /* CCS0-3 + rsvd */ ++#define XEHP_CCS_MODE_CSLICE_WIDTH ilog2(XEHP_CCS_MODE_CSLICE_MASK + 1) ++#define XEHP_CCS_MODE_CSLICE(cslice, ccs) (ccs << (cslice * XEHP_CCS_MODE_CSLICE_WIDTH)) ++ + #define CHV_FUSE_GT _MMIO(VLV_GUNIT_BASE + 0x2168) + #define CHV_FGT_DISABLE_SS0 (1 << 10) + #define CHV_FGT_DISABLE_SS1 (1 << 11) +diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c +index c378cc7c953c4..b99efa348ad1e 100644 +--- a/drivers/gpu/drm/i915/gt/intel_lrc.c ++++ b/drivers/gpu/drm/i915/gt/intel_lrc.c +@@ -1316,29 +1316,6 @@ gen12_emit_cmd_buf_wa(const struct intel_context *ce, u32 *cs) + return cs; + } + +-/* +- * On DG2 during context restore of a preempted context in GPGPU mode, +- * RCS restore hang is detected. This is extremely timing dependent. +- * To address this below sw wabb is implemented for DG2 A steppings. 
+- */ +-static u32 * +-dg2_emit_rcs_hang_wabb(const struct intel_context *ce, u32 *cs) +-{ +- *cs++ = MI_LOAD_REGISTER_IMM(1); +- *cs++ = i915_mmio_reg_offset(GEN12_STATE_ACK_DEBUG(ce->engine->mmio_base)); +- *cs++ = 0x21; +- +- *cs++ = MI_LOAD_REGISTER_REG; +- *cs++ = i915_mmio_reg_offset(RING_NOPID(ce->engine->mmio_base)); +- *cs++ = i915_mmio_reg_offset(XEHP_CULLBIT1); +- +- *cs++ = MI_LOAD_REGISTER_REG; +- *cs++ = i915_mmio_reg_offset(RING_NOPID(ce->engine->mmio_base)); +- *cs++ = i915_mmio_reg_offset(XEHP_CULLBIT2); +- +- return cs; +-} +- + /* + * The bspec's tuning guide asks us to program a vertical watermark value of + * 0x3FF. However this register is not saved/restored properly by the +@@ -1363,21 +1340,15 @@ gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs) + cs = gen12_emit_cmd_buf_wa(ce, cs); + cs = gen12_emit_restore_scratch(ce, cs); + +- /* Wa_22011450934:dg2 */ +- if (IS_DG2_GRAPHICS_STEP(ce->engine->i915, G10, STEP_A0, STEP_B0) || +- IS_DG2_GRAPHICS_STEP(ce->engine->i915, G11, STEP_A0, STEP_B0)) +- cs = dg2_emit_rcs_hang_wabb(ce, cs); +- + /* Wa_16013000631:dg2 */ +- if (IS_DG2_GRAPHICS_STEP(ce->engine->i915, G10, STEP_B0, STEP_C0) || +- IS_DG2_G11(ce->engine->i915)) ++ if (IS_DG2_G11(ce->engine->i915)) + cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE, 0); + + cs = gen12_emit_aux_table_inv(ce->engine, cs); + + /* Wa_16014892111 */ +- if (IS_MTL_GRAPHICS_STEP(ce->engine->i915, M, STEP_A0, STEP_B0) || +- IS_MTL_GRAPHICS_STEP(ce->engine->i915, P, STEP_A0, STEP_B0) || ++ if (IS_GFX_GT_IP_STEP(ce->engine->gt, IP_VER(12, 70), STEP_A0, STEP_B0) || ++ IS_GFX_GT_IP_STEP(ce->engine->gt, IP_VER(12, 71), STEP_A0, STEP_B0) || + IS_DG2(ce->engine->i915)) + cs = dg2_emit_draw_watermark_setting(cs); + +@@ -1391,8 +1362,7 @@ gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs) + cs = gen12_emit_restore_scratch(ce, cs); + + /* Wa_16013000631:dg2 */ +- if (IS_DG2_GRAPHICS_STEP(ce->engine->i915, G10, STEP_B0, STEP_C0) || +- IS_DG2_G11(ce->engine->i915)) ++ if (IS_DG2_G11(ce->engine->i915)) + if (ce->engine->class == COMPUTE_CLASS) + cs = gen8_emit_pipe_control(cs, + PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE, +diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c +index 2c014407225cc..07269ff3be136 100644 +--- a/drivers/gpu/drm/i915/gt/intel_mocs.c ++++ b/drivers/gpu/drm/i915/gt/intel_mocs.c +@@ -404,18 +404,6 @@ static const struct drm_i915_mocs_entry dg2_mocs_table[] = { + MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)), + }; + +-static const struct drm_i915_mocs_entry dg2_mocs_table_g10_ax[] = { +- /* Wa_14011441408: Set Go to Memory for MOCS#0 */ +- MOCS_ENTRY(0, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)), +- /* UC - Coherent; GO:Memory */ +- MOCS_ENTRY(1, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)), +- /* UC - Non-Coherent; GO:Memory */ +- MOCS_ENTRY(2, 0, L3_1_UC | L3_GLBGO(1)), +- +- /* WB - LC */ +- MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)), +-}; +- + static const struct drm_i915_mocs_entry pvc_mocs_table[] = { + /* Error */ + MOCS_ENTRY(0, 0, L3_3_WB), +@@ -507,7 +495,7 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915, + memset(table, 0, sizeof(struct drm_i915_mocs_table)); + + table->unused_entries_index = I915_MOCS_PTE; +- if (IS_METEORLAKE(i915)) { ++ if (IS_GFX_GT_IP_RANGE(&i915->gt0, IP_VER(12, 70), IP_VER(12, 71))) { + table->size = ARRAY_SIZE(mtl_mocs_table); + table->table = mtl_mocs_table; + table->n_entries = MTL_NUM_MOCS_ENTRIES; +@@ -521,13 +509,8 @@ static unsigned int 
get_mocs_settings(const struct drm_i915_private *i915, + table->wb_index = 2; + table->unused_entries_index = 2; + } else if (IS_DG2(i915)) { +- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) { +- table->size = ARRAY_SIZE(dg2_mocs_table_g10_ax); +- table->table = dg2_mocs_table_g10_ax; +- } else { +- table->size = ARRAY_SIZE(dg2_mocs_table); +- table->table = dg2_mocs_table; +- } ++ table->size = ARRAY_SIZE(dg2_mocs_table); ++ table->table = dg2_mocs_table; + table->uc_index = 1; + table->n_entries = GEN9_NUM_MOCS_ENTRIES; + table->unused_entries_index = 3; +diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c +index ccdc1afbf11b5..9e113e9473260 100644 +--- a/drivers/gpu/drm/i915/gt/intel_rc6.c ++++ b/drivers/gpu/drm/i915/gt/intel_rc6.c +@@ -118,14 +118,12 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6) + GEN6_RC_CTL_EI_MODE(1); + + /* +- * Wa_16011777198 and BSpec 52698 - Render powergating must be off. ++ * BSpec 52698 - Render powergating must be off. + * FIXME BSpec is outdated, disabling powergating for MTL is just + * temporary wa and should be removed after fixing real cause + * of forcewake timeouts. + */ +- if (IS_METEORLAKE(gt->i915) || +- IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_C0) || +- IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0)) ++ if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71))) + pg_enable = + GEN9_MEDIA_PG_ENABLE | + GEN11_MEDIA_SAMPLER_PG_ENABLE; +diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c +index 5fa57a34cf4bb..13fb8e5042c58 100644 +--- a/drivers/gpu/drm/i915/gt/intel_reset.c ++++ b/drivers/gpu/drm/i915/gt/intel_reset.c +@@ -705,7 +705,7 @@ static int __reset_guc(struct intel_gt *gt) + + static bool needs_wa_14015076503(struct intel_gt *gt, intel_engine_mask_t engine_mask) + { +- if (!IS_METEORLAKE(gt->i915) || !HAS_ENGINE(gt, GSC0)) ++ if (MEDIA_VER_FULL(gt->i915) != IP_VER(13, 0) || !HAS_ENGINE(gt, GSC0)) + return false; + + if (!__HAS_ENGINE(engine_mask, GSC0)) +@@ -1632,6 +1632,24 @@ void __intel_fini_wedge(struct intel_wedge_me *w) + w->gt = NULL; + } + ++/* ++ * Wa_22011802037 requires that we (or the GuC) ensure that no command ++ * streamers are executing MI_FORCE_WAKE while an engine reset is initiated. 
++ */ ++bool intel_engine_reset_needs_wa_22011802037(struct intel_gt *gt) ++{ ++ if (GRAPHICS_VER(gt->i915) < 11) ++ return false; ++ ++ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0)) ++ return true; ++ ++ if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70)) ++ return false; ++ ++ return true; ++} ++ + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) + #include "selftest_reset.c" + #include "selftest_hangcheck.c" +diff --git a/drivers/gpu/drm/i915/gt/intel_reset.h b/drivers/gpu/drm/i915/gt/intel_reset.h +index 25c975b6e8fc0..f615b30b81c59 100644 +--- a/drivers/gpu/drm/i915/gt/intel_reset.h ++++ b/drivers/gpu/drm/i915/gt/intel_reset.h +@@ -78,4 +78,6 @@ void __intel_fini_wedge(struct intel_wedge_me *w); + bool intel_has_gpu_reset(const struct intel_gt *gt); + bool intel_has_reset_engine(const struct intel_gt *gt); + ++bool intel_engine_reset_needs_wa_22011802037(struct intel_gt *gt); ++ + #endif /* I915_RESET_H */ +diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c +index 092542f53aad9..4feef874e6d69 100644 +--- a/drivers/gpu/drm/i915/gt/intel_rps.c ++++ b/drivers/gpu/drm/i915/gt/intel_rps.c +@@ -1161,7 +1161,7 @@ void gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *c + { + struct drm_i915_private *i915 = rps_to_i915(rps); + +- if (IS_METEORLAKE(i915)) ++ if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) + return mtl_get_freq_caps(rps, caps); + else + return __gen6_rps_get_freq_caps(rps, caps); +diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c +index 3ae0dbd39eaa3..be060b32bd9ce 100644 +--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c ++++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c +@@ -10,6 +10,7 @@ + #include "intel_engine_regs.h" + #include "intel_gpu_commands.h" + #include "intel_gt.h" ++#include "intel_gt_ccs_mode.h" + #include "intel_gt_mcr.h" + #include "intel_gt_regs.h" + #include "intel_ring.h" +@@ -50,7 +51,8 @@ + * registers belonging to BCS, VCS or VECS should be implemented in + * xcs_engine_wa_init(). Workarounds for registers not belonging to a specific + * engine's MMIO range but that are part of of the common RCS/CCS reset domain +- * should be implemented in general_render_compute_wa_init(). ++ * should be implemented in general_render_compute_wa_init(). The settings ++ * about the CCS load balancing should be added in ccs_engine_wa_mode(). + * + * - GT workarounds: the list of these WAs is applied whenever these registers + * revert to their default values: on GPU reset, suspend/resume [1]_, etc. 
+@@ -764,39 +766,15 @@ static void dg2_ctx_workarounds_init(struct intel_engine_cs *engine, + { + dg2_ctx_gt_tuning_init(engine, wal); + +- /* Wa_16011186671:dg2_g11 */ +- if (IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) { +- wa_mcr_masked_dis(wal, VFLSKPD, DIS_MULT_MISS_RD_SQUASH); +- wa_mcr_masked_en(wal, VFLSKPD, DIS_OVER_FETCH_CACHE); +- } +- +- if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) { +- /* Wa_14010469329:dg2_g10 */ +- wa_mcr_masked_en(wal, XEHP_COMMON_SLICE_CHICKEN3, +- XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE); +- +- /* +- * Wa_22010465075:dg2_g10 +- * Wa_22010613112:dg2_g10 +- * Wa_14010698770:dg2_g10 +- */ +- wa_mcr_masked_en(wal, XEHP_COMMON_SLICE_CHICKEN3, +- GEN12_DISABLE_CPS_AWARE_COLOR_PIPE); +- } +- + /* Wa_16013271637:dg2 */ + wa_mcr_masked_en(wal, XEHP_SLICE_COMMON_ECO_CHICKEN1, + MSC_MSAA_REODER_BUF_BYPASS_DISABLE); + + /* Wa_14014947963:dg2 */ +- if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_B0, STEP_FOREVER) || +- IS_DG2_G11(engine->i915) || IS_DG2_G12(engine->i915)) +- wa_masked_field_set(wal, VF_PREEMPTION, PREEMPTION_VERTEX_COUNT, 0x4000); ++ wa_masked_field_set(wal, VF_PREEMPTION, PREEMPTION_VERTEX_COUNT, 0x4000); + + /* Wa_18018764978:dg2 */ +- if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_C0, STEP_FOREVER) || +- IS_DG2_G11(engine->i915) || IS_DG2_G12(engine->i915)) +- wa_mcr_masked_en(wal, XEHP_PSS_MODE2, SCOREBOARD_STALL_FLUSH_CONTROL); ++ wa_mcr_masked_en(wal, XEHP_PSS_MODE2, SCOREBOARD_STALL_FLUSH_CONTROL); + + /* Wa_15010599737:dg2 */ + wa_mcr_masked_en(wal, CHICKEN_RASTER_1, DIS_SF_ROUND_NEAREST_EVEN); +@@ -805,27 +783,32 @@ static void dg2_ctx_workarounds_init(struct intel_engine_cs *engine, + wa_masked_en(wal, CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE); + } + +-static void mtl_ctx_gt_tuning_init(struct intel_engine_cs *engine, +- struct i915_wa_list *wal) ++static void xelpg_ctx_gt_tuning_init(struct intel_engine_cs *engine, ++ struct i915_wa_list *wal) + { +- struct drm_i915_private *i915 = engine->i915; ++ struct intel_gt *gt = engine->gt; + + dg2_ctx_gt_tuning_init(engine, wal); + +- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_B0, STEP_FOREVER) || +- IS_MTL_GRAPHICS_STEP(i915, P, STEP_B0, STEP_FOREVER)) ++ /* ++ * Due to Wa_16014892111, the DRAW_WATERMARK tuning must be done in ++ * gen12_emit_indirect_ctx_rcs() rather than here on some early ++ * steppings. 
++ */ ++ if (!(IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || ++ IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0))) + wa_add(wal, DRAW_WATERMARK, VERT_WM_VAL, 0x3FF, 0, false); + } + +-static void mtl_ctx_workarounds_init(struct intel_engine_cs *engine, +- struct i915_wa_list *wal) ++static void xelpg_ctx_workarounds_init(struct intel_engine_cs *engine, ++ struct i915_wa_list *wal) + { +- struct drm_i915_private *i915 = engine->i915; ++ struct intel_gt *gt = engine->gt; + +- mtl_ctx_gt_tuning_init(engine, wal); ++ xelpg_ctx_gt_tuning_init(engine, wal); + +- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || +- IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0)) { ++ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || ++ IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) { + /* Wa_14014947963 */ + wa_masked_field_set(wal, VF_PREEMPTION, + PREEMPTION_VERTEX_COUNT, 0x4000); +@@ -931,8 +914,8 @@ __intel_engine_init_ctx_wa(struct intel_engine_cs *engine, + if (engine->class != RENDER_CLASS) + goto done; + +- if (IS_METEORLAKE(i915)) +- mtl_ctx_workarounds_init(engine, wal); ++ if (IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 70), IP_VER(12, 74))) ++ xelpg_ctx_workarounds_init(engine, wal); + else if (IS_PONTEVECCHIO(i915)) + ; /* noop; none at this time */ + else if (IS_DG2(i915)) +@@ -1606,31 +1589,11 @@ xehpsdv_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) + static void + dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) + { +- struct intel_engine_cs *engine; +- int id; +- + xehp_init_mcr(gt, wal); + + /* Wa_14011060649:dg2 */ + wa_14011060649(gt, wal); + +- /* +- * Although there are per-engine instances of these registers, +- * they technically exist outside the engine itself and are not +- * impacted by engine resets. Furthermore, they're part of the +- * GuC blacklist so trying to treat them as engine workarounds +- * will result in GuC initialization failure and a wedged GPU. 
+- */ +- for_each_engine(engine, gt, id) { +- if (engine->class != VIDEO_DECODE_CLASS) +- continue; +- +- /* Wa_16010515920:dg2_g10 */ +- if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0)) +- wa_write_or(wal, VDBOX_CGCTL3F18(engine->mmio_base), +- ALNUNIT_CLKGATE_DIS); +- } +- + if (IS_DG2_G10(gt->i915)) { + /* Wa_22010523718:dg2 */ + wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE, +@@ -1641,65 +1604,6 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) + DSS_ROUTER_CLKGATE_DIS); + } + +- if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0) || +- IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0)) { +- /* Wa_14012362059:dg2 */ +- wa_mcr_write_or(wal, XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB); +- } +- +- if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0)) { +- /* Wa_14010948348:dg2_g10 */ +- wa_write_or(wal, UNSLCGCTL9430, MSQDUNIT_CLKGATE_DIS); +- +- /* Wa_14011037102:dg2_g10 */ +- wa_write_or(wal, UNSLCGCTL9444, LTCDD_CLKGATE_DIS); +- +- /* Wa_14011371254:dg2_g10 */ +- wa_mcr_write_or(wal, XEHP_SLICE_UNIT_LEVEL_CLKGATE, NODEDSS_CLKGATE_DIS); +- +- /* Wa_14011431319:dg2_g10 */ +- wa_write_or(wal, UNSLCGCTL9440, GAMTLBOACS_CLKGATE_DIS | +- GAMTLBVDBOX7_CLKGATE_DIS | +- GAMTLBVDBOX6_CLKGATE_DIS | +- GAMTLBVDBOX5_CLKGATE_DIS | +- GAMTLBVDBOX4_CLKGATE_DIS | +- GAMTLBVDBOX3_CLKGATE_DIS | +- GAMTLBVDBOX2_CLKGATE_DIS | +- GAMTLBVDBOX1_CLKGATE_DIS | +- GAMTLBVDBOX0_CLKGATE_DIS | +- GAMTLBKCR_CLKGATE_DIS | +- GAMTLBGUC_CLKGATE_DIS | +- GAMTLBBLT_CLKGATE_DIS); +- wa_write_or(wal, UNSLCGCTL9444, GAMTLBGFXA0_CLKGATE_DIS | +- GAMTLBGFXA1_CLKGATE_DIS | +- GAMTLBCOMPA0_CLKGATE_DIS | +- GAMTLBCOMPA1_CLKGATE_DIS | +- GAMTLBCOMPB0_CLKGATE_DIS | +- GAMTLBCOMPB1_CLKGATE_DIS | +- GAMTLBCOMPC0_CLKGATE_DIS | +- GAMTLBCOMPC1_CLKGATE_DIS | +- GAMTLBCOMPD0_CLKGATE_DIS | +- GAMTLBCOMPD1_CLKGATE_DIS | +- GAMTLBMERT_CLKGATE_DIS | +- GAMTLBVEBOX3_CLKGATE_DIS | +- GAMTLBVEBOX2_CLKGATE_DIS | +- GAMTLBVEBOX1_CLKGATE_DIS | +- GAMTLBVEBOX0_CLKGATE_DIS); +- +- /* Wa_14010569222:dg2_g10 */ +- wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE, +- GAMEDIA_CLKGATE_DIS); +- +- /* Wa_14011028019:dg2_g10 */ +- wa_mcr_write_or(wal, SSMCGCTL9530, RTFUNIT_CLKGATE_DIS); +- +- /* Wa_14010680813:dg2_g10 */ +- wa_mcr_write_or(wal, XEHP_GAMSTLB_CTRL, +- CONTROL_BLOCK_CLKGATE_DIS | +- EGRESS_BLOCK_CLKGATE_DIS | +- TAG_BLOCK_CLKGATE_DIS); +- } +- + /* Wa_14014830051:dg2 */ + wa_mcr_write_clr(wal, SARB_CHICKEN1, COMP_CKN_IN); + +@@ -1741,14 +1645,15 @@ pvc_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) + static void + xelpg_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) + { +- /* Wa_14018778641 / Wa_18018781329 */ ++ /* Wa_14018575942 / Wa_18018781329 */ ++ wa_mcr_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB); + wa_mcr_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB); + + /* Wa_22016670082 */ + wa_write_or(wal, GEN12_SQCNT1, GEN12_STRICT_RAR_ENABLE); + +- if (IS_MTL_GRAPHICS_STEP(gt->i915, M, STEP_A0, STEP_B0) || +- IS_MTL_GRAPHICS_STEP(gt->i915, P, STEP_A0, STEP_B0)) { ++ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || ++ IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) { + /* Wa_14014830051 */ + wa_mcr_write_clr(wal, SARB_CHICKEN1, COMP_CKN_IN); + +@@ -1791,10 +1696,8 @@ xelpmp_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) + */ + static void gt_tuning_settings(struct intel_gt *gt, struct i915_wa_list *wal) + { +- if (IS_METEORLAKE(gt->i915)) { +- if (gt->type != GT_MEDIA) +- wa_mcr_write_or(wal, XEHP_L3SCQREG7, 
BLEND_FILL_CACHING_OPT_DIS); +- ++ if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74))) { ++ wa_mcr_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS); + wa_mcr_write_or(wal, XEHP_SQCM, EN_32B_ACCESS); + } + +@@ -1826,7 +1729,7 @@ gt_init_workarounds(struct intel_gt *gt, struct i915_wa_list *wal) + return; + } + +- if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) ++ if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74))) + xelpg_gt_workarounds_init(gt, wal); + else if (IS_PONTEVECCHIO(i915)) + pvc_gt_workarounds_init(gt, wal); +@@ -2242,29 +2145,10 @@ static void dg2_whitelist_build(struct intel_engine_cs *engine) + + switch (engine->class) { + case RENDER_CLASS: +- /* +- * Wa_1507100340:dg2_g10 +- * +- * This covers 4 registers which are next to one another : +- * - PS_INVOCATION_COUNT +- * - PS_INVOCATION_COUNT_UDW +- * - PS_DEPTH_COUNT +- * - PS_DEPTH_COUNT_UDW +- */ +- if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) +- whitelist_reg_ext(w, PS_INVOCATION_COUNT, +- RING_FORCE_TO_NONPRIV_ACCESS_RD | +- RING_FORCE_TO_NONPRIV_RANGE_4); +- + /* Required by recommended tuning setting (not a workaround) */ + whitelist_mcr_reg(w, XEHP_COMMON_SLICE_CHICKEN3); + + break; +- case COMPUTE_CLASS: +- /* Wa_16011157294:dg2_g10 */ +- if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) +- whitelist_reg(w, GEN9_CTX_PREEMPT_REG); +- break; + default: + break; + } +@@ -2294,7 +2178,7 @@ static void pvc_whitelist_build(struct intel_engine_cs *engine) + blacklist_trtt(engine); + } + +-static void mtl_whitelist_build(struct intel_engine_cs *engine) ++static void xelpg_whitelist_build(struct intel_engine_cs *engine) + { + struct i915_wa_list *w = &engine->whitelist; + +@@ -2316,8 +2200,10 @@ void intel_engine_init_whitelist(struct intel_engine_cs *engine) + + wa_init_start(w, engine->gt, "whitelist", engine->name); + +- if (IS_METEORLAKE(i915)) +- mtl_whitelist_build(engine); ++ if (engine->gt->type == GT_MEDIA) ++ ; /* none yet */ ++ else if (IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 70), IP_VER(12, 74))) ++ xelpg_whitelist_build(engine); + else if (IS_PONTEVECCHIO(i915)) + pvc_whitelist_build(engine); + else if (IS_DG2(i915)) +@@ -2415,62 +2301,35 @@ engine_fake_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) + } + } + +-static bool needs_wa_1308578152(struct intel_engine_cs *engine) +-{ +- return intel_sseu_find_first_xehp_dss(&engine->gt->info.sseu, 0, 0) >= +- GEN_DSS_PER_GSLICE; +-} +- + static void + rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) + { + struct drm_i915_private *i915 = engine->i915; ++ struct intel_gt *gt = engine->gt; + +- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || +- IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0)) { ++ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || ++ IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) { + /* Wa_22014600077 */ + wa_mcr_masked_en(wal, GEN10_CACHE_MODE_SS, + ENABLE_EU_COUNT_FOR_TDL_FLUSH); + } + +- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || +- IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0) || +- IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) || +- IS_DG2_G11(i915) || IS_DG2_G12(i915)) { ++ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || ++ IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0) || ++ IS_DG2(i915)) { + /* Wa_1509727124 */ + wa_mcr_masked_en(wal, GEN10_SAMPLER_MODE, + SC_DISABLE_POWER_OPTIMIZATION_EBB); + } + +- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) || +- 
IS_DG2_G11(i915) || IS_DG2_G12(i915) || +- IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0)) { ++ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || ++ IS_DG2(i915)) { + /* Wa_22012856258 */ + wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, + GEN12_DISABLE_READ_SUPPRESSION); + } + +- if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) { +- /* Wa_14013392000:dg2_g11 */ +- wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, GEN12_ENABLE_LARGE_GRF_MODE); +- } +- +- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0) || +- IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) { +- /* Wa_14012419201:dg2 */ +- wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, +- GEN12_DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX); +- } +- +- /* Wa_1308578152:dg2_g10 when first gslice is fused off */ +- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) && +- needs_wa_1308578152(engine)) { +- wa_masked_dis(wal, GEN12_CS_DEBUG_MODE1_CCCSUNIT_BE_COMMON, +- GEN12_REPLAY_MODE_GRANULARITY); +- } +- +- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) || +- IS_DG2_G11(i915) || IS_DG2_G12(i915)) { ++ if (IS_DG2(i915)) { + /* + * Wa_22010960976:dg2 + * Wa_14013347512:dg2 +@@ -2479,34 +2338,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) + LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK); + } + +- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) { +- /* +- * Wa_1608949956:dg2_g10 +- * Wa_14010198302:dg2_g10 +- */ +- wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN, +- MDQ_ARBITRATION_MODE | UGM_BACKUP_MODE); +- } +- +- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) +- /* Wa_22010430635:dg2 */ +- wa_mcr_masked_en(wal, +- GEN9_ROW_CHICKEN4, +- GEN12_DISABLE_GRF_CLEAR); +- +- /* Wa_14013202645:dg2 */ +- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) || +- IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) +- wa_mcr_write_or(wal, RT_CTRL, DIS_NULL_QUERY); +- +- /* Wa_22012532006:dg2 */ +- if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_C0) || +- IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) +- wa_mcr_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7, +- DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA); +- +- if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_B0, STEP_FOREVER) || +- IS_DG2_G10(i915)) { ++ if (IS_DG2_G11(i915) || IS_DG2_G10(i915)) { + /* Wa_22014600077:dg2 */ + wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0, + _MASKED_BIT_ENABLE(ENABLE_EU_COUNT_FOR_TDL_FLUSH), +@@ -2514,6 +2346,19 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) + true); + } + ++ if (IS_DG2(i915) || IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) || ++ IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) { ++ /* ++ * Wa_1606700617:tgl,dg1,adl-p ++ * Wa_22010271021:tgl,rkl,dg1,adl-s,adl-p ++ * Wa_14010826681:tgl,dg1,rkl,adl-p ++ * Wa_18019627453:dg2 ++ */ ++ wa_masked_en(wal, ++ GEN9_CS_DEBUG_MODE1, ++ FF_DOP_CLOCK_GATE_DISABLE); ++ } ++ + if (IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) || IS_DG1(i915) || + IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) { + /* Wa_1606931601:tgl,rkl,dg1,adl-s,adl-p */ +@@ -2527,19 +2372,11 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) + */ + wa_write_or(wal, GEN7_FF_THREAD_MODE, + GEN12_FF_TESSELATION_DOP_GATE_DISABLE); +- } + +- if (IS_ALDERLAKE_P(i915) || IS_DG2(i915) || IS_ALDERLAKE_S(i915) || +- IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) { +- /* +- * Wa_1606700617:tgl,dg1,adl-p +- * Wa_22010271021:tgl,rkl,dg1,adl-s,adl-p +- * Wa_14010826681:tgl,dg1,rkl,adl-p +- * Wa_18019627453:dg2 +- */ +- wa_masked_en(wal, +- 
GEN9_CS_DEBUG_MODE1, +- FF_DOP_CLOCK_GATE_DISABLE); ++ /* Wa_1406941453:tgl,rkl,dg1,adl-s,adl-p */ ++ wa_mcr_masked_en(wal, ++ GEN10_SAMPLER_MODE, ++ ENABLE_SMALLPL); + } + + if (IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) || +@@ -2566,14 +2403,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) + GEN8_RC_SEMA_IDLE_MSG_DISABLE); + } + +- if (IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915) || +- IS_ALDERLAKE_S(i915) || IS_ALDERLAKE_P(i915)) { +- /* Wa_1406941453:tgl,rkl,dg1,adl-s,adl-p */ +- wa_mcr_masked_en(wal, +- GEN10_SAMPLER_MODE, +- ENABLE_SMALLPL); +- } +- + if (GRAPHICS_VER(i915) == 11) { + /* This is not an Wa. Enable for better image quality */ + wa_masked_en(wal, +@@ -2975,10 +2804,12 @@ ccs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) + * function invoked by __intel_engine_init_ctx_wa(). + */ + static void +-add_render_compute_tuning_settings(struct drm_i915_private *i915, ++add_render_compute_tuning_settings(struct intel_gt *gt, + struct i915_wa_list *wal) + { +- if (IS_METEORLAKE(i915) || IS_DG2(i915)) ++ struct drm_i915_private *i915 = gt->i915; ++ ++ if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74)) || IS_DG2(i915)) + wa_mcr_write_clr_set(wal, RT_CTRL, STACKID_CTRL, STACKID_CTRL_512); + + /* +@@ -2994,6 +2825,28 @@ add_render_compute_tuning_settings(struct drm_i915_private *i915, + wa_write_clr(wal, GEN8_GARBCNTL, GEN12_BUS_HASH_CTL_BIT_EXC); + } + ++static void ccs_engine_wa_mode(struct intel_engine_cs *engine, struct i915_wa_list *wal) ++{ ++ struct intel_gt *gt = engine->gt; ++ ++ if (!IS_DG2(gt->i915)) ++ return; ++ ++ /* ++ * Wa_14019159160: This workaround, along with others, leads to ++ * significant challenges in utilizing load balancing among the ++ * CCS slices. Consequently, an architectural decision has been ++ * made to completely disable automatic CCS load balancing. ++ */ ++ wa_masked_en(wal, GEN12_RCU_MODE, XEHP_RCU_MODE_FIXED_SLICE_CCS_MODE); ++ ++ /* ++ * After having disabled automatic load balancing we need to ++ * assign all slices to a single CCS. 
We will call it CCS mode 1 ++ */ ++ intel_gt_apply_ccs_mode(gt); ++} ++ + /* + * The workarounds in this function apply to shared registers in + * the general render reset domain that aren't tied to a +@@ -3007,8 +2860,9 @@ static void + general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) + { + struct drm_i915_private *i915 = engine->i915; ++ struct intel_gt *gt = engine->gt; + +- add_render_compute_tuning_settings(i915, wal); ++ add_render_compute_tuning_settings(gt, wal); + + if (GRAPHICS_VER(i915) >= 11) { + /* This is not a Wa (although referred to as +@@ -3029,13 +2883,14 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li + GEN11_INDIRECT_STATE_BASE_ADDR_OVERRIDE); + } + +- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_B0, STEP_FOREVER) || +- IS_MTL_GRAPHICS_STEP(i915, P, STEP_B0, STEP_FOREVER)) ++ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_B0, STEP_FOREVER) || ++ IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_B0, STEP_FOREVER) || ++ IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 74), IP_VER(12, 74))) + /* Wa_14017856879 */ + wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN3, MTL_DISABLE_FIX_FOR_EOT_FLUSH); + +- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || +- IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0)) ++ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || ++ IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) + /* + * Wa_14017066071 + * Wa_14017654203 +@@ -3043,37 +2898,47 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li + wa_mcr_masked_en(wal, GEN10_SAMPLER_MODE, + MTL_DISABLE_SAMPLER_SC_OOO); + +- if (IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0)) ++ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) + /* Wa_22015279794 */ + wa_mcr_masked_en(wal, GEN10_CACHE_MODE_SS, + DISABLE_PREFETCH_INTO_IC); + +- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || +- IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0) || +- IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) || +- IS_DG2_G11(i915) || IS_DG2_G12(i915)) { ++ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || ++ IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0) || ++ IS_DG2(i915)) { + /* Wa_22013037850 */ + wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, + DISABLE_128B_EVICTION_COMMAND_UDW); ++ ++ /* Wa_18017747507 */ ++ wa_masked_en(wal, VFG_PREEMPTION_CHICKEN, POLYGON_TRIFAN_LINELOOP_DISABLE); + } + +- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || +- IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0) || ++ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || ++ IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0) || + IS_PONTEVECCHIO(i915) || + IS_DG2(i915)) { + /* Wa_22014226127 */ + wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0, DISABLE_D8_D16_COASLESCE); + } + +- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || +- IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0) || +- IS_DG2(i915)) { +- /* Wa_18017747507 */ +- wa_masked_en(wal, VFG_PREEMPTION_CHICKEN, POLYGON_TRIFAN_LINELOOP_DISABLE); ++ if (IS_PONTEVECCHIO(i915) || IS_DG2(i915)) { ++ /* Wa_14015227452:dg2,pvc */ ++ wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE); ++ ++ /* Wa_16015675438:dg2,pvc */ ++ wa_masked_en(wal, FF_SLICE_CS_CHICKEN2, GEN12_PERF_FIX_BALANCING_CFE_DISABLE); ++ } ++ ++ if (IS_DG2(i915)) { ++ /* ++ * Wa_16011620976:dg2_g11 ++ * Wa_22015475538:dg2 ++ */ ++ wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8); + } + +- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) || +- 
IS_DG2_G11(i915)) { ++ if (IS_DG2_G11(i915)) { + /* + * Wa_22012826095:dg2 + * Wa_22013059131:dg2 +@@ -3085,18 +2950,18 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li + /* Wa_22013059131:dg2 */ + wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0, + FORCE_1_SUB_MESSAGE_PER_FRAGMENT); +- } + +- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) { + /* +- * Wa_14010918519:dg2_g10 ++ * Wa_22012654132 + * +- * LSC_CHICKEN_BIT_0 always reads back as 0 is this stepping, +- * so ignoring verification. ++ * Note that register 0xE420 is write-only and cannot be read ++ * back for verification on DG2 (due to Wa_14012342262), so ++ * we need to explicitly skip the readback. + */ +- wa_mcr_add(wal, LSC_CHICKEN_BIT_0_UDW, 0, +- FORCE_SLM_FENCE_SCOPE_TO_TILE | FORCE_UGM_FENCE_SCOPE_TO_TILE, +- 0, false); ++ wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0, ++ _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC), ++ 0 /* write-only, so skip validation */, ++ true); + } + + if (IS_XEHPSDV(i915)) { +@@ -3114,35 +2979,6 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li + wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1, + GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE); + } +- +- if (IS_DG2(i915) || IS_PONTEVECCHIO(i915)) { +- /* Wa_14015227452:dg2,pvc */ +- wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE); +- +- /* Wa_16015675438:dg2,pvc */ +- wa_masked_en(wal, FF_SLICE_CS_CHICKEN2, GEN12_PERF_FIX_BALANCING_CFE_DISABLE); +- } +- +- if (IS_DG2(i915)) { +- /* +- * Wa_16011620976:dg2_g11 +- * Wa_22015475538:dg2 +- */ +- wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8); +- } +- +- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_C0) || IS_DG2_G11(i915)) +- /* +- * Wa_22012654132 +- * +- * Note that register 0xE420 is write-only and cannot be read +- * back for verification on DG2 (due to Wa_14012342262), so +- * we need to explicitly skip the readback. +- */ +- wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0, +- _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC), +- 0 /* write-only, so skip validation */, +- true); + } + + static void +@@ -3158,8 +2994,10 @@ engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal + * to a single RCS/CCS engine's workaround list since + * they're reset as part of the general render domain reset. + */ +- if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE) ++ if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE) { + general_render_compute_wa_init(engine, wal); ++ ccs_engine_wa_mode(engine, wal); ++ } + + if (engine->class == COMPUTE_CLASS) + ccs_engine_wa_init(engine, wal); +diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c +index 569b5fe94c416..861d0c58388cf 100644 +--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c ++++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c +@@ -272,18 +272,14 @@ static u32 guc_ctl_wa_flags(struct intel_guc *guc) + GRAPHICS_VER_FULL(gt->i915) < IP_VER(12, 50)) + flags |= GUC_WA_POLLCS; + +- /* Wa_16011759253:dg2_g10:a0 */ +- if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0)) +- flags |= GUC_WA_GAM_CREDITS; +- + /* Wa_14014475959 */ +- if (IS_MTL_GRAPHICS_STEP(gt->i915, M, STEP_A0, STEP_B0) || ++ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || + IS_DG2(gt->i915)) + flags |= GUC_WA_HOLD_CCS_SWITCHOUT; + + /* +- * Wa_14012197797:dg2_g10:a0,dg2_g11:a0 +- * Wa_22011391025:dg2_g10,dg2_g11,dg2_g12 ++ * Wa_14012197797 ++ * Wa_22011391025 + * + * The same WA bit is used for both and 22011391025 is applicable to + * all DG2. 
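
The i915 and GuC hunks above replace per-subplatform stepping checks (IS_MTL_GRAPHICS_STEP, IS_DG2_GRAPHICS_STEP) with GT graphics-IP gates such as IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0), which key off the IP version reported by the GT rather than a platform name. A minimal stand-alone sketch of that kind of gate is shown below; the packing in IP_VER(), the stepping enum and the helper name are illustrative assumptions, not the actual i915 definitions.

/* Illustrative sketch of an IP-version + stepping range gate, assuming a
 * simple (version << 8 | release) packing; not the real i915 macros. */
#include <stdbool.h>
#include <stdio.h>

#define IP_VER(ver, rel)	(((ver) << 8) | (rel))

enum step { STEP_A0, STEP_B0, STEP_C0, STEP_FOREVER = 0xff };

struct gt_info {
	unsigned int ip_ver;	/* e.g. IP_VER(12, 70) */
	enum step step;
};

/* True when the GT runs graphics IP @ip at a stepping in [since, until). */
static bool gt_ip_step_in_range(const struct gt_info *gt, unsigned int ip,
				enum step since, enum step until)
{
	return gt->ip_ver == ip && gt->step >= since && gt->step < until;
}

int main(void)
{
	struct gt_info gt = { .ip_ver = IP_VER(12, 70), .step = STEP_A0 };

	/* Would take a STEP_A0..STEP_B0 workaround branch: prints 1 */
	printf("%d\n", gt_ip_step_in_range(&gt, IP_VER(12, 70),
					   STEP_A0, STEP_B0));
	return 0;
}

Expressed this way, a workaround follows the graphics IP rather than a specific platform macro, which appears to be the intent of the conversions in these hunks.
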
+@@ -292,22 +288,14 @@ static u32 guc_ctl_wa_flags(struct intel_guc *guc) + flags |= GUC_WA_DUAL_QUEUE; + + /* Wa_22011802037: graphics version 11/12 */ +- if (IS_MTL_GRAPHICS_STEP(gt->i915, M, STEP_A0, STEP_B0) || +- (GRAPHICS_VER(gt->i915) >= 11 && +- GRAPHICS_VER_FULL(gt->i915) < IP_VER(12, 70))) ++ if (intel_engine_reset_needs_wa_22011802037(gt)) + flags |= GUC_WA_PRE_PARSER; + +- /* Wa_16011777198:dg2 */ +- if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_C0) || +- IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0)) +- flags |= GUC_WA_RCS_RESET_BEFORE_RC6; +- + /* +- * Wa_22012727170:dg2_g10[a0-c0), dg2_g11[a0..) +- * Wa_22012727685:dg2_g11[a0..) ++ * Wa_22012727170 ++ * Wa_22012727685 + */ +- if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_C0) || +- IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_FOREVER)) ++ if (IS_DG2_G11(gt->i915)) + flags |= GUC_WA_CONTEXT_ISOLATION; + + /* Wa_16015675438 */ +diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +index 836e4d9d65ef6..b5de5a9f59671 100644 +--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c ++++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +@@ -1690,9 +1690,7 @@ static void guc_engine_reset_prepare(struct intel_engine_cs *engine) + * Wa_22011802037: In addition to stopping the cs, we need + * to wait for any pending mi force wakeups + */ +- if (IS_MTL_GRAPHICS_STEP(engine->i915, M, STEP_A0, STEP_B0) || +- (GRAPHICS_VER(engine->i915) >= 11 && +- GRAPHICS_VER_FULL(engine->i915) < IP_VER(12, 70))) { ++ if (intel_engine_reset_needs_wa_22011802037(engine->gt)) { + intel_engine_stop_cs(engine); + intel_engine_wait_for_pending_mi_fw(engine); + } +@@ -4299,7 +4297,7 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine) + + /* Wa_14014475959:dg2 */ + if (engine->class == COMPUTE_CLASS) +- if (IS_MTL_GRAPHICS_STEP(engine->i915, M, STEP_A0, STEP_B0) || ++ if (IS_GFX_GT_IP_STEP(engine->gt, IP_VER(12, 70), STEP_A0, STEP_B0) || + IS_DG2(engine->i915)) + engine->flags |= I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT; + +diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c +index 4de44cf1026dc..7a90a2e32c9f1 100644 +--- a/drivers/gpu/drm/i915/i915_debugfs.c ++++ b/drivers/gpu/drm/i915/i915_debugfs.c +@@ -144,7 +144,7 @@ static const char *i915_cache_level_str(struct drm_i915_gem_object *obj) + { + struct drm_i915_private *i915 = obj_to_i915(obj); + +- if (IS_METEORLAKE(i915)) { ++ if (IS_GFX_GT_IP_RANGE(to_gt(i915), IP_VER(12, 70), IP_VER(12, 71))) { + switch (obj->pat_index) { + case 0: return " WB"; + case 1: return " WT"; +diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h +index 7a8ce7239bc9e..e0e0493d6c1f0 100644 +--- a/drivers/gpu/drm/i915/i915_drv.h ++++ b/drivers/gpu/drm/i915/i915_drv.h +@@ -658,10 +658,6 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, + #define IS_XEHPSDV_GRAPHICS_STEP(__i915, since, until) \ + (IS_XEHPSDV(__i915) && IS_GRAPHICS_STEP(__i915, since, until)) + +-#define IS_MTL_GRAPHICS_STEP(__i915, variant, since, until) \ +- (IS_SUBPLATFORM(__i915, INTEL_METEORLAKE, INTEL_SUBPLATFORM_##variant) && \ +- IS_GRAPHICS_STEP(__i915, since, until)) +- + #define IS_MTL_DISPLAY_STEP(__i915, since, until) \ + (IS_METEORLAKE(__i915) && \ + IS_DISPLAY_STEP(__i915, since, until)) +diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c +index 8f4a25d2cfc24..3f90403d86cb4 100644 +--- a/drivers/gpu/drm/i915/i915_perf.c ++++ 
b/drivers/gpu/drm/i915/i915_perf.c +@@ -3255,11 +3255,10 @@ get_sseu_config(struct intel_sseu *out_sseu, + */ + u32 i915_perf_oa_timestamp_frequency(struct drm_i915_private *i915) + { +- /* +- * Wa_18013179988:dg2 +- * Wa_14015846243:mtl +- */ +- if (IS_DG2(i915) || IS_METEORLAKE(i915)) { ++ struct intel_gt *gt = to_gt(i915); ++ ++ /* Wa_18013179988 */ ++ if (IS_DG2(i915) || IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74))) { + intel_wakeref_t wakeref; + u32 reg, shift; + +@@ -4564,7 +4563,7 @@ static bool xehp_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr) + + static bool gen12_is_valid_mux_addr(struct i915_perf *perf, u32 addr) + { +- if (IS_METEORLAKE(perf->i915)) ++ if (GRAPHICS_VER_FULL(perf->i915) >= IP_VER(12, 70)) + return reg_in_range_table(addr, mtl_oa_mux_regs); + else + return reg_in_range_table(addr, gen12_oa_mux_regs); +diff --git a/drivers/gpu/drm/i915/intel_clock_gating.c b/drivers/gpu/drm/i915/intel_clock_gating.c +index 81a4d32734e94..c66eb6abd4a2e 100644 +--- a/drivers/gpu/drm/i915/intel_clock_gating.c ++++ b/drivers/gpu/drm/i915/intel_clock_gating.c +@@ -396,14 +396,6 @@ static void dg2_init_clock_gating(struct drm_i915_private *i915) + /* Wa_22010954014:dg2 */ + intel_uncore_rmw(&i915->uncore, XEHP_CLOCK_GATE_DIS, 0, + SGSI_SIDECLK_DIS); +- +- /* +- * Wa_14010733611:dg2_g10 +- * Wa_22010146351:dg2_g10 +- */ +- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) +- intel_uncore_rmw(&i915->uncore, XEHP_CLOCK_GATE_DIS, 0, +- SGR_DIS | SGGI_DIS); + } + + static void pvc_init_clock_gating(struct drm_i915_private *i915) +diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.c b/drivers/gpu/drm/nouveau/nouveau_uvmm.c +index aae780e4a4aa3..2bbcdc649e862 100644 +--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c ++++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c +@@ -804,15 +804,15 @@ op_remap(struct drm_gpuva_op_remap *r, + struct drm_gpuva_op_unmap *u = r->unmap; + struct nouveau_uvma *uvma = uvma_from_va(u->va); + u64 addr = uvma->va.va.addr; +- u64 range = uvma->va.va.range; ++ u64 end = uvma->va.va.addr + uvma->va.va.range; + + if (r->prev) + addr = r->prev->va.addr + r->prev->va.range; + + if (r->next) +- range = r->next->va.addr - addr; ++ end = r->next->va.addr; + +- op_unmap_range(u, addr, range); ++ op_unmap_range(u, addr, end - addr); + } + + static int +diff --git a/drivers/gpu/drm/panfrost/panfrost_gpu.c b/drivers/gpu/drm/panfrost/panfrost_gpu.c +index eca45b83e4e67..c067ff550692a 100644 +--- a/drivers/gpu/drm/panfrost/panfrost_gpu.c ++++ b/drivers/gpu/drm/panfrost/panfrost_gpu.c +@@ -387,19 +387,19 @@ void panfrost_gpu_power_off(struct panfrost_device *pfdev) + + gpu_write(pfdev, SHADER_PWROFF_LO, pfdev->features.shader_present); + ret = readl_relaxed_poll_timeout(pfdev->iomem + SHADER_PWRTRANS_LO, +- val, !val, 1, 1000); ++ val, !val, 1, 2000); + if (ret) + dev_err(pfdev->dev, "shader power transition timeout"); + + gpu_write(pfdev, TILER_PWROFF_LO, pfdev->features.tiler_present); + ret = readl_relaxed_poll_timeout(pfdev->iomem + TILER_PWRTRANS_LO, +- val, !val, 1, 1000); ++ val, !val, 1, 2000); + if (ret) + dev_err(pfdev->dev, "tiler power transition timeout"); + + gpu_write(pfdev, L2_PWROFF_LO, pfdev->features.l2_present); + ret = readl_poll_timeout(pfdev->iomem + L2_PWRTRANS_LO, +- val, !val, 0, 1000); ++ val, !val, 0, 2000); + if (ret) + dev_err(pfdev->dev, "l2 power transition timeout"); + } +diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c +index e7cd27e387df1..470add73f7bda 100644 +--- a/drivers/md/dm-integrity.c ++++ 
b/drivers/md/dm-integrity.c +@@ -4231,7 +4231,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned int argc, char **argv + } else if (sscanf(opt_string, "sectors_per_bit:%llu%c", &llval, &dummy) == 1) { + log2_sectors_per_bitmap_bit = !llval ? 0 : __ilog2_u64(llval); + } else if (sscanf(opt_string, "bitmap_flush_interval:%u%c", &val, &dummy) == 1) { +- if (val >= (uint64_t)UINT_MAX * 1000 / HZ) { ++ if ((uint64_t)val >= (uint64_t)UINT_MAX * 1000 / HZ) { + r = -EINVAL; + ti->error = "Invalid bitmap_flush_interval argument"; + goto bad; +diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c +index 5ad51271a5349..b8fde22aebf93 100644 +--- a/drivers/net/dsa/mv88e6xxx/chip.c ++++ b/drivers/net/dsa/mv88e6xxx/chip.c +@@ -5386,8 +5386,12 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { + .family = MV88E6XXX_FAMILY_6250, + .name = "Marvell 88E6020", + .num_databases = 64, +- .num_ports = 4, ++ /* Ports 2-4 are not routed to pins ++ * => usable ports 0, 1, 5, 6 ++ */ ++ .num_ports = 7, + .num_internal_phys = 2, ++ .invalid_port_mask = BIT(2) | BIT(3) | BIT(4), + .max_vid = 4095, + .port_base_addr = 0x8, + .phy_base_addr = 0x0, +diff --git a/drivers/net/dsa/sja1105/sja1105_mdio.c b/drivers/net/dsa/sja1105/sja1105_mdio.c +index 833e55e4b9612..52ddb4ef259e9 100644 +--- a/drivers/net/dsa/sja1105/sja1105_mdio.c ++++ b/drivers/net/dsa/sja1105/sja1105_mdio.c +@@ -94,7 +94,7 @@ int sja1110_pcs_mdio_read_c45(struct mii_bus *bus, int phy, int mmd, int reg) + return tmp & 0xffff; + } + +-int sja1110_pcs_mdio_write_c45(struct mii_bus *bus, int phy, int reg, int mmd, ++int sja1110_pcs_mdio_write_c45(struct mii_bus *bus, int phy, int mmd, int reg, + u16 val) + { + struct sja1105_mdio_private *mdio_priv = bus->priv; +diff --git a/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c b/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c +index 9cae5a3090000..b3d04f49f77e9 100644 +--- a/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c ++++ b/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c +@@ -391,7 +391,9 @@ static void umac_reset(struct bcmasp_intf *intf) + umac_wl(intf, 0x0, UMC_CMD); + umac_wl(intf, UMC_CMD_SW_RESET, UMC_CMD); + usleep_range(10, 100); +- umac_wl(intf, 0x0, UMC_CMD); ++ /* We hold the umac in reset and bring it out of ++ * reset when phy link is up. 
++ */ + } + + static void umac_set_hw_addr(struct bcmasp_intf *intf, +@@ -411,6 +413,8 @@ static void umac_enable_set(struct bcmasp_intf *intf, u32 mask, + u32 reg; + + reg = umac_rl(intf, UMC_CMD); ++ if (reg & UMC_CMD_SW_RESET) ++ return; + if (enable) + reg |= mask; + else +@@ -429,7 +433,6 @@ static void umac_init(struct bcmasp_intf *intf) + umac_wl(intf, 0x800, UMC_FRM_LEN); + umac_wl(intf, 0xffff, UMC_PAUSE_CNTRL); + umac_wl(intf, 0x800, UMC_RX_MAX_PKT_SZ); +- umac_enable_set(intf, UMC_CMD_PROMISC, 1); + } + + static int bcmasp_tx_poll(struct napi_struct *napi, int budget) +@@ -656,6 +659,12 @@ static void bcmasp_adj_link(struct net_device *dev) + UMC_CMD_HD_EN | UMC_CMD_RX_PAUSE_IGNORE | + UMC_CMD_TX_PAUSE_IGNORE); + reg |= cmd_bits; ++ if (reg & UMC_CMD_SW_RESET) { ++ reg &= ~UMC_CMD_SW_RESET; ++ umac_wl(intf, reg, UMC_CMD); ++ udelay(2); ++ reg |= UMC_CMD_TX_EN | UMC_CMD_RX_EN | UMC_CMD_PROMISC; ++ } + umac_wl(intf, reg, UMC_CMD); + + intf->eee.eee_active = phy_init_eee(phydev, 0) >= 0; +@@ -1061,9 +1070,6 @@ static int bcmasp_netif_init(struct net_device *dev, bool phy_connect) + + umac_init(intf); + +- /* Disable the UniMAC RX/TX */ +- umac_enable_set(intf, (UMC_CMD_RX_EN | UMC_CMD_TX_EN), 0); +- + umac_set_hw_addr(intf, dev->dev_addr); + + intf->old_duplex = -1; +@@ -1083,9 +1089,6 @@ static int bcmasp_netif_init(struct net_device *dev, bool phy_connect) + + bcmasp_enable_rx(intf, 1); + +- /* Turn on UniMAC TX/RX */ +- umac_enable_set(intf, (UMC_CMD_RX_EN | UMC_CMD_TX_EN), 1); +- + intf->crc_fwd = !!(umac_rl(intf, UMC_CMD) & UMC_CMD_CRC_FWD); + + bcmasp_netif_start(dev); +@@ -1321,7 +1324,14 @@ static void bcmasp_suspend_to_wol(struct bcmasp_intf *intf) + if (intf->wolopts & WAKE_FILTER) + bcmasp_netfilt_suspend(intf); + +- /* UniMAC receive needs to be turned on */ ++ /* Bring UniMAC out of reset if needed and enable RX */ ++ reg = umac_rl(intf, UMC_CMD); ++ if (reg & UMC_CMD_SW_RESET) ++ reg &= ~UMC_CMD_SW_RESET; ++ ++ reg |= UMC_CMD_RX_EN | UMC_CMD_PROMISC; ++ umac_wl(intf, reg, UMC_CMD); ++ + umac_enable_set(intf, UMC_CMD_RX_EN, 1); + + if (intf->parent->wol_irq > 0) { +diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c +index 54da59286df4e..7ca8cd78d5574 100644 +--- a/drivers/net/ethernet/freescale/fec_main.c ++++ b/drivers/net/ethernet/freescale/fec_main.c +@@ -2381,8 +2381,6 @@ static int fec_enet_mii_probe(struct net_device *ndev) + fep->link = 0; + fep->full_duplex = 0; + +- phy_dev->mac_managed_pm = true; +- + phy_attached_info(phy_dev); + + return 0; +@@ -2394,10 +2392,12 @@ static int fec_enet_mii_init(struct platform_device *pdev) + struct net_device *ndev = platform_get_drvdata(pdev); + struct fec_enet_private *fep = netdev_priv(ndev); + bool suppress_preamble = false; ++ struct phy_device *phydev; + struct device_node *node; + int err = -ENXIO; + u32 mii_speed, holdtime; + u32 bus_freq; ++ int addr; + + /* + * The i.MX28 dual fec interfaces are not equal. 
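
The fec change that begins here moves the mac_managed_pm assignment out of fec_enet_mii_probe() (which only runs once the interface is opened and a PHY is attached) and into MDIO bus setup, so every PHY on the bus is flagged before any suspend/resume cycle can reach phylib's PM callbacks. A rough stand-alone sketch of that ordering follows; the bus and PHY types are simplified stand-ins, not the kernel's struct mii_bus / struct phy_device.

/* Sketch: flag every PHY found at MDIO bus init as MAC-managed for PM,
 * before the interface is ever opened. Types and the helper are stand-ins
 * for illustration only. */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define PHY_MAX_ADDR 32

struct phy_device { bool mac_managed_pm; };

struct mii_bus { struct phy_device *phy_map[PHY_MAX_ADDR]; };

static struct phy_device *mdiobus_get_phy(struct mii_bus *bus, int addr)
{
	return bus->phy_map[addr];	/* NULL when nothing answered at @addr */
}

static void mark_phys_mac_managed(struct mii_bus *bus)
{
	for (int addr = 0; addr < PHY_MAX_ADDR; addr++) {
		struct phy_device *phydev = mdiobus_get_phy(bus, addr);

		if (phydev)
			phydev->mac_managed_pm = true;
	}
}

int main(void)
{
	struct phy_device phy = { .mac_managed_pm = false };
	struct mii_bus bus = { .phy_map = { [2] = &phy } };

	mark_phys_mac_managed(&bus);	/* done at bus init, not at open time */
	printf("%d\n", phy.mac_managed_pm);	/* 1 */
	return 0;
}

Doing this at bus-init time means a suspend/resume that happens before the netdev is opened already sees mac_managed_pm set, which seems to be the point of moving the assignment out of the PHY-connect path.
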
+@@ -2511,6 +2511,13 @@ static int fec_enet_mii_init(struct platform_device *pdev) + goto err_out_free_mdiobus; + of_node_put(node); + ++ /* find all the PHY devices on the bus and set mac_managed_pm to true */ ++ for (addr = 0; addr < PHY_MAX_ADDR; addr++) { ++ phydev = mdiobus_get_phy(fep->mii_bus, addr); ++ if (phydev) ++ phydev->mac_managed_pm = true; ++ } ++ + mii_cnt++; + + /* save fec0 mii_bus */ +diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_tqp_stats.c b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_tqp_stats.c +index f3c9395d8351c..618f66d9586b3 100644 +--- a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_tqp_stats.c ++++ b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_tqp_stats.c +@@ -85,7 +85,7 @@ int hclge_comm_tqps_update_stats(struct hnae3_handle *handle, + hclge_comm_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_TX_STATS, + true); + +- desc.data[0] = cpu_to_le32(tqp->index & 0x1ff); ++ desc.data[0] = cpu_to_le32(tqp->index); + ret = hclge_comm_cmd_send(hw, &desc, 1); + if (ret) { + dev_err(&hw->cmq.csq.pdev->dev, +diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c +index 682239f33082b..78181eea93c1c 100644 +--- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c ++++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c +@@ -78,6 +78,9 @@ static const struct hns3_stats hns3_rxq_stats[] = { + #define HNS3_NIC_LB_TEST_NO_MEM_ERR 1 + #define HNS3_NIC_LB_TEST_TX_CNT_ERR 2 + #define HNS3_NIC_LB_TEST_RX_CNT_ERR 3 ++#define HNS3_NIC_LB_TEST_UNEXECUTED 4 ++ ++static int hns3_get_sset_count(struct net_device *netdev, int stringset); + + static int hns3_lp_setup(struct net_device *ndev, enum hnae3_loop loop, bool en) + { +@@ -418,18 +421,26 @@ static void hns3_do_external_lb(struct net_device *ndev, + static void hns3_self_test(struct net_device *ndev, + struct ethtool_test *eth_test, u64 *data) + { ++ int cnt = hns3_get_sset_count(ndev, ETH_SS_TEST); + struct hns3_nic_priv *priv = netdev_priv(ndev); + struct hnae3_handle *h = priv->ae_handle; + int st_param[HNAE3_LOOP_NONE][2]; + bool if_running = netif_running(ndev); ++ int i; ++ ++ /* initialize the loopback test result, avoid marking an unexcuted ++ * loopback test as PASS. 
++ */ ++ for (i = 0; i < cnt; i++) ++ data[i] = HNS3_NIC_LB_TEST_UNEXECUTED; + + if (hns3_nic_resetting(ndev)) { + netdev_err(ndev, "dev resetting!"); +- return; ++ goto failure; + } + + if (!(eth_test->flags & ETH_TEST_FL_OFFLINE)) +- return; ++ goto failure; + + if (netif_msg_ifdown(h)) + netdev_info(ndev, "self test start\n"); +@@ -451,6 +462,10 @@ static void hns3_self_test(struct net_device *ndev, + + if (netif_msg_ifdown(h)) + netdev_info(ndev, "self test end\n"); ++ return; ++ ++failure: ++ eth_test->flags |= ETH_TEST_FL_FAILED; + } + + static void hns3_update_limit_promisc_mode(struct net_device *netdev, +diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +index f1ca2cda2961e..dfd0c5f4cb9f5 100644 +--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c ++++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +@@ -11614,6 +11614,8 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) + if (ret) + goto err_pci_uninit; + ++ devl_lock(hdev->devlink); ++ + /* Firmware command queue initialize */ + ret = hclge_comm_cmd_queue_init(hdev->pdev, &hdev->hw.hw); + if (ret) +@@ -11793,6 +11795,7 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) + + hclge_task_schedule(hdev, round_jiffies_relative(HZ)); + ++ devl_unlock(hdev->devlink); + return 0; + + err_mdiobus_unreg: +@@ -11805,6 +11808,7 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) + err_cmd_uninit: + hclge_comm_cmd_uninit(hdev->ae_dev, &hdev->hw.hw); + err_devlink_uninit: ++ devl_unlock(hdev->devlink); + hclge_devlink_uninit(hdev); + err_pci_uninit: + pcim_iounmap(pdev, hdev->hw.hw.io_base); +diff --git a/drivers/net/ethernet/intel/e1000/e1000_hw.c b/drivers/net/ethernet/intel/e1000/e1000_hw.c +index 4542e2bc28e8d..f9328f2e669f8 100644 +--- a/drivers/net/ethernet/intel/e1000/e1000_hw.c ++++ b/drivers/net/ethernet/intel/e1000/e1000_hw.c +@@ -5,6 +5,7 @@ + * Shared functions for accessing and configuring the MAC + */ + ++#include <linux/bitfield.h> + #include "e1000.h" + + static s32 e1000_check_downshift(struct e1000_hw *hw); +@@ -3260,8 +3261,7 @@ static s32 e1000_phy_igp_get_info(struct e1000_hw *hw, + return ret_val; + + phy_info->mdix_mode = +- (e1000_auto_x_mode) ((phy_data & IGP01E1000_PSSR_MDIX) >> +- IGP01E1000_PSSR_MDIX_SHIFT); ++ (e1000_auto_x_mode)FIELD_GET(IGP01E1000_PSSR_MDIX, phy_data); + + if ((phy_data & IGP01E1000_PSSR_SPEED_MASK) == + IGP01E1000_PSSR_SPEED_1000MBPS) { +@@ -3272,11 +3272,11 @@ static s32 e1000_phy_igp_get_info(struct e1000_hw *hw, + if (ret_val) + return ret_val; + +- phy_info->local_rx = ((phy_data & SR_1000T_LOCAL_RX_STATUS) >> +- SR_1000T_LOCAL_RX_STATUS_SHIFT) ? ++ phy_info->local_rx = FIELD_GET(SR_1000T_LOCAL_RX_STATUS, ++ phy_data) ? + e1000_1000t_rx_status_ok : e1000_1000t_rx_status_not_ok; +- phy_info->remote_rx = ((phy_data & SR_1000T_REMOTE_RX_STATUS) >> +- SR_1000T_REMOTE_RX_STATUS_SHIFT) ? ++ phy_info->remote_rx = FIELD_GET(SR_1000T_REMOTE_RX_STATUS, ++ phy_data) ? + e1000_1000t_rx_status_ok : e1000_1000t_rx_status_not_ok; + + /* Get cable length */ +@@ -3326,14 +3326,12 @@ static s32 e1000_phy_m88_get_info(struct e1000_hw *hw, + return ret_val; + + phy_info->extended_10bt_distance = +- ((phy_data & M88E1000_PSCR_10BT_EXT_DIST_ENABLE) >> +- M88E1000_PSCR_10BT_EXT_DIST_ENABLE_SHIFT) ? ++ FIELD_GET(M88E1000_PSCR_10BT_EXT_DIST_ENABLE, phy_data) ? 
+ e1000_10bt_ext_dist_enable_lower : + e1000_10bt_ext_dist_enable_normal; + + phy_info->polarity_correction = +- ((phy_data & M88E1000_PSCR_POLARITY_REVERSAL) >> +- M88E1000_PSCR_POLARITY_REVERSAL_SHIFT) ? ++ FIELD_GET(M88E1000_PSCR_POLARITY_REVERSAL, phy_data) ? + e1000_polarity_reversal_disabled : e1000_polarity_reversal_enabled; + + /* Check polarity status */ +@@ -3347,27 +3345,25 @@ static s32 e1000_phy_m88_get_info(struct e1000_hw *hw, + return ret_val; + + phy_info->mdix_mode = +- (e1000_auto_x_mode) ((phy_data & M88E1000_PSSR_MDIX) >> +- M88E1000_PSSR_MDIX_SHIFT); ++ (e1000_auto_x_mode)FIELD_GET(M88E1000_PSSR_MDIX, phy_data); + + if ((phy_data & M88E1000_PSSR_SPEED) == M88E1000_PSSR_1000MBS) { + /* Cable Length Estimation and Local/Remote Receiver Information + * are only valid at 1000 Mbps. + */ + phy_info->cable_length = +- (e1000_cable_length) ((phy_data & +- M88E1000_PSSR_CABLE_LENGTH) >> +- M88E1000_PSSR_CABLE_LENGTH_SHIFT); ++ (e1000_cable_length)FIELD_GET(M88E1000_PSSR_CABLE_LENGTH, ++ phy_data); + + ret_val = e1000_read_phy_reg(hw, PHY_1000T_STATUS, &phy_data); + if (ret_val) + return ret_val; + +- phy_info->local_rx = ((phy_data & SR_1000T_LOCAL_RX_STATUS) >> +- SR_1000T_LOCAL_RX_STATUS_SHIFT) ? ++ phy_info->local_rx = FIELD_GET(SR_1000T_LOCAL_RX_STATUS, ++ phy_data) ? + e1000_1000t_rx_status_ok : e1000_1000t_rx_status_not_ok; +- phy_info->remote_rx = ((phy_data & SR_1000T_REMOTE_RX_STATUS) >> +- SR_1000T_REMOTE_RX_STATUS_SHIFT) ? ++ phy_info->remote_rx = FIELD_GET(SR_1000T_REMOTE_RX_STATUS, ++ phy_data) ? + e1000_1000t_rx_status_ok : e1000_1000t_rx_status_not_ok; + } + +@@ -3515,7 +3511,7 @@ s32 e1000_init_eeprom_params(struct e1000_hw *hw) + if (ret_val) + return ret_val; + eeprom_size = +- (eeprom_size & EEPROM_SIZE_MASK) >> EEPROM_SIZE_SHIFT; ++ FIELD_GET(EEPROM_SIZE_MASK, eeprom_size); + /* 256B eeprom size was not supported in earlier hardware, so we + * bump eeprom_size up one to ensure that "1" (which maps to + * 256B) is never the result used in the shifting logic below. +@@ -4891,8 +4887,7 @@ static s32 e1000_get_cable_length(struct e1000_hw *hw, u16 *min_length, + &phy_data); + if (ret_val) + return ret_val; +- cable_length = (phy_data & M88E1000_PSSR_CABLE_LENGTH) >> +- M88E1000_PSSR_CABLE_LENGTH_SHIFT; ++ cable_length = FIELD_GET(M88E1000_PSSR_CABLE_LENGTH, phy_data); + + /* Convert the enum value to ranged values */ + switch (cable_length) { +@@ -5001,8 +4996,7 @@ static s32 e1000_check_polarity(struct e1000_hw *hw, + &phy_data); + if (ret_val) + return ret_val; +- *polarity = ((phy_data & M88E1000_PSSR_REV_POLARITY) >> +- M88E1000_PSSR_REV_POLARITY_SHIFT) ? ++ *polarity = FIELD_GET(M88E1000_PSSR_REV_POLARITY, phy_data) ? 
+ e1000_rev_polarity_reversed : e1000_rev_polarity_normal; + + } else if (hw->phy_type == e1000_phy_igp) { +@@ -5072,8 +5066,8 @@ static s32 e1000_check_downshift(struct e1000_hw *hw) + if (ret_val) + return ret_val; + +- hw->speed_downgraded = (phy_data & M88E1000_PSSR_DOWNSHIFT) >> +- M88E1000_PSSR_DOWNSHIFT_SHIFT; ++ hw->speed_downgraded = FIELD_GET(M88E1000_PSSR_DOWNSHIFT, ++ phy_data); + } + + return E1000_SUCCESS; +diff --git a/drivers/net/ethernet/intel/e1000e/80003es2lan.c b/drivers/net/ethernet/intel/e1000e/80003es2lan.c +index be9c695dde127..c51fb6bf9c4e0 100644 +--- a/drivers/net/ethernet/intel/e1000e/80003es2lan.c ++++ b/drivers/net/ethernet/intel/e1000e/80003es2lan.c +@@ -92,8 +92,7 @@ static s32 e1000_init_nvm_params_80003es2lan(struct e1000_hw *hw) + + nvm->type = e1000_nvm_eeprom_spi; + +- size = (u16)((eecd & E1000_EECD_SIZE_EX_MASK) >> +- E1000_EECD_SIZE_EX_SHIFT); ++ size = (u16)FIELD_GET(E1000_EECD_SIZE_EX_MASK, eecd); + + /* Added to a constant, "size" becomes the left-shift value + * for setting word_size. +diff --git a/drivers/net/ethernet/intel/e1000e/82571.c b/drivers/net/ethernet/intel/e1000e/82571.c +index 0b1e890dd583b..969f855a79ee6 100644 +--- a/drivers/net/ethernet/intel/e1000e/82571.c ++++ b/drivers/net/ethernet/intel/e1000e/82571.c +@@ -157,8 +157,7 @@ static s32 e1000_init_nvm_params_82571(struct e1000_hw *hw) + fallthrough; + default: + nvm->type = e1000_nvm_eeprom_spi; +- size = (u16)((eecd & E1000_EECD_SIZE_EX_MASK) >> +- E1000_EECD_SIZE_EX_SHIFT); ++ size = (u16)FIELD_GET(E1000_EECD_SIZE_EX_MASK, eecd); + /* Added to a constant, "size" becomes the left-shift value + * for setting word_size. + */ +diff --git a/drivers/net/ethernet/intel/e1000e/ethtool.c b/drivers/net/ethernet/intel/e1000e/ethtool.c +index 9835e6a90d56c..fc0f98ea61332 100644 +--- a/drivers/net/ethernet/intel/e1000e/ethtool.c ++++ b/drivers/net/ethernet/intel/e1000e/ethtool.c +@@ -654,8 +654,8 @@ static void e1000_get_drvinfo(struct net_device *netdev, + */ + snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), + "%d.%d-%d", +- (adapter->eeprom_vers & 0xF000) >> 12, +- (adapter->eeprom_vers & 0x0FF0) >> 4, ++ FIELD_GET(0xF000, adapter->eeprom_vers), ++ FIELD_GET(0x0FF0, adapter->eeprom_vers), + (adapter->eeprom_vers & 0x000F)); + + strscpy(drvinfo->bus_info, pci_name(adapter->pdev), +@@ -925,8 +925,7 @@ static int e1000_reg_test(struct e1000_adapter *adapter, u64 *data) + } + + if (mac->type >= e1000_pch_lpt) +- wlock_mac = (er32(FWSM) & E1000_FWSM_WLOCK_MAC_MASK) >> +- E1000_FWSM_WLOCK_MAC_SHIFT; ++ wlock_mac = FIELD_GET(E1000_FWSM_WLOCK_MAC_MASK, er32(FWSM)); + + for (i = 0; i < mac->rar_entry_count; i++) { + if (mac->type >= e1000_pch_lpt) { +diff --git a/drivers/net/ethernet/intel/e1000e/hw.h b/drivers/net/ethernet/intel/e1000e/hw.h +index 1fef6bb5a5fbc..4b6e7536170ab 100644 +--- a/drivers/net/ethernet/intel/e1000e/hw.h ++++ b/drivers/net/ethernet/intel/e1000e/hw.h +@@ -628,6 +628,7 @@ struct e1000_phy_info { + u32 id; + u32 reset_delay_us; /* in usec */ + u32 revision; ++ u32 retry_count; + + enum e1000_media_type media_type; + +@@ -644,6 +645,7 @@ struct e1000_phy_info { + bool polarity_correction; + bool speed_downgraded; + bool autoneg_wait_to_complete; ++ bool retry_enabled; + }; + + struct e1000_nvm_info { +diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c +index 39e9fc601bf5a..4d83c9a0c023a 100644 +--- a/drivers/net/ethernet/intel/e1000e/ich8lan.c ++++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c +@@ -222,11 +222,18 @@ 
static bool e1000_phy_is_accessible_pchlan(struct e1000_hw *hw) + if (hw->mac.type >= e1000_pch_lpt) { + /* Only unforce SMBus if ME is not active */ + if (!(er32(FWSM) & E1000_ICH_FWSM_FW_VALID)) { ++ /* Switching PHY interface always returns MDI error ++ * so disable retry mechanism to avoid wasting time ++ */ ++ e1000e_disable_phy_retry(hw); ++ + /* Unforce SMBus mode in PHY */ + e1e_rphy_locked(hw, CV_SMB_CTRL, &phy_reg); + phy_reg &= ~CV_SMB_CTRL_FORCE_SMBUS; + e1e_wphy_locked(hw, CV_SMB_CTRL, phy_reg); + ++ e1000e_enable_phy_retry(hw); ++ + /* Unforce SMBus mode in MAC */ + mac_reg = er32(CTRL_EXT); + mac_reg &= ~E1000_CTRL_EXT_FORCE_SMBUS; +@@ -310,6 +317,11 @@ static s32 e1000_init_phy_workarounds_pchlan(struct e1000_hw *hw) + goto out; + } + ++ /* There is no guarantee that the PHY is accessible at this time ++ * so disable retry mechanism to avoid wasting time ++ */ ++ e1000e_disable_phy_retry(hw); ++ + /* The MAC-PHY interconnect may be in SMBus mode. If the PHY is + * inaccessible and resetting the PHY is not blocked, toggle the + * LANPHYPC Value bit to force the interconnect to PCIe mode. +@@ -380,6 +392,8 @@ static s32 e1000_init_phy_workarounds_pchlan(struct e1000_hw *hw) + break; + } + ++ e1000e_enable_phy_retry(hw); ++ + hw->phy.ops.release(hw); + if (!ret_val) { + +@@ -449,6 +463,11 @@ static s32 e1000_init_phy_params_pchlan(struct e1000_hw *hw) + + phy->id = e1000_phy_unknown; + ++ if (hw->mac.type == e1000_pch_mtp) { ++ phy->retry_count = 2; ++ e1000e_enable_phy_retry(hw); ++ } ++ + ret_val = e1000_init_phy_workarounds_pchlan(hw); + if (ret_val) + return ret_val; +@@ -1072,13 +1091,11 @@ static s32 e1000_platform_pm_pch_lpt(struct e1000_hw *hw, bool link) + + lat_enc_d = (lat_enc & E1000_LTRV_VALUE_MASK) * + (1U << (E1000_LTRV_SCALE_FACTOR * +- ((lat_enc & E1000_LTRV_SCALE_MASK) +- >> E1000_LTRV_SCALE_SHIFT))); ++ FIELD_GET(E1000_LTRV_SCALE_MASK, lat_enc))); + + max_ltr_enc_d = (max_ltr_enc & E1000_LTRV_VALUE_MASK) * +- (1U << (E1000_LTRV_SCALE_FACTOR * +- ((max_ltr_enc & E1000_LTRV_SCALE_MASK) +- >> E1000_LTRV_SCALE_SHIFT))); ++ (1U << (E1000_LTRV_SCALE_FACTOR * ++ FIELD_GET(E1000_LTRV_SCALE_MASK, max_ltr_enc))); + + if (lat_enc_d > max_ltr_enc_d) + lat_enc = max_ltr_enc; +@@ -1148,18 +1165,6 @@ s32 e1000_enable_ulp_lpt_lp(struct e1000_hw *hw, bool to_sx) + if (ret_val) + goto out; + +- /* Force SMBus mode in PHY */ +- ret_val = e1000_read_phy_reg_hv_locked(hw, CV_SMB_CTRL, &phy_reg); +- if (ret_val) +- goto release; +- phy_reg |= CV_SMB_CTRL_FORCE_SMBUS; +- e1000_write_phy_reg_hv_locked(hw, CV_SMB_CTRL, phy_reg); +- +- /* Force SMBus mode in MAC */ +- mac_reg = er32(CTRL_EXT); +- mac_reg |= E1000_CTRL_EXT_FORCE_SMBUS; +- ew32(CTRL_EXT, mac_reg); +- + /* Si workaround for ULP entry flow on i127/rev6 h/w. 
Enable + * LPLU and disable Gig speed when entering ULP + */ +@@ -1315,6 +1320,11 @@ static s32 e1000_disable_ulp_lpt_lp(struct e1000_hw *hw, bool force) + /* Toggle LANPHYPC Value bit */ + e1000_toggle_lanphypc_pch_lpt(hw); + ++ /* Switching PHY interface always returns MDI error ++ * so disable retry mechanism to avoid wasting time ++ */ ++ e1000e_disable_phy_retry(hw); ++ + /* Unforce SMBus mode in PHY */ + ret_val = e1000_read_phy_reg_hv_locked(hw, CV_SMB_CTRL, &phy_reg); + if (ret_val) { +@@ -1335,6 +1345,8 @@ static s32 e1000_disable_ulp_lpt_lp(struct e1000_hw *hw, bool force) + phy_reg &= ~CV_SMB_CTRL_FORCE_SMBUS; + e1000_write_phy_reg_hv_locked(hw, CV_SMB_CTRL, phy_reg); + ++ e1000e_enable_phy_retry(hw); ++ + /* Unforce SMBus mode in MAC */ + mac_reg = er32(CTRL_EXT); + mac_reg &= ~E1000_CTRL_EXT_FORCE_SMBUS; +@@ -2075,8 +2087,7 @@ static s32 e1000_write_smbus_addr(struct e1000_hw *hw) + { + u16 phy_data; + u32 strap = er32(STRAP); +- u32 freq = (strap & E1000_STRAP_SMT_FREQ_MASK) >> +- E1000_STRAP_SMT_FREQ_SHIFT; ++ u32 freq = FIELD_GET(E1000_STRAP_SMT_FREQ_MASK, strap); + s32 ret_val; + + strap &= E1000_STRAP_SMBUS_ADDRESS_MASK; +@@ -2562,8 +2573,7 @@ void e1000_copy_rx_addrs_to_phy_ich8lan(struct e1000_hw *hw) + hw->phy.ops.write_reg_page(hw, BM_RAR_H(i), + (u16)(mac_reg & 0xFFFF)); + hw->phy.ops.write_reg_page(hw, BM_RAR_CTRL(i), +- (u16)((mac_reg & E1000_RAH_AV) +- >> 16)); ++ FIELD_GET(E1000_RAH_AV, mac_reg)); + } + + e1000_disable_phy_wakeup_reg_access_bm(hw, &phy_reg); +@@ -3205,7 +3215,7 @@ static s32 e1000_valid_nvm_bank_detect_ich8lan(struct e1000_hw *hw, u32 *bank) + &nvm_dword); + if (ret_val) + return ret_val; +- sig_byte = (u8)((nvm_dword & 0xFF00) >> 8); ++ sig_byte = FIELD_GET(0xFF00, nvm_dword); + if ((sig_byte & E1000_ICH_NVM_VALID_SIG_MASK) == + E1000_ICH_NVM_SIG_VALUE) { + *bank = 0; +@@ -3218,7 +3228,7 @@ static s32 e1000_valid_nvm_bank_detect_ich8lan(struct e1000_hw *hw, u32 *bank) + &nvm_dword); + if (ret_val) + return ret_val; +- sig_byte = (u8)((nvm_dword & 0xFF00) >> 8); ++ sig_byte = FIELD_GET(0xFF00, nvm_dword); + if ((sig_byte & E1000_ICH_NVM_VALID_SIG_MASK) == + E1000_ICH_NVM_SIG_VALUE) { + *bank = 1; +diff --git a/drivers/net/ethernet/intel/e1000e/mac.c b/drivers/net/ethernet/intel/e1000e/mac.c +index 5df7ad93f3d77..30515bfb259ea 100644 +--- a/drivers/net/ethernet/intel/e1000e/mac.c ++++ b/drivers/net/ethernet/intel/e1000e/mac.c +@@ -52,7 +52,7 @@ void e1000_set_lan_id_multi_port_pcie(struct e1000_hw *hw) + * for the device regardless of function swap state. 
+ */ + reg = er32(STATUS); +- bus->func = (reg & E1000_STATUS_FUNC_MASK) >> E1000_STATUS_FUNC_SHIFT; ++ bus->func = FIELD_GET(E1000_STATUS_FUNC_MASK, reg); + } + + /** +diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c +index f536c856727cb..3692fce201959 100644 +--- a/drivers/net/ethernet/intel/e1000e/netdev.c ++++ b/drivers/net/ethernet/intel/e1000e/netdev.c +@@ -1788,8 +1788,7 @@ static irqreturn_t e1000_intr_msi(int __always_unused irq, void *data) + adapter->corr_errors += + pbeccsts & E1000_PBECCSTS_CORR_ERR_CNT_MASK; + adapter->uncorr_errors += +- (pbeccsts & E1000_PBECCSTS_UNCORR_ERR_CNT_MASK) >> +- E1000_PBECCSTS_UNCORR_ERR_CNT_SHIFT; ++ FIELD_GET(E1000_PBECCSTS_UNCORR_ERR_CNT_MASK, pbeccsts); + + /* Do the reset outside of interrupt context */ + schedule_work(&adapter->reset_task); +@@ -1868,8 +1867,7 @@ static irqreturn_t e1000_intr(int __always_unused irq, void *data) + adapter->corr_errors += + pbeccsts & E1000_PBECCSTS_CORR_ERR_CNT_MASK; + adapter->uncorr_errors += +- (pbeccsts & E1000_PBECCSTS_UNCORR_ERR_CNT_MASK) >> +- E1000_PBECCSTS_UNCORR_ERR_CNT_SHIFT; ++ FIELD_GET(E1000_PBECCSTS_UNCORR_ERR_CNT_MASK, pbeccsts); + + /* Do the reset outside of interrupt context */ + schedule_work(&adapter->reset_task); +@@ -5031,8 +5029,7 @@ static void e1000e_update_stats(struct e1000_adapter *adapter) + adapter->corr_errors += + pbeccsts & E1000_PBECCSTS_CORR_ERR_CNT_MASK; + adapter->uncorr_errors += +- (pbeccsts & E1000_PBECCSTS_UNCORR_ERR_CNT_MASK) >> +- E1000_PBECCSTS_UNCORR_ERR_CNT_SHIFT; ++ FIELD_GET(E1000_PBECCSTS_UNCORR_ERR_CNT_MASK, pbeccsts); + } + } + +@@ -6249,7 +6246,7 @@ static int e1000_init_phy_wakeup(struct e1000_adapter *adapter, u32 wufc) + phy_reg |= BM_RCTL_MPE; + phy_reg &= ~(BM_RCTL_MO_MASK); + if (mac_reg & E1000_RCTL_MO_3) +- phy_reg |= (((mac_reg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT) ++ phy_reg |= (FIELD_GET(E1000_RCTL_MO_3, mac_reg) + << BM_RCTL_MO_SHIFT); + if (mac_reg & E1000_RCTL_BAM) + phy_reg |= BM_RCTL_BAM; +@@ -6626,6 +6623,7 @@ static int __e1000_shutdown(struct pci_dev *pdev, bool runtime) + struct e1000_hw *hw = &adapter->hw; + u32 ctrl, ctrl_ext, rctl, status, wufc; + int retval = 0; ++ u16 smb_ctrl; + + /* Runtime suspend should only enable wakeup for link changes */ + if (runtime) +@@ -6691,14 +6689,31 @@ static int __e1000_shutdown(struct pci_dev *pdev, bool runtime) + if (adapter->hw.phy.type == e1000_phy_igp_3) { + e1000e_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw); + } else if (hw->mac.type >= e1000_pch_lpt) { +- if (wufc && !(wufc & (E1000_WUFC_EX | E1000_WUFC_MC | E1000_WUFC_BC))) ++ if (wufc && !(wufc & (E1000_WUFC_EX | E1000_WUFC_MC | E1000_WUFC_BC))) { + /* ULP does not support wake from unicast, multicast + * or broadcast. 
+ */ + retval = e1000_enable_ulp_lpt_lp(hw, !runtime); ++ if (retval) ++ return retval; ++ } ++ ++ /* Force SMBUS to allow WOL */ ++ /* Switching PHY interface always returns MDI error ++ * so disable retry mechanism to avoid wasting time ++ */ ++ e1000e_disable_phy_retry(hw); ++ ++ e1e_rphy(hw, CV_SMB_CTRL, &smb_ctrl); ++ smb_ctrl |= CV_SMB_CTRL_FORCE_SMBUS; ++ e1e_wphy(hw, CV_SMB_CTRL, smb_ctrl); + +- if (retval) +- return retval; ++ e1000e_enable_phy_retry(hw); ++ ++ /* Force SMBus mode in MAC */ ++ ctrl_ext = er32(CTRL_EXT); ++ ctrl_ext |= E1000_CTRL_EXT_FORCE_SMBUS; ++ ew32(CTRL_EXT, ctrl_ext); + } + + /* Ensure that the appropriate bits are set in LPI_CTRL +diff --git a/drivers/net/ethernet/intel/e1000e/phy.c b/drivers/net/ethernet/intel/e1000e/phy.c +index 08c3d477dd6f7..395746bcf8f7c 100644 +--- a/drivers/net/ethernet/intel/e1000e/phy.c ++++ b/drivers/net/ethernet/intel/e1000e/phy.c +@@ -107,6 +107,16 @@ s32 e1000e_phy_reset_dsp(struct e1000_hw *hw) + return e1e_wphy(hw, M88E1000_PHY_GEN_CONTROL, 0); + } + ++void e1000e_disable_phy_retry(struct e1000_hw *hw) ++{ ++ hw->phy.retry_enabled = false; ++} ++ ++void e1000e_enable_phy_retry(struct e1000_hw *hw) ++{ ++ hw->phy.retry_enabled = true; ++} ++ + /** + * e1000e_read_phy_reg_mdic - Read MDI control register + * @hw: pointer to the HW structure +@@ -118,57 +128,73 @@ s32 e1000e_phy_reset_dsp(struct e1000_hw *hw) + **/ + s32 e1000e_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data) + { ++ u32 i, mdic = 0, retry_counter, retry_max; + struct e1000_phy_info *phy = &hw->phy; +- u32 i, mdic = 0; ++ bool success; + + if (offset > MAX_PHY_REG_ADDRESS) { + e_dbg("PHY Address %d is out of range\n", offset); + return -E1000_ERR_PARAM; + } + ++ retry_max = phy->retry_enabled ? phy->retry_count : 0; ++ + /* Set up Op-code, Phy Address, and register offset in the MDI + * Control register. The MAC will take care of interfacing with the + * PHY to retrieve the desired data. + */ +- mdic = ((offset << E1000_MDIC_REG_SHIFT) | +- (phy->addr << E1000_MDIC_PHY_SHIFT) | +- (E1000_MDIC_OP_READ)); ++ for (retry_counter = 0; retry_counter <= retry_max; retry_counter++) { ++ success = true; + +- ew32(MDIC, mdic); ++ mdic = ((offset << E1000_MDIC_REG_SHIFT) | ++ (phy->addr << E1000_MDIC_PHY_SHIFT) | ++ (E1000_MDIC_OP_READ)); + +- /* Poll the ready bit to see if the MDI read completed +- * Increasing the time out as testing showed failures with +- * the lower time out +- */ +- for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) { +- udelay(50); +- mdic = er32(MDIC); +- if (mdic & E1000_MDIC_READY) +- break; +- } +- if (!(mdic & E1000_MDIC_READY)) { +- e_dbg("MDI Read PHY Reg Address %d did not complete\n", offset); +- return -E1000_ERR_PHY; +- } +- if (mdic & E1000_MDIC_ERROR) { +- e_dbg("MDI Read PHY Reg Address %d Error\n", offset); +- return -E1000_ERR_PHY; +- } +- if (((mdic & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT) != offset) { +- e_dbg("MDI Read offset error - requested %d, returned %d\n", +- offset, +- (mdic & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT); +- return -E1000_ERR_PHY; +- } +- *data = (u16)mdic; ++ ew32(MDIC, mdic); + +- /* Allow some time after each MDIC transaction to avoid +- * reading duplicate data in the next MDIC transaction. 
+- */ +- if (hw->mac.type == e1000_pch2lan) +- udelay(100); ++ /* Poll the ready bit to see if the MDI read completed ++ * Increasing the time out as testing showed failures with ++ * the lower time out ++ */ ++ for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) { ++ usleep_range(50, 60); ++ mdic = er32(MDIC); ++ if (mdic & E1000_MDIC_READY) ++ break; ++ } ++ if (!(mdic & E1000_MDIC_READY)) { ++ e_dbg("MDI Read PHY Reg Address %d did not complete\n", ++ offset); ++ success = false; ++ } ++ if (mdic & E1000_MDIC_ERROR) { ++ e_dbg("MDI Read PHY Reg Address %d Error\n", offset); ++ success = false; ++ } ++ if (FIELD_GET(E1000_MDIC_REG_MASK, mdic) != offset) { ++ e_dbg("MDI Read offset error - requested %d, returned %d\n", ++ offset, FIELD_GET(E1000_MDIC_REG_MASK, mdic)); ++ success = false; ++ } + +- return 0; ++ /* Allow some time after each MDIC transaction to avoid ++ * reading duplicate data in the next MDIC transaction. ++ */ ++ if (hw->mac.type == e1000_pch2lan) ++ usleep_range(100, 150); ++ ++ if (success) { ++ *data = (u16)mdic; ++ return 0; ++ } ++ ++ if (retry_counter != retry_max) { ++ e_dbg("Perform retry on PHY transaction...\n"); ++ mdelay(10); ++ } ++ } ++ ++ return -E1000_ERR_PHY; + } + + /** +@@ -181,57 +207,72 @@ s32 e1000e_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data) + **/ + s32 e1000e_write_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 data) + { ++ u32 i, mdic = 0, retry_counter, retry_max; + struct e1000_phy_info *phy = &hw->phy; +- u32 i, mdic = 0; ++ bool success; + + if (offset > MAX_PHY_REG_ADDRESS) { + e_dbg("PHY Address %d is out of range\n", offset); + return -E1000_ERR_PARAM; + } + ++ retry_max = phy->retry_enabled ? phy->retry_count : 0; ++ + /* Set up Op-code, Phy Address, and register offset in the MDI + * Control register. The MAC will take care of interfacing with the + * PHY to retrieve the desired data. + */ +- mdic = (((u32)data) | +- (offset << E1000_MDIC_REG_SHIFT) | +- (phy->addr << E1000_MDIC_PHY_SHIFT) | +- (E1000_MDIC_OP_WRITE)); ++ for (retry_counter = 0; retry_counter <= retry_max; retry_counter++) { ++ success = true; + +- ew32(MDIC, mdic); ++ mdic = (((u32)data) | ++ (offset << E1000_MDIC_REG_SHIFT) | ++ (phy->addr << E1000_MDIC_PHY_SHIFT) | ++ (E1000_MDIC_OP_WRITE)); + +- /* Poll the ready bit to see if the MDI read completed +- * Increasing the time out as testing showed failures with +- * the lower time out +- */ +- for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) { +- udelay(50); +- mdic = er32(MDIC); +- if (mdic & E1000_MDIC_READY) +- break; +- } +- if (!(mdic & E1000_MDIC_READY)) { +- e_dbg("MDI Write PHY Reg Address %d did not complete\n", offset); +- return -E1000_ERR_PHY; +- } +- if (mdic & E1000_MDIC_ERROR) { +- e_dbg("MDI Write PHY Red Address %d Error\n", offset); +- return -E1000_ERR_PHY; +- } +- if (((mdic & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT) != offset) { +- e_dbg("MDI Write offset error - requested %d, returned %d\n", +- offset, +- (mdic & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT); +- return -E1000_ERR_PHY; +- } ++ ew32(MDIC, mdic); + +- /* Allow some time after each MDIC transaction to avoid +- * reading duplicate data in the next MDIC transaction. 
+- */ +- if (hw->mac.type == e1000_pch2lan) +- udelay(100); ++ /* Poll the ready bit to see if the MDI read completed ++ * Increasing the time out as testing showed failures with ++ * the lower time out ++ */ ++ for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) { ++ usleep_range(50, 60); ++ mdic = er32(MDIC); ++ if (mdic & E1000_MDIC_READY) ++ break; ++ } ++ if (!(mdic & E1000_MDIC_READY)) { ++ e_dbg("MDI Write PHY Reg Address %d did not complete\n", ++ offset); ++ success = false; ++ } ++ if (mdic & E1000_MDIC_ERROR) { ++ e_dbg("MDI Write PHY Reg Address %d Error\n", offset); ++ success = false; ++ } ++ if (FIELD_GET(E1000_MDIC_REG_MASK, mdic) != offset) { ++ e_dbg("MDI Write offset error - requested %d, returned %d\n", ++ offset, FIELD_GET(E1000_MDIC_REG_MASK, mdic)); ++ success = false; ++ } + +- return 0; ++ /* Allow some time after each MDIC transaction to avoid ++ * reading duplicate data in the next MDIC transaction. ++ */ ++ if (hw->mac.type == e1000_pch2lan) ++ usleep_range(100, 150); ++ ++ if (success) ++ return 0; ++ ++ if (retry_counter != retry_max) { ++ e_dbg("Perform retry on PHY transaction...\n"); ++ mdelay(10); ++ } ++ } ++ ++ return -E1000_ERR_PHY; + } + + /** +@@ -1793,8 +1834,7 @@ s32 e1000e_get_cable_length_m88(struct e1000_hw *hw) + if (ret_val) + return ret_val; + +- index = ((phy_data & M88E1000_PSSR_CABLE_LENGTH) >> +- M88E1000_PSSR_CABLE_LENGTH_SHIFT); ++ index = FIELD_GET(M88E1000_PSSR_CABLE_LENGTH, phy_data); + + if (index >= M88E1000_CABLE_LENGTH_TABLE_SIZE - 1) + return -E1000_ERR_PHY; +@@ -3234,8 +3274,7 @@ s32 e1000_get_cable_length_82577(struct e1000_hw *hw) + if (ret_val) + return ret_val; + +- length = ((phy_data & I82577_DSTATUS_CABLE_LENGTH) >> +- I82577_DSTATUS_CABLE_LENGTH_SHIFT); ++ length = FIELD_GET(I82577_DSTATUS_CABLE_LENGTH, phy_data); + + if (length == E1000_CABLE_LENGTH_UNDEFINED) + return -E1000_ERR_PHY; +diff --git a/drivers/net/ethernet/intel/e1000e/phy.h b/drivers/net/ethernet/intel/e1000e/phy.h +index c48777d095235..049bb325b4b14 100644 +--- a/drivers/net/ethernet/intel/e1000e/phy.h ++++ b/drivers/net/ethernet/intel/e1000e/phy.h +@@ -51,6 +51,8 @@ s32 e1000e_read_phy_reg_bm2(struct e1000_hw *hw, u32 offset, u16 *data); + s32 e1000e_write_phy_reg_bm2(struct e1000_hw *hw, u32 offset, u16 data); + void e1000_power_up_phy_copper(struct e1000_hw *hw); + void e1000_power_down_phy_copper(struct e1000_hw *hw); ++void e1000e_disable_phy_retry(struct e1000_hw *hw); ++void e1000e_enable_phy_retry(struct e1000_hw *hw); + s32 e1000e_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data); + s32 e1000e_write_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 data); + s32 e1000_read_phy_reg_hv(struct e1000_hw *hw, u32 offset, u16 *data); +diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c +index af1b0cde36703..aed5e0bf6313e 100644 +--- a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c ++++ b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c +@@ -1,6 +1,7 @@ + // SPDX-License-Identifier: GPL-2.0 + /* Copyright(c) 2013 - 2019 Intel Corporation. 
*/ + ++#include <linux/bitfield.h> + #include "fm10k_pf.h" + #include "fm10k_vf.h" + +@@ -1575,8 +1576,7 @@ static s32 fm10k_get_fault_pf(struct fm10k_hw *hw, int type, + if (func & FM10K_FAULT_FUNC_PF) + fault->func = 0; + else +- fault->func = 1 + ((func & FM10K_FAULT_FUNC_VF_MASK) >> +- FM10K_FAULT_FUNC_VF_SHIFT); ++ fault->func = 1 + FIELD_GET(FM10K_FAULT_FUNC_VF_MASK, func); + + /* record fault type */ + fault->type = func & FM10K_FAULT_FUNC_TYPE_MASK; +diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_vf.c b/drivers/net/ethernet/intel/fm10k/fm10k_vf.c +index dc8ccd378ec92..7fb1961f29210 100644 +--- a/drivers/net/ethernet/intel/fm10k/fm10k_vf.c ++++ b/drivers/net/ethernet/intel/fm10k/fm10k_vf.c +@@ -1,6 +1,7 @@ + // SPDX-License-Identifier: GPL-2.0 + /* Copyright(c) 2013 - 2019 Intel Corporation. */ + ++#include <linux/bitfield.h> + #include "fm10k_vf.h" + + /** +@@ -126,15 +127,14 @@ static s32 fm10k_init_hw_vf(struct fm10k_hw *hw) + hw->mac.max_queues = i; + + /* fetch default VLAN and ITR scale */ +- hw->mac.default_vid = (fm10k_read_reg(hw, FM10K_TXQCTL(0)) & +- FM10K_TXQCTL_VID_MASK) >> FM10K_TXQCTL_VID_SHIFT; ++ hw->mac.default_vid = FIELD_GET(FM10K_TXQCTL_VID_MASK, ++ fm10k_read_reg(hw, FM10K_TXQCTL(0))); + /* Read the ITR scale from TDLEN. See the definition of + * FM10K_TDLEN_ITR_SCALE_SHIFT for more information about how TDLEN is + * used here. + */ +- hw->mac.itr_scale = (fm10k_read_reg(hw, FM10K_TDLEN(0)) & +- FM10K_TDLEN_ITR_SCALE_MASK) >> +- FM10K_TDLEN_ITR_SCALE_SHIFT; ++ hw->mac.itr_scale = FIELD_GET(FM10K_TDLEN_ITR_SCALE_MASK, ++ fm10k_read_reg(hw, FM10K_TDLEN(0))); + + return 0; + +diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h +index 55bb0b5310d5b..3e6839ac1f0f1 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e.h ++++ b/drivers/net/ethernet/intel/i40e/i40e.h +@@ -4,47 +4,20 @@ + #ifndef _I40E_H_ + #define _I40E_H_ + +-#include <net/tcp.h> +-#include <net/udp.h> +-#include <linux/types.h> +-#include <linux/errno.h> +-#include <linux/module.h> +-#include <linux/pci.h> +-#include <linux/netdevice.h> +-#include <linux/ioport.h> +-#include <linux/iommu.h> +-#include <linux/slab.h> +-#include <linux/list.h> +-#include <linux/hashtable.h> +-#include <linux/string.h> +-#include <linux/in.h> +-#include <linux/ip.h> +-#include <linux/sctp.h> +-#include <linux/pkt_sched.h> +-#include <linux/ipv6.h> +-#include <net/checksum.h> +-#include <net/ip6_checksum.h> + #include <linux/ethtool.h> +-#include <linux/if_vlan.h> +-#include <linux/if_macvlan.h> +-#include <linux/if_bridge.h> +-#include <linux/clocksource.h> +-#include <linux/net_tstamp.h> ++#include <linux/pci.h> + #include <linux/ptp_clock_kernel.h> ++#include <linux/types.h> ++#include <linux/avf/virtchnl.h> ++#include <linux/net/intel/i40e_client.h> + #include <net/pkt_cls.h> +-#include <net/pkt_sched.h> +-#include <net/tc_act/tc_gact.h> +-#include <net/tc_act/tc_mirred.h> + #include <net/udp_tunnel.h> +-#include <net/xdp_sock.h> +-#include <linux/bitfield.h> +-#include "i40e_type.h" ++#include "i40e_dcb.h" ++#include "i40e_debug.h" ++#include "i40e_io.h" + #include "i40e_prototype.h" +-#include <linux/net/intel/i40e_client.h> +-#include <linux/avf/virtchnl.h> +-#include "i40e_virtchnl_pf.h" ++#include "i40e_register.h" + #include "i40e_txrx.h" +-#include "i40e_dcb.h" + + /* Useful i40e defaults */ + #define I40E_MAX_VEB 16 +@@ -108,7 +81,7 @@ + #define I40E_MAX_BW_INACTIVE_ACCUM 4 /* accumulate 4 credits max */ + + /* driver state flags */ +-enum i40e_state_t { ++enum 
i40e_state { + __I40E_TESTING, + __I40E_CONFIG_BUSY, + __I40E_CONFIG_DONE, +@@ -156,7 +129,7 @@ enum i40e_state_t { + BIT_ULL(__I40E_PF_RESET_AND_REBUILD_REQUESTED) + + /* VSI state flags */ +-enum i40e_vsi_state_t { ++enum i40e_vsi_state { + __I40E_VSI_DOWN, + __I40E_VSI_NEEDS_RESTART, + __I40E_VSI_SYNCING_FILTERS, +@@ -992,6 +965,7 @@ struct i40e_q_vector { + struct rcu_head rcu; /* to avoid race with update stats on free */ + char name[I40E_INT_NAME_STR_LEN]; + bool arm_wb_state; ++ bool in_busy_poll; + int irq_num; /* IRQ assigned to this q_vector */ + } ____cacheline_internodealigned_in_smp; + +@@ -1321,4 +1295,15 @@ static inline u32 i40e_is_tc_mqprio_enabled(struct i40e_pf *pf) + return pf->flags & I40E_FLAG_TC_MQPRIO; + } + ++/** ++ * i40e_hw_to_pf - get pf pointer from the hardware structure ++ * @hw: pointer to the device HW structure ++ **/ ++static inline struct i40e_pf *i40e_hw_to_pf(struct i40e_hw *hw) ++{ ++ return container_of(hw, struct i40e_pf, hw); ++} ++ ++struct device *i40e_hw_to_dev(struct i40e_hw *hw); ++ + #endif /* _I40E_H_ */ +diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.c b/drivers/net/ethernet/intel/i40e/i40e_adminq.c +index 100eb77b8dfe6..9ce6e633cc2f0 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_adminq.c ++++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.c +@@ -1,9 +1,9 @@ + // SPDX-License-Identifier: GPL-2.0 + /* Copyright(c) 2013 - 2018 Intel Corporation. */ + +-#include "i40e_type.h" ++#include <linux/delay.h> ++#include "i40e_alloc.h" + #include "i40e_register.h" +-#include "i40e_adminq.h" + #include "i40e_prototype.h" + + static void i40e_resume_aq(struct i40e_hw *hw); +@@ -51,7 +51,6 @@ static int i40e_alloc_adminq_asq_ring(struct i40e_hw *hw) + int ret_code; + + ret_code = i40e_allocate_dma_mem(hw, &hw->aq.asq.desc_buf, +- i40e_mem_atq_ring, + (hw->aq.num_asq_entries * + sizeof(struct i40e_aq_desc)), + I40E_ADMINQ_DESC_ALIGNMENT); +@@ -78,7 +77,6 @@ static int i40e_alloc_adminq_arq_ring(struct i40e_hw *hw) + int ret_code; + + ret_code = i40e_allocate_dma_mem(hw, &hw->aq.arq.desc_buf, +- i40e_mem_arq_ring, + (hw->aq.num_arq_entries * + sizeof(struct i40e_aq_desc)), + I40E_ADMINQ_DESC_ALIGNMENT); +@@ -136,7 +134,6 @@ static int i40e_alloc_arq_bufs(struct i40e_hw *hw) + for (i = 0; i < hw->aq.num_arq_entries; i++) { + bi = &hw->aq.arq.r.arq_bi[i]; + ret_code = i40e_allocate_dma_mem(hw, bi, +- i40e_mem_arq_buf, + hw->aq.arq_buf_size, + I40E_ADMINQ_DESC_ALIGNMENT); + if (ret_code) +@@ -198,7 +195,6 @@ static int i40e_alloc_asq_bufs(struct i40e_hw *hw) + for (i = 0; i < hw->aq.num_asq_entries; i++) { + bi = &hw->aq.asq.r.asq_bi[i]; + ret_code = i40e_allocate_dma_mem(hw, bi, +- i40e_mem_asq_buf, + hw->aq.asq_buf_size, + I40E_ADMINQ_DESC_ALIGNMENT); + if (ret_code) +diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.h b/drivers/net/ethernet/intel/i40e/i40e_adminq.h +index 267f2e0a21ce8..80125bea80a2a 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_adminq.h ++++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.h +@@ -4,7 +4,8 @@ + #ifndef _I40E_ADMINQ_H_ + #define _I40E_ADMINQ_H_ + +-#include "i40e_osdep.h" ++#include <linux/mutex.h> ++#include "i40e_alloc.h" + #include "i40e_adminq_cmd.h" + + #define I40E_ADMINQ_DESC(R, i) \ +diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h +index 3357d65a906bf..18a1c3b6d72c5 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h ++++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h +@@ -4,6 +4,8 @@ + #ifndef 
_I40E_ADMINQ_CMD_H_ + #define _I40E_ADMINQ_CMD_H_ + ++#include <linux/bits.h> ++ + /* This header file defines the i40e Admin Queue commands and is shared between + * i40e Firmware and Software. + * +diff --git a/drivers/net/ethernet/intel/i40e/i40e_alloc.h b/drivers/net/ethernet/intel/i40e/i40e_alloc.h +index a6c9a9e343d11..e0dde326255d6 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_alloc.h ++++ b/drivers/net/ethernet/intel/i40e/i40e_alloc.h +@@ -4,25 +4,25 @@ + #ifndef _I40E_ALLOC_H_ + #define _I40E_ALLOC_H_ + ++#include <linux/types.h> ++ + struct i40e_hw; + +-/* Memory allocation types */ +-enum i40e_memory_type { +- i40e_mem_arq_buf = 0, /* ARQ indirect command buffer */ +- i40e_mem_asq_buf = 1, +- i40e_mem_atq_buf = 2, /* ATQ indirect command buffer */ +- i40e_mem_arq_ring = 3, /* ARQ descriptor ring */ +- i40e_mem_atq_ring = 4, /* ATQ descriptor ring */ +- i40e_mem_pd = 5, /* Page Descriptor */ +- i40e_mem_bp = 6, /* Backing Page - 4KB */ +- i40e_mem_bp_jumbo = 7, /* Backing Page - > 4KB */ +- i40e_mem_reserved ++/* memory allocation tracking */ ++struct i40e_dma_mem { ++ void *va; ++ dma_addr_t pa; ++ u32 size; ++}; ++ ++struct i40e_virt_mem { ++ void *va; ++ u32 size; + }; + + /* prototype for functions used for dynamic memory allocation */ + int i40e_allocate_dma_mem(struct i40e_hw *hw, + struct i40e_dma_mem *mem, +- enum i40e_memory_type type, + u64 size, u32 alignment); + int i40e_free_dma_mem(struct i40e_hw *hw, + struct i40e_dma_mem *mem); +diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.c b/drivers/net/ethernet/intel/i40e/i40e_client.c +index 639c5a1ca853b..306758428aefd 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_client.c ++++ b/drivers/net/ethernet/intel/i40e/i40e_client.c +@@ -6,7 +6,6 @@ + #include <linux/net/intel/i40e_client.h> + + #include "i40e.h" +-#include "i40e_prototype.h" + + static LIST_HEAD(i40e_devices); + static DEFINE_MUTEX(i40e_device_mutex); +diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c +index 1b493854f5229..4d7caa1199719 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_common.c ++++ b/drivers/net/ethernet/intel/i40e/i40e_common.c +@@ -1,11 +1,15 @@ + // SPDX-License-Identifier: GPL-2.0 + /* Copyright(c) 2013 - 2021 Intel Corporation. */ + +-#include "i40e.h" +-#include "i40e_type.h" +-#include "i40e_adminq.h" +-#include "i40e_prototype.h" + #include <linux/avf/virtchnl.h> ++#include <linux/bitfield.h> ++#include <linux/delay.h> ++#include <linux/etherdevice.h> ++#include <linux/pci.h> ++#include "i40e_adminq_cmd.h" ++#include "i40e_devids.h" ++#include "i40e_prototype.h" ++#include "i40e_register.h" + + /** + * i40e_set_mac_type - Sets MAC type +diff --git a/drivers/net/ethernet/intel/i40e/i40e_dcb.c b/drivers/net/ethernet/intel/i40e/i40e_dcb.c +index f81e744c0fb36..d57dd30b024fa 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_dcb.c ++++ b/drivers/net/ethernet/intel/i40e/i40e_dcb.c +@@ -1,9 +1,11 @@ + // SPDX-License-Identifier: GPL-2.0 + /* Copyright(c) 2013 - 2021 Intel Corporation. 
*/ + ++#include <linux/bitfield.h> + #include "i40e_adminq.h" +-#include "i40e_prototype.h" ++#include "i40e_alloc.h" + #include "i40e_dcb.h" ++#include "i40e_prototype.h" + + /** + * i40e_get_dcbx_status +diff --git a/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c b/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c +index 195421d863ab1..077a95dad32cf 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c ++++ b/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c +@@ -2,8 +2,8 @@ + /* Copyright(c) 2013 - 2021 Intel Corporation. */ + + #ifdef CONFIG_I40E_DCB +-#include "i40e.h" + #include <net/dcbnl.h> ++#include "i40e.h" + + #define I40E_DCBNL_STATUS_SUCCESS 0 + #define I40E_DCBNL_STATUS_ERROR 1 +diff --git a/drivers/net/ethernet/intel/i40e/i40e_ddp.c b/drivers/net/ethernet/intel/i40e/i40e_ddp.c +index 0e72abd178ae3..21b3518c40968 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_ddp.c ++++ b/drivers/net/ethernet/intel/i40e/i40e_ddp.c +@@ -1,9 +1,9 @@ + // SPDX-License-Identifier: GPL-2.0 + /* Copyright(c) 2013 - 2018 Intel Corporation. */ + ++#include <linux/firmware.h> + #include "i40e.h" + +-#include <linux/firmware.h> + + /** + * i40e_ddp_profiles_eq - checks if DDP profiles are the equivalent +diff --git a/drivers/net/ethernet/intel/i40e/i40e_debug.h b/drivers/net/ethernet/intel/i40e/i40e_debug.h +new file mode 100644 +index 0000000000000..27ebc72d8bfe5 +--- /dev/null ++++ b/drivers/net/ethernet/intel/i40e/i40e_debug.h +@@ -0,0 +1,47 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++/* Copyright(c) 2023 Intel Corporation. */ ++ ++#ifndef _I40E_DEBUG_H_ ++#define _I40E_DEBUG_H_ ++ ++#include <linux/dev_printk.h> ++ ++/* debug masks - set these bits in hw->debug_mask to control output */ ++enum i40e_debug_mask { ++ I40E_DEBUG_INIT = 0x00000001, ++ I40E_DEBUG_RELEASE = 0x00000002, ++ ++ I40E_DEBUG_LINK = 0x00000010, ++ I40E_DEBUG_PHY = 0x00000020, ++ I40E_DEBUG_HMC = 0x00000040, ++ I40E_DEBUG_NVM = 0x00000080, ++ I40E_DEBUG_LAN = 0x00000100, ++ I40E_DEBUG_FLOW = 0x00000200, ++ I40E_DEBUG_DCB = 0x00000400, ++ I40E_DEBUG_DIAG = 0x00000800, ++ I40E_DEBUG_FD = 0x00001000, ++ I40E_DEBUG_PACKAGE = 0x00002000, ++ I40E_DEBUG_IWARP = 0x00F00000, ++ I40E_DEBUG_AQ_MESSAGE = 0x01000000, ++ I40E_DEBUG_AQ_DESCRIPTOR = 0x02000000, ++ I40E_DEBUG_AQ_DESC_BUFFER = 0x04000000, ++ I40E_DEBUG_AQ_COMMAND = 0x06000000, ++ I40E_DEBUG_AQ = 0x0F000000, ++ ++ I40E_DEBUG_USER = 0xF0000000, ++ ++ I40E_DEBUG_ALL = 0xFFFFFFFF ++}; ++ ++struct i40e_hw; ++struct device *i40e_hw_to_dev(struct i40e_hw *hw); ++ ++#define hw_dbg(hw, S, A...) dev_dbg(i40e_hw_to_dev(hw), S, ##A) ++ ++#define i40e_debug(h, m, s, ...) 
\ ++do { \ ++ if (((m) & (h)->debug_mask)) \ ++ dev_info(i40e_hw_to_dev(hw), s, ##__VA_ARGS__); \ ++} while (0) ++ ++#endif /* _I40E_DEBUG_H_ */ +diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +index 1a497cb077100..999c9708def53 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c ++++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +@@ -5,8 +5,9 @@ + + #include <linux/fs.h> + #include <linux/debugfs.h> +- ++#include <linux/if_bridge.h> + #include "i40e.h" ++#include "i40e_virtchnl_pf.h" + + static struct dentry *i40e_dbg_root; + +diff --git a/drivers/net/ethernet/intel/i40e/i40e_diag.h b/drivers/net/ethernet/intel/i40e/i40e_diag.h +index c3ce5f35211f0..ece3a6b9a5c61 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_diag.h ++++ b/drivers/net/ethernet/intel/i40e/i40e_diag.h +@@ -4,7 +4,10 @@ + #ifndef _I40E_DIAG_H_ + #define _I40E_DIAG_H_ + +-#include "i40e_type.h" ++#include "i40e_adminq_cmd.h" ++ ++/* forward-declare the HW struct for the compiler */ ++struct i40e_hw; + + enum i40e_lb_mode { + I40E_LB_MODE_NONE = 0x0, +diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +index bd1321bf7e268..4e90570ba7803 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c ++++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +@@ -3,9 +3,10 @@ + + /* ethtool support for i40e */ + +-#include "i40e.h" ++#include "i40e_devids.h" + #include "i40e_diag.h" + #include "i40e_txrx_common.h" ++#include "i40e_virtchnl_pf.h" + + /* ethtool statistics helpers */ + +diff --git a/drivers/net/ethernet/intel/i40e/i40e_hmc.c b/drivers/net/ethernet/intel/i40e/i40e_hmc.c +index 96ee63aca7a10..1742624ca62ed 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_hmc.c ++++ b/drivers/net/ethernet/intel/i40e/i40e_hmc.c +@@ -1,10 +1,8 @@ + // SPDX-License-Identifier: GPL-2.0 + /* Copyright(c) 2013 - 2018 Intel Corporation. 
*/ + +-#include "i40e.h" +-#include "i40e_osdep.h" +-#include "i40e_register.h" + #include "i40e_alloc.h" ++#include "i40e_debug.h" + #include "i40e_hmc.h" + #include "i40e_type.h" + +@@ -22,7 +20,6 @@ int i40e_add_sd_table_entry(struct i40e_hw *hw, + enum i40e_sd_entry_type type, + u64 direct_mode_sz) + { +- enum i40e_memory_type mem_type __attribute__((unused)); + struct i40e_hmc_sd_entry *sd_entry; + bool dma_mem_alloc_done = false; + struct i40e_dma_mem mem; +@@ -43,16 +40,13 @@ int i40e_add_sd_table_entry(struct i40e_hw *hw, + + sd_entry = &hmc_info->sd_table.sd_entry[sd_index]; + if (!sd_entry->valid) { +- if (I40E_SD_TYPE_PAGED == type) { +- mem_type = i40e_mem_pd; ++ if (type == I40E_SD_TYPE_PAGED) + alloc_len = I40E_HMC_PAGED_BP_SIZE; +- } else { +- mem_type = i40e_mem_bp_jumbo; ++ else + alloc_len = direct_mode_sz; +- } + + /* allocate a 4K pd page or 2M backing page */ +- ret_code = i40e_allocate_dma_mem(hw, &mem, mem_type, alloc_len, ++ ret_code = i40e_allocate_dma_mem(hw, &mem, alloc_len, + I40E_HMC_PD_BP_BUF_ALIGNMENT); + if (ret_code) + goto exit; +@@ -140,7 +134,7 @@ int i40e_add_pd_table_entry(struct i40e_hw *hw, + page = rsrc_pg; + } else { + /* allocate a 4K backing page */ +- ret_code = i40e_allocate_dma_mem(hw, page, i40e_mem_bp, ++ ret_code = i40e_allocate_dma_mem(hw, page, + I40E_HMC_PAGED_BP_SIZE, + I40E_HMC_PD_BP_BUF_ALIGNMENT); + if (ret_code) +diff --git a/drivers/net/ethernet/intel/i40e/i40e_hmc.h b/drivers/net/ethernet/intel/i40e/i40e_hmc.h +index 9960da07a5732..480e3a883cc7a 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_hmc.h ++++ b/drivers/net/ethernet/intel/i40e/i40e_hmc.h +@@ -4,6 +4,10 @@ + #ifndef _I40E_HMC_H_ + #define _I40E_HMC_H_ + ++#include "i40e_alloc.h" ++#include "i40e_io.h" ++#include "i40e_register.h" ++ + #define I40E_HMC_MAX_BP_COUNT 512 + + /* forward-declare the HW struct for the compiler */ +diff --git a/drivers/net/ethernet/intel/i40e/i40e_io.h b/drivers/net/ethernet/intel/i40e/i40e_io.h +new file mode 100644 +index 0000000000000..2a2ed9a1d476b +--- /dev/null ++++ b/drivers/net/ethernet/intel/i40e/i40e_io.h +@@ -0,0 +1,16 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++/* Copyright(c) 2023 Intel Corporation. */ ++ ++#ifndef _I40E_IO_H_ ++#define _I40E_IO_H_ ++ ++/* get readq/writeq support for 32 bit kernels, use the low-first version */ ++#include <linux/io-64-nonatomic-lo-hi.h> ++ ++#define wr32(a, reg, value) writel((value), ((a)->hw_addr + (reg))) ++#define rd32(a, reg) readl((a)->hw_addr + (reg)) ++ ++#define rd64(a, reg) readq((a)->hw_addr + (reg)) ++#define i40e_flush(a) readl((a)->hw_addr + I40E_GLGEN_STAT) ++ ++#endif /* _I40E_IO_H_ */ +diff --git a/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c b/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c +index 474365bf06480..beaaf5c309d51 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c ++++ b/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c +@@ -1,13 +1,10 @@ + // SPDX-License-Identifier: GPL-2.0 + /* Copyright(c) 2013 - 2018 Intel Corporation. 
*/ + +-#include "i40e.h" +-#include "i40e_osdep.h" +-#include "i40e_register.h" +-#include "i40e_type.h" +-#include "i40e_hmc.h" ++#include "i40e_alloc.h" ++#include "i40e_debug.h" + #include "i40e_lan_hmc.h" +-#include "i40e_prototype.h" ++#include "i40e_type.h" + + /* lan specific interface functions */ + +diff --git a/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.h b/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.h +index 9f960404c2b37..305a276953b01 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.h ++++ b/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.h +@@ -4,6 +4,8 @@ + #ifndef _I40E_LAN_HMC_H_ + #define _I40E_LAN_HMC_H_ + ++#include "i40e_hmc.h" ++ + /* forward-declare the HW struct for the compiler */ + struct i40e_hw; + +diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c +index ae32e83a69902..a21fc92aa2725 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_main.c ++++ b/drivers/net/ethernet/intel/i40e/i40e_main.c +@@ -1,19 +1,22 @@ + // SPDX-License-Identifier: GPL-2.0 + /* Copyright(c) 2013 - 2021 Intel Corporation. */ + +-#include <linux/etherdevice.h> +-#include <linux/of_net.h> +-#include <linux/pci.h> +-#include <linux/bpf.h> + #include <generated/utsrelease.h> + #include <linux/crash_dump.h> ++#include <linux/if_bridge.h> ++#include <linux/if_macvlan.h> ++#include <linux/module.h> ++#include <net/pkt_cls.h> ++#include <net/xdp_sock_drv.h> + + /* Local includes */ + #include "i40e.h" ++#include "i40e_devids.h" + #include "i40e_diag.h" ++#include "i40e_lan_hmc.h" ++#include "i40e_virtchnl_pf.h" + #include "i40e_xsk.h" +-#include <net/udp_tunnel.h> +-#include <net/xdp_sock_drv.h> ++ + /* All i40e tracepoints are defined by the include below, which + * must be included exactly once across the whole kernel with + * CREATE_TRACE_POINTS defined +@@ -126,16 +129,27 @@ static void netdev_hw_addr_refcnt(struct i40e_mac_filter *f, + } + + /** +- * i40e_allocate_dma_mem_d - OS specific memory alloc for shared code ++ * i40e_hw_to_dev - get device pointer from the hardware structure ++ * @hw: pointer to the device HW structure ++ **/ ++struct device *i40e_hw_to_dev(struct i40e_hw *hw) ++{ ++ struct i40e_pf *pf = i40e_hw_to_pf(hw); ++ ++ return &pf->pdev->dev; ++} ++ ++/** ++ * i40e_allocate_dma_mem - OS specific memory alloc for shared code + * @hw: pointer to the HW structure + * @mem: ptr to mem struct to fill out + * @size: size of memory requested + * @alignment: what to align the allocation to + **/ +-int i40e_allocate_dma_mem_d(struct i40e_hw *hw, struct i40e_dma_mem *mem, +- u64 size, u32 alignment) ++int i40e_allocate_dma_mem(struct i40e_hw *hw, struct i40e_dma_mem *mem, ++ u64 size, u32 alignment) + { +- struct i40e_pf *pf = (struct i40e_pf *)hw->back; ++ struct i40e_pf *pf = i40e_hw_to_pf(hw); + + mem->size = ALIGN(size, alignment); + mem->va = dma_alloc_coherent(&pf->pdev->dev, mem->size, &mem->pa, +@@ -147,13 +161,13 @@ int i40e_allocate_dma_mem_d(struct i40e_hw *hw, struct i40e_dma_mem *mem, + } + + /** +- * i40e_free_dma_mem_d - OS specific memory free for shared code ++ * i40e_free_dma_mem - OS specific memory free for shared code + * @hw: pointer to the HW structure + * @mem: ptr to mem struct to free + **/ +-int i40e_free_dma_mem_d(struct i40e_hw *hw, struct i40e_dma_mem *mem) ++int i40e_free_dma_mem(struct i40e_hw *hw, struct i40e_dma_mem *mem) + { +- struct i40e_pf *pf = (struct i40e_pf *)hw->back; ++ struct i40e_pf *pf = i40e_hw_to_pf(hw); + + dma_free_coherent(&pf->pdev->dev, mem->size, mem->va, mem->pa); + 
mem->va = NULL; +@@ -164,13 +178,13 @@ int i40e_free_dma_mem_d(struct i40e_hw *hw, struct i40e_dma_mem *mem) + } + + /** +- * i40e_allocate_virt_mem_d - OS specific memory alloc for shared code ++ * i40e_allocate_virt_mem - OS specific memory alloc for shared code + * @hw: pointer to the HW structure + * @mem: ptr to mem struct to fill out + * @size: size of memory requested + **/ +-int i40e_allocate_virt_mem_d(struct i40e_hw *hw, struct i40e_virt_mem *mem, +- u32 size) ++int i40e_allocate_virt_mem(struct i40e_hw *hw, struct i40e_virt_mem *mem, ++ u32 size) + { + mem->size = size; + mem->va = kzalloc(size, GFP_KERNEL); +@@ -182,11 +196,11 @@ int i40e_allocate_virt_mem_d(struct i40e_hw *hw, struct i40e_virt_mem *mem, + } + + /** +- * i40e_free_virt_mem_d - OS specific memory free for shared code ++ * i40e_free_virt_mem - OS specific memory free for shared code + * @hw: pointer to the HW structure + * @mem: ptr to mem struct to free + **/ +-int i40e_free_virt_mem_d(struct i40e_hw *hw, struct i40e_virt_mem *mem) ++int i40e_free_virt_mem(struct i40e_hw *hw, struct i40e_virt_mem *mem) + { + /* it's ok to kfree a NULL pointer */ + kfree(mem->va); +@@ -1249,8 +1263,11 @@ int i40e_count_filters(struct i40e_vsi *vsi) + int bkt; + int cnt = 0; + +- hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) +- ++cnt; ++ hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) { ++ if (f->state == I40E_FILTER_NEW || ++ f->state == I40E_FILTER_ACTIVE) ++ ++cnt; ++ } + + return cnt; + } +@@ -3905,6 +3922,12 @@ static void i40e_vsi_configure_msix(struct i40e_vsi *vsi) + q_vector->tx.target_itr >> 1); + q_vector->tx.current_itr = q_vector->tx.target_itr; + ++ /* Set ITR for software interrupts triggered after exiting ++ * busy-loop polling. ++ */ ++ wr32(hw, I40E_PFINT_ITRN(I40E_SW_ITR, vector - 1), ++ I40E_ITR_20K); ++ + wr32(hw, I40E_PFINT_RATEN(vector - 1), + i40e_intrl_usec_to_reg(vsi->int_rate_limit)); + +@@ -15644,10 +15667,10 @@ static int i40e_init_recovery_mode(struct i40e_pf *pf, struct i40e_hw *hw) + **/ + static inline void i40e_set_subsystem_device_id(struct i40e_hw *hw) + { +- struct pci_dev *pdev = ((struct i40e_pf *)hw->back)->pdev; ++ struct i40e_pf *pf = i40e_hw_to_pf(hw); + +- hw->subsystem_device_id = pdev->subsystem_device ? +- pdev->subsystem_device : ++ hw->subsystem_device_id = pf->pdev->subsystem_device ? ++ pf->pdev->subsystem_device : + (ushort)(rd32(hw, I40E_PFPCI_SUBSYSID) & USHRT_MAX); + } + +@@ -15717,7 +15740,6 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) + set_bit(__I40E_DOWN, pf->state); + + hw = &pf->hw; +- hw->back = pf; + + pf->ioremap_len = min_t(int, pci_resource_len(pdev, 0), + I40E_MAX_CSR_SPACE); +diff --git a/drivers/net/ethernet/intel/i40e/i40e_nvm.c b/drivers/net/ethernet/intel/i40e/i40e_nvm.c +index 07a46adeab38e..e5aec09d58e27 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_nvm.c ++++ b/drivers/net/ethernet/intel/i40e/i40e_nvm.c +@@ -1,6 +1,9 @@ + // SPDX-License-Identifier: GPL-2.0 + /* Copyright(c) 2013 - 2018 Intel Corporation. */ + ++#include <linux/bitfield.h> ++#include <linux/delay.h> ++#include "i40e_alloc.h" + #include "i40e_prototype.h" + + /** +diff --git a/drivers/net/ethernet/intel/i40e/i40e_osdep.h b/drivers/net/ethernet/intel/i40e/i40e_osdep.h +deleted file mode 100644 +index 2bd4de03dafa2..0000000000000 +--- a/drivers/net/ethernet/intel/i40e/i40e_osdep.h ++++ /dev/null +@@ -1,59 +0,0 @@ +-/* SPDX-License-Identifier: GPL-2.0 */ +-/* Copyright(c) 2013 - 2018 Intel Corporation. 
*/ +- +-#ifndef _I40E_OSDEP_H_ +-#define _I40E_OSDEP_H_ +- +-#include <linux/types.h> +-#include <linux/if_ether.h> +-#include <linux/if_vlan.h> +-#include <linux/tcp.h> +-#include <linux/pci.h> +-#include <linux/highuid.h> +- +-/* get readq/writeq support for 32 bit kernels, use the low-first version */ +-#include <linux/io-64-nonatomic-lo-hi.h> +- +-/* File to be the magic between shared code and +- * actual OS primitives +- */ +- +-#define hw_dbg(hw, S, A...) \ +-do { \ +- dev_dbg(&((struct i40e_pf *)hw->back)->pdev->dev, S, ##A); \ +-} while (0) +- +-#define wr32(a, reg, value) writel((value), ((a)->hw_addr + (reg))) +-#define rd32(a, reg) readl((a)->hw_addr + (reg)) +- +-#define rd64(a, reg) readq((a)->hw_addr + (reg)) +-#define i40e_flush(a) readl((a)->hw_addr + I40E_GLGEN_STAT) +- +-/* memory allocation tracking */ +-struct i40e_dma_mem { +- void *va; +- dma_addr_t pa; +- u32 size; +-}; +- +-#define i40e_allocate_dma_mem(h, m, unused, s, a) \ +- i40e_allocate_dma_mem_d(h, m, s, a) +-#define i40e_free_dma_mem(h, m) i40e_free_dma_mem_d(h, m) +- +-struct i40e_virt_mem { +- void *va; +- u32 size; +-}; +- +-#define i40e_allocate_virt_mem(h, m, s) i40e_allocate_virt_mem_d(h, m, s) +-#define i40e_free_virt_mem(h, m) i40e_free_virt_mem_d(h, m) +- +-#define i40e_debug(h, m, s, ...) \ +-do { \ +- if (((m) & (h)->debug_mask)) \ +- pr_info("i40e %02x:%02x.%x " s, \ +- (h)->bus.bus_id, (h)->bus.device, \ +- (h)->bus.func, ##__VA_ARGS__); \ +-} while (0) +- +-#endif /* _I40E_OSDEP_H_ */ +diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h +index 3eeee224f1fb2..2001fefa0c52d 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h ++++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h +@@ -4,9 +4,9 @@ + #ifndef _I40E_PROTOTYPE_H_ + #define _I40E_PROTOTYPE_H_ + +-#include "i40e_type.h" +-#include "i40e_alloc.h" + #include <linux/avf/virtchnl.h> ++#include "i40e_debug.h" ++#include "i40e_type.h" + + /* Prototypes for shared code functions that are not in + * the standard function pointer structures. These are +diff --git a/drivers/net/ethernet/intel/i40e/i40e_ptp.c b/drivers/net/ethernet/intel/i40e/i40e_ptp.c +index 8a26811140b47..65c714d0bfffd 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_ptp.c ++++ b/drivers/net/ethernet/intel/i40e/i40e_ptp.c +@@ -1,9 +1,10 @@ + // SPDX-License-Identifier: GPL-2.0 + /* Copyright(c) 2013 - 2018 Intel Corporation. */ + +-#include "i40e.h" + #include <linux/ptp_classify.h> + #include <linux/posix-clock.h> ++#include "i40e.h" ++#include "i40e_devids.h" + + /* The XL710 timesync is very much like Intel's 82599 design when it comes to + * the fundamental clock design. However, the clock operations are much simpler +@@ -34,7 +35,7 @@ enum i40e_ptp_pin { + GPIO_4 + }; + +-enum i40e_can_set_pins_t { ++enum i40e_can_set_pins { + CANT_DO_PINS = -1, + CAN_SET_PINS, + CAN_DO_PINS +@@ -192,7 +193,7 @@ static bool i40e_is_ptp_pin_dev(struct i40e_hw *hw) + * return CAN_DO_PINS if pins can be manipulated within a NIC or + * return CANT_DO_PINS otherwise. 
+ **/ +-static enum i40e_can_set_pins_t i40e_can_set_pins(struct i40e_pf *pf) ++static enum i40e_can_set_pins i40e_can_set_pins(struct i40e_pf *pf) + { + if (!i40e_is_ptp_pin_dev(&pf->hw)) { + dev_warn(&pf->pdev->dev, +@@ -1070,7 +1071,7 @@ static void i40e_ptp_set_pins_hw(struct i40e_pf *pf) + static int i40e_ptp_set_pins(struct i40e_pf *pf, + struct i40e_ptp_pins_settings *pins) + { +- enum i40e_can_set_pins_t pin_caps = i40e_can_set_pins(pf); ++ enum i40e_can_set_pins pin_caps = i40e_can_set_pins(pf); + int i = 0; + + if (pin_caps == CANT_DO_PINS) +diff --git a/drivers/net/ethernet/intel/i40e/i40e_register.h b/drivers/net/ethernet/intel/i40e/i40e_register.h +index 7339003aa17cd..989c186824733 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_register.h ++++ b/drivers/net/ethernet/intel/i40e/i40e_register.h +@@ -202,7 +202,9 @@ + #define I40E_GLGEN_MSCA_DEVADD_SHIFT 16 + #define I40E_GLGEN_MSCA_PHYADD_SHIFT 21 + #define I40E_GLGEN_MSCA_OPCODE_SHIFT 26 ++#define I40E_GLGEN_MSCA_OPCODE_MASK(_i) I40E_MASK(_i, I40E_GLGEN_MSCA_OPCODE_SHIFT) + #define I40E_GLGEN_MSCA_STCODE_SHIFT 28 ++#define I40E_GLGEN_MSCA_STCODE_MASK I40E_MASK(0x1, I40E_GLGEN_MSCA_STCODE_SHIFT) + #define I40E_GLGEN_MSCA_MDICMD_SHIFT 30 + #define I40E_GLGEN_MSCA_MDICMD_MASK I40E_MASK(0x1, I40E_GLGEN_MSCA_MDICMD_SHIFT) + #define I40E_GLGEN_MSCA_MDIINPROGEN_SHIFT 31 +@@ -328,8 +330,11 @@ + #define I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT 3 + #define I40E_PFINT_DYN_CTLN_ITR_INDX_MASK I40E_MASK(0x3, I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) + #define I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT 5 ++#define I40E_PFINT_DYN_CTLN_INTERVAL_MASK I40E_MASK(0xFFF, I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT) + #define I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_SHIFT 24 + #define I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_MASK I40E_MASK(0x1, I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_SHIFT) ++#define I40E_PFINT_DYN_CTLN_SW_ITR_INDX_SHIFT 25 ++#define I40E_PFINT_DYN_CTLN_SW_ITR_INDX_MASK I40E_MASK(0x3, I40E_PFINT_DYN_CTLN_SW_ITR_INDX_SHIFT) + #define I40E_PFINT_ICR0 0x00038780 /* Reset: CORER */ + #define I40E_PFINT_ICR0_INTEVENT_SHIFT 0 + #define I40E_PFINT_ICR0_INTEVENT_MASK I40E_MASK(0x1, I40E_PFINT_ICR0_INTEVENT_SHIFT) +diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c +index 1df2f93388128..c962987d8b51b 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c ++++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c +@@ -1,14 +1,13 @@ + // SPDX-License-Identifier: GPL-2.0 + /* Copyright(c) 2013 - 2018 Intel Corporation. */ + +-#include <linux/prefetch.h> + #include <linux/bpf_trace.h> ++#include <linux/prefetch.h> ++#include <linux/sctp.h> + #include <net/mpls.h> + #include <net/xdp.h> +-#include "i40e.h" +-#include "i40e_trace.h" +-#include "i40e_prototype.h" + #include "i40e_txrx_common.h" ++#include "i40e_trace.h" + #include "i40e_xsk.h" + + #define I40E_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS) +@@ -2644,7 +2643,22 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget, + return failure ? 
budget : (int)total_rx_packets; + } + +-static inline u32 i40e_buildreg_itr(const int type, u16 itr) ++/** ++ * i40e_buildreg_itr - build a value for writing to I40E_PFINT_DYN_CTLN register ++ * @itr_idx: interrupt throttling index ++ * @interval: interrupt throttling interval value in usecs ++ * @force_swint: force software interrupt ++ * ++ * The function builds a value for I40E_PFINT_DYN_CTLN register that ++ * is used to update interrupt throttling interval for specified ITR index ++ * and optionally enforces a software interrupt. If the @itr_idx is equal ++ * to I40E_ITR_NONE then no interval change is applied and only @force_swint ++ * parameter is taken into account. If the interval change and enforced ++ * software interrupt are not requested then the built value just enables ++ * appropriate vector interrupt. ++ **/ ++static u32 i40e_buildreg_itr(enum i40e_dyn_idx itr_idx, u16 interval, ++ bool force_swint) + { + u32 val; + +@@ -2658,23 +2672,33 @@ static inline u32 i40e_buildreg_itr(const int type, u16 itr) + * an event in the PBA anyway so we need to rely on the automask + * to hold pending events for us until the interrupt is re-enabled + * +- * The itr value is reported in microseconds, and the register +- * value is recorded in 2 microsecond units. For this reason we +- * only need to shift by the interval shift - 1 instead of the +- * full value. ++ * We have to shift the given value as it is reported in microseconds ++ * and the register value is recorded in 2 microsecond units. + */ +- itr &= I40E_ITR_MASK; ++ interval >>= 1; + ++ /* 1. Enable vector interrupt ++ * 2. Update the interval for the specified ITR index ++ * (I40E_ITR_NONE in the register is used to indicate that ++ * no interval update is requested) ++ */ + val = I40E_PFINT_DYN_CTLN_INTENA_MASK | +- (type << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) | +- (itr << (I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT - 1)); ++ FIELD_PREP(I40E_PFINT_DYN_CTLN_ITR_INDX_MASK, itr_idx) | ++ FIELD_PREP(I40E_PFINT_DYN_CTLN_INTERVAL_MASK, interval); ++ ++ /* 3. Enforce software interrupt trigger if requested ++ * (These software interrupts rate is limited by ITR2 that is ++ * set to 20K interrupts per second) ++ */ ++ if (force_swint) ++ val |= I40E_PFINT_DYN_CTLN_SWINT_TRIG_MASK | ++ I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_MASK | ++ FIELD_PREP(I40E_PFINT_DYN_CTLN_SW_ITR_INDX_MASK, ++ I40E_SW_ITR); + + return val; + } + +-/* a small macro to shorten up some long lines */ +-#define INTREG I40E_PFINT_DYN_CTLN +- + /* The act of updating the ITR will cause it to immediately trigger. In order + * to prevent this from throwing off adaptive update statistics we defer the + * update so that it can only happen so often. 
So after either Tx or Rx are +@@ -2693,8 +2717,10 @@ static inline u32 i40e_buildreg_itr(const int type, u16 itr) + static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, + struct i40e_q_vector *q_vector) + { ++ enum i40e_dyn_idx itr_idx = I40E_ITR_NONE; + struct i40e_hw *hw = &vsi->back->hw; +- u32 intval; ++ u16 interval = 0; ++ u32 itr_val; + + /* If we don't have MSIX, then we only need to re-enable icr0 */ + if (!(vsi->back->flags & I40E_FLAG_MSIX_ENABLED)) { +@@ -2716,8 +2742,8 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, + */ + if (q_vector->rx.target_itr < q_vector->rx.current_itr) { + /* Rx ITR needs to be reduced, this is highest priority */ +- intval = i40e_buildreg_itr(I40E_RX_ITR, +- q_vector->rx.target_itr); ++ itr_idx = I40E_RX_ITR; ++ interval = q_vector->rx.target_itr; + q_vector->rx.current_itr = q_vector->rx.target_itr; + q_vector->itr_countdown = ITR_COUNTDOWN_START; + } else if ((q_vector->tx.target_itr < q_vector->tx.current_itr) || +@@ -2726,25 +2752,36 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, + /* Tx ITR needs to be reduced, this is second priority + * Tx ITR needs to be increased more than Rx, fourth priority + */ +- intval = i40e_buildreg_itr(I40E_TX_ITR, +- q_vector->tx.target_itr); ++ itr_idx = I40E_TX_ITR; ++ interval = q_vector->tx.target_itr; + q_vector->tx.current_itr = q_vector->tx.target_itr; + q_vector->itr_countdown = ITR_COUNTDOWN_START; + } else if (q_vector->rx.current_itr != q_vector->rx.target_itr) { + /* Rx ITR needs to be increased, third priority */ +- intval = i40e_buildreg_itr(I40E_RX_ITR, +- q_vector->rx.target_itr); ++ itr_idx = I40E_RX_ITR; ++ interval = q_vector->rx.target_itr; + q_vector->rx.current_itr = q_vector->rx.target_itr; + q_vector->itr_countdown = ITR_COUNTDOWN_START; + } else { + /* No ITR update, lowest priority */ +- intval = i40e_buildreg_itr(I40E_ITR_NONE, 0); + if (q_vector->itr_countdown) + q_vector->itr_countdown--; + } + +- if (!test_bit(__I40E_VSI_DOWN, vsi->state)) +- wr32(hw, INTREG(q_vector->reg_idx), intval); ++ /* Do not update interrupt control register if VSI is down */ ++ if (test_bit(__I40E_VSI_DOWN, vsi->state)) ++ return; ++ ++ /* Update ITR interval if necessary and enforce software interrupt ++ * if we are exiting busy poll. ++ */ ++ if (q_vector->in_busy_poll) { ++ itr_val = i40e_buildreg_itr(itr_idx, interval, true); ++ q_vector->in_busy_poll = false; ++ } else { ++ itr_val = i40e_buildreg_itr(itr_idx, interval, false); ++ } ++ wr32(hw, I40E_PFINT_DYN_CTLN(q_vector->reg_idx), itr_val); + } + + /** +@@ -2859,6 +2896,8 @@ int i40e_napi_poll(struct napi_struct *napi, int budget) + */ + if (likely(napi_complete_done(napi, work_done))) + i40e_update_enable_itr(vsi, q_vector); ++ else ++ q_vector->in_busy_poll = true; + + return min(work_done, budget - 1); + } +diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h +index 900b0d9ede9f5..2b1d50873a4d1 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h ++++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h +@@ -5,6 +5,7 @@ + #define _I40E_TXRX_H_ + + #include <net/xdp.h> ++#include "i40e_type.h" + + /* Interrupt Throttling and Rate Limiting Goodies */ + #define I40E_DEFAULT_IRQ_WORK 256 +@@ -57,7 +58,7 @@ static inline u16 i40e_intrl_usec_to_reg(int intrl) + * mentioning ITR_INDX, ITR_NONE cannot be used as an index 'n' into any + * register but instead is a special value meaning "don't update" ITR0/1/2. 
+ */ +-enum i40e_dyn_idx_t { ++enum i40e_dyn_idx { + I40E_IDX_ITR0 = 0, + I40E_IDX_ITR1 = 1, + I40E_IDX_ITR2 = 2, +@@ -67,6 +68,7 @@ enum i40e_dyn_idx_t { + /* these are indexes into ITRN registers */ + #define I40E_RX_ITR I40E_IDX_ITR0 + #define I40E_TX_ITR I40E_IDX_ITR1 ++#define I40E_SW_ITR I40E_IDX_ITR2 + + /* Supported RSS offloads */ + #define I40E_DEFAULT_RSS_HENA ( \ +@@ -305,7 +307,7 @@ struct i40e_rx_queue_stats { + u64 page_busy_count; + }; + +-enum i40e_ring_state_t { ++enum i40e_ring_state { + __I40E_TX_FDIR_INIT_DONE, + __I40E_TX_XPS_INIT_DONE, + __I40E_RING_STATE_NBITS /* must be last */ +diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx_common.h b/drivers/net/ethernet/intel/i40e/i40e_txrx_common.h +index 8c5118c8baafb..e26807fd21232 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_txrx_common.h ++++ b/drivers/net/ethernet/intel/i40e/i40e_txrx_common.h +@@ -4,6 +4,8 @@ + #ifndef I40E_TXRX_COMMON_ + #define I40E_TXRX_COMMON_ + ++#include "i40e.h" ++ + int i40e_xmit_xdp_tx_ring(struct xdp_buff *xdp, struct i40e_ring *xdp_ring); + void i40e_clean_programming_status(struct i40e_ring *rx_ring, u64 qword0_raw, + u64 qword1); +diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h +index 232131bedc3e7..4092f82bcfb12 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_type.h ++++ b/drivers/net/ethernet/intel/i40e/i40e_type.h +@@ -4,12 +4,9 @@ + #ifndef _I40E_TYPE_H_ + #define _I40E_TYPE_H_ + +-#include "i40e_osdep.h" +-#include "i40e_register.h" ++#include <uapi/linux/if_ether.h> + #include "i40e_adminq.h" + #include "i40e_hmc.h" +-#include "i40e_lan_hmc.h" +-#include "i40e_devids.h" + + /* I40E_MASK is a macro used on 32 bit registers */ + #define I40E_MASK(mask, shift) ((u32)(mask) << (shift)) +@@ -43,48 +40,14 @@ typedef void (*I40E_ADMINQ_CALLBACK)(struct i40e_hw *, struct i40e_aq_desc *); + #define I40E_QTX_CTL_VM_QUEUE 0x1 + #define I40E_QTX_CTL_PF_QUEUE 0x2 + +-/* debug masks - set these bits in hw->debug_mask to control output */ +-enum i40e_debug_mask { +- I40E_DEBUG_INIT = 0x00000001, +- I40E_DEBUG_RELEASE = 0x00000002, +- +- I40E_DEBUG_LINK = 0x00000010, +- I40E_DEBUG_PHY = 0x00000020, +- I40E_DEBUG_HMC = 0x00000040, +- I40E_DEBUG_NVM = 0x00000080, +- I40E_DEBUG_LAN = 0x00000100, +- I40E_DEBUG_FLOW = 0x00000200, +- I40E_DEBUG_DCB = 0x00000400, +- I40E_DEBUG_DIAG = 0x00000800, +- I40E_DEBUG_FD = 0x00001000, +- I40E_DEBUG_PACKAGE = 0x00002000, +- I40E_DEBUG_IWARP = 0x00F00000, +- I40E_DEBUG_AQ_MESSAGE = 0x01000000, +- I40E_DEBUG_AQ_DESCRIPTOR = 0x02000000, +- I40E_DEBUG_AQ_DESC_BUFFER = 0x04000000, +- I40E_DEBUG_AQ_COMMAND = 0x06000000, +- I40E_DEBUG_AQ = 0x0F000000, +- +- I40E_DEBUG_USER = 0xF0000000, +- +- I40E_DEBUG_ALL = 0xFFFFFFFF +-}; +- +-#define I40E_MDIO_CLAUSE22_STCODE_MASK I40E_MASK(1, \ +- I40E_GLGEN_MSCA_STCODE_SHIFT) +-#define I40E_MDIO_CLAUSE22_OPCODE_WRITE_MASK I40E_MASK(1, \ +- I40E_GLGEN_MSCA_OPCODE_SHIFT) +-#define I40E_MDIO_CLAUSE22_OPCODE_READ_MASK I40E_MASK(2, \ +- I40E_GLGEN_MSCA_OPCODE_SHIFT) +- +-#define I40E_MDIO_CLAUSE45_STCODE_MASK I40E_MASK(0, \ +- I40E_GLGEN_MSCA_STCODE_SHIFT) +-#define I40E_MDIO_CLAUSE45_OPCODE_ADDRESS_MASK I40E_MASK(0, \ +- I40E_GLGEN_MSCA_OPCODE_SHIFT) +-#define I40E_MDIO_CLAUSE45_OPCODE_WRITE_MASK I40E_MASK(1, \ +- I40E_GLGEN_MSCA_OPCODE_SHIFT) +-#define I40E_MDIO_CLAUSE45_OPCODE_READ_MASK I40E_MASK(3, \ +- I40E_GLGEN_MSCA_OPCODE_SHIFT) ++#define I40E_MDIO_CLAUSE22_STCODE_MASK I40E_GLGEN_MSCA_STCODE_MASK ++#define I40E_MDIO_CLAUSE22_OPCODE_WRITE_MASK 
I40E_GLGEN_MSCA_OPCODE_MASK(1) ++#define I40E_MDIO_CLAUSE22_OPCODE_READ_MASK I40E_GLGEN_MSCA_OPCODE_MASK(2) ++ ++#define I40E_MDIO_CLAUSE45_STCODE_MASK I40E_GLGEN_MSCA_STCODE_MASK ++#define I40E_MDIO_CLAUSE45_OPCODE_ADDRESS_MASK I40E_GLGEN_MSCA_OPCODE_MASK(0) ++#define I40E_MDIO_CLAUSE45_OPCODE_WRITE_MASK I40E_GLGEN_MSCA_OPCODE_MASK(1) ++#define I40E_MDIO_CLAUSE45_OPCODE_READ_MASK I40E_GLGEN_MSCA_OPCODE_MASK(3) + + #define I40E_PHY_COM_REG_PAGE 0x1E + #define I40E_PHY_LED_LINK_MODE_MASK 0xF0 +@@ -525,7 +488,6 @@ struct i40e_dcbx_config { + /* Port hardware description */ + struct i40e_hw { + u8 __iomem *hw_addr; +- void *back; + + /* subsystem structs */ + struct i40e_phy_info phy; +diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +index 082c099209995..7d47a05274548 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c ++++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +@@ -2,6 +2,8 @@ + /* Copyright(c) 2013 - 2018 Intel Corporation. */ + + #include "i40e.h" ++#include "i40e_lan_hmc.h" ++#include "i40e_virtchnl_pf.h" + + /*********************notification routines***********************/ + +@@ -1628,8 +1630,8 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) + { + struct i40e_hw *hw = &pf->hw; + struct i40e_vf *vf; +- int i, v; + u32 reg; ++ int i; + + /* If we don't have any VFs, then there is nothing to reset */ + if (!pf->num_alloc_vfs) +@@ -1640,11 +1642,10 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) + return false; + + /* Begin reset on all VFs at once */ +- for (v = 0; v < pf->num_alloc_vfs; v++) { +- vf = &pf->vf[v]; ++ for (vf = &pf->vf[0]; vf < &pf->vf[pf->num_alloc_vfs]; ++vf) { + /* If VF is being reset no need to trigger reset again */ + if (!test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states)) +- i40e_trigger_vf_reset(&pf->vf[v], flr); ++ i40e_trigger_vf_reset(vf, flr); + } + + /* HW requires some time to make sure it can flush the FIFO for a VF +@@ -1653,14 +1654,13 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) + * the VFs using a simple iterator that increments once that VF has + * finished resetting. + */ +- for (i = 0, v = 0; i < 10 && v < pf->num_alloc_vfs; i++) { ++ for (i = 0, vf = &pf->vf[0]; i < 10 && vf < &pf->vf[pf->num_alloc_vfs]; ++i) { + usleep_range(10000, 20000); + + /* Check each VF in sequence, beginning with the VF to fail + * the previous check. + */ +- while (v < pf->num_alloc_vfs) { +- vf = &pf->vf[v]; ++ while (vf < &pf->vf[pf->num_alloc_vfs]) { + if (!test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states)) { + reg = rd32(hw, I40E_VPGEN_VFRSTAT(vf->vf_id)); + if (!(reg & I40E_VPGEN_VFRSTAT_VFRD_MASK)) +@@ -1670,7 +1670,7 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) + /* If the current VF has finished resetting, move on + * to the next VF in sequence. + */ +- v++; ++ ++vf; + } + } + +@@ -1680,39 +1680,39 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) + /* Display a warning if at least one VF didn't manage to reset in + * time, but continue on with the operation. + */ +- if (v < pf->num_alloc_vfs) ++ if (vf < &pf->vf[pf->num_alloc_vfs]) + dev_err(&pf->pdev->dev, "VF reset check timeout on VF %d\n", +- pf->vf[v].vf_id); ++ vf->vf_id); + usleep_range(10000, 20000); + + /* Begin disabling all the rings associated with VFs, but do not wait + * between each VF. 
+ */ +- for (v = 0; v < pf->num_alloc_vfs; v++) { ++ for (vf = &pf->vf[0]; vf < &pf->vf[pf->num_alloc_vfs]; ++vf) { + /* On initial reset, we don't have any queues to disable */ +- if (pf->vf[v].lan_vsi_idx == 0) ++ if (vf->lan_vsi_idx == 0) + continue; + + /* If VF is reset in another thread just continue */ + if (test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states)) + continue; + +- i40e_vsi_stop_rings_no_wait(pf->vsi[pf->vf[v].lan_vsi_idx]); ++ i40e_vsi_stop_rings_no_wait(pf->vsi[vf->lan_vsi_idx]); + } + + /* Now that we've notified HW to disable all of the VF rings, wait + * until they finish. + */ +- for (v = 0; v < pf->num_alloc_vfs; v++) { ++ for (vf = &pf->vf[0]; vf < &pf->vf[pf->num_alloc_vfs]; ++vf) { + /* On initial reset, we don't have any queues to disable */ +- if (pf->vf[v].lan_vsi_idx == 0) ++ if (vf->lan_vsi_idx == 0) + continue; + + /* If VF is reset in another thread just continue */ + if (test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states)) + continue; + +- i40e_vsi_wait_queues_disabled(pf->vsi[pf->vf[v].lan_vsi_idx]); ++ i40e_vsi_wait_queues_disabled(pf->vsi[vf->lan_vsi_idx]); + } + + /* Hw may need up to 50ms to finish disabling the RX queues. We +@@ -1721,12 +1721,12 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) + mdelay(50); + + /* Finish the reset on each VF */ +- for (v = 0; v < pf->num_alloc_vfs; v++) { ++ for (vf = &pf->vf[0]; vf < &pf->vf[pf->num_alloc_vfs]; ++vf) { + /* If VF is reset in another thread just continue */ + if (test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states)) + continue; + +- i40e_cleanup_reset_vf(&pf->vf[v]); ++ i40e_cleanup_reset_vf(vf); + } + + i40e_flush(hw); +@@ -3143,11 +3143,12 @@ static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg) + /* Allow to delete VF primary MAC only if it was not set + * administratively by PF or if VF is trusted. + */ +- if (ether_addr_equal(addr, vf->default_lan_addr.addr) && +- i40e_can_vf_change_mac(vf)) +- was_unimac_deleted = true; +- else +- continue; ++ if (ether_addr_equal(addr, vf->default_lan_addr.addr)) { ++ if (i40e_can_vf_change_mac(vf)) ++ was_unimac_deleted = true; ++ else ++ continue; ++ } + + if (i40e_del_mac_filter(vsi, al->list[i].addr)) { + ret = -EINVAL; +diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h +index cf190762421cc..66f95e2f3146a 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h ++++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h +@@ -4,7 +4,9 @@ + #ifndef _I40E_VIRTCHNL_PF_H_ + #define _I40E_VIRTCHNL_PF_H_ + +-#include "i40e.h" ++#include <linux/avf/virtchnl.h> ++#include <linux/netdevice.h> ++#include "i40e_type.h" + + #define I40E_MAX_VLANID 4095 + +diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c +index 1f8ae6f5d9807..65f38a57b3dfe 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c ++++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c +@@ -2,11 +2,7 @@ + /* Copyright(c) 2018 Intel Corporation. 
*/ + + #include <linux/bpf_trace.h> +-#include <linux/stringify.h> + #include <net/xdp_sock_drv.h> +-#include <net/xdp.h> +- +-#include "i40e.h" + #include "i40e_txrx_common.h" + #include "i40e_xsk.h" + +diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.h b/drivers/net/ethernet/intel/i40e/i40e_xsk.h +index 821df248f8bee..ef156fad52f26 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.h ++++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.h +@@ -4,6 +4,8 @@ + #ifndef _I40E_XSK_H_ + #define _I40E_XSK_H_ + ++#include <linux/types.h> ++ + /* This value should match the pragma in the loop_unrolled_for + * macro. Why 4? It is strictly empirical. It seems to be a good + * compromise between the advantage of having simultaneous outstanding +@@ -20,7 +22,9 @@ + #define loop_unrolled_for for + #endif + ++struct i40e_ring; + struct i40e_vsi; ++struct net_device; + struct xsk_buff_pool; + + int i40e_queue_pair_disable(struct i40e_vsi *vsi, int queue_pair); +diff --git a/drivers/net/ethernet/intel/iavf/iavf_common.c b/drivers/net/ethernet/intel/iavf/iavf_common.c +index 1afd761d80520..f7988cf5efa58 100644 +--- a/drivers/net/ethernet/intel/iavf/iavf_common.c ++++ b/drivers/net/ethernet/intel/iavf/iavf_common.c +@@ -1,10 +1,11 @@ + // SPDX-License-Identifier: GPL-2.0 + /* Copyright(c) 2013 - 2018 Intel Corporation. */ + ++#include <linux/avf/virtchnl.h> ++#include <linux/bitfield.h> + #include "iavf_type.h" + #include "iavf_adminq.h" + #include "iavf_prototype.h" +-#include <linux/avf/virtchnl.h> + + /** + * iavf_set_mac_type - Sets MAC type +diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c +index 892c6a4f03bb8..1ac97bd606e38 100644 +--- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c ++++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c +@@ -1,11 +1,12 @@ + // SPDX-License-Identifier: GPL-2.0 + /* Copyright(c) 2013 - 2018 Intel Corporation. */ + ++#include <linux/bitfield.h> ++#include <linux/uaccess.h> ++ + /* ethtool support for iavf */ + #include "iavf.h" + +-#include <linux/uaccess.h> +- + /* ethtool statistics helpers */ + + /** +diff --git a/drivers/net/ethernet/intel/iavf/iavf_fdir.c b/drivers/net/ethernet/intel/iavf/iavf_fdir.c +index 03e774bd2a5b4..65ddcd81c993e 100644 +--- a/drivers/net/ethernet/intel/iavf/iavf_fdir.c ++++ b/drivers/net/ethernet/intel/iavf/iavf_fdir.c +@@ -3,6 +3,7 @@ + + /* flow director ethtool support for iavf */ + ++#include <linux/bitfield.h> + #include "iavf.h" + + #define GTPU_PORT 2152 +diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.c b/drivers/net/ethernet/intel/iavf/iavf_txrx.c +index 8c5f6096b0022..f998ecf743c46 100644 +--- a/drivers/net/ethernet/intel/iavf/iavf_txrx.c ++++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.c +@@ -1,6 +1,7 @@ + // SPDX-License-Identifier: GPL-2.0 + /* Copyright(c) 2013 - 2018 Intel Corporation. 
*/ + ++#include <linux/bitfield.h> + #include <linux/prefetch.h> + + #include "iavf.h" +diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +index 45f3e351653db..72ca2199c9572 100644 +--- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h ++++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +@@ -592,8 +592,9 @@ struct ice_aqc_recipe_data_elem { + struct ice_aqc_recipe_to_profile { + __le16 profile_id; + u8 rsvd[6]; +- DECLARE_BITMAP(recipe_assoc, ICE_MAX_NUM_RECIPES); ++ __le64 recipe_assoc; + }; ++static_assert(sizeof(struct ice_aqc_recipe_to_profile) == 16); + + /* Add/Update/Remove/Get switch rules (indirect 0x02A0, 0x02A1, 0x02A2, 0x02A3) + */ +diff --git a/drivers/net/ethernet/intel/ice/ice_lag.c b/drivers/net/ethernet/intel/ice/ice_lag.c +index 23e197c3d02a7..4e675c7c199fa 100644 +--- a/drivers/net/ethernet/intel/ice/ice_lag.c ++++ b/drivers/net/ethernet/intel/ice/ice_lag.c +@@ -2000,14 +2000,14 @@ int ice_init_lag(struct ice_pf *pf) + /* associate recipes to profiles */ + for (n = 0; n < ICE_PROFID_IPV6_GTPU_IPV6_TCP_INNER; n++) { + err = ice_aq_get_recipe_to_profile(&pf->hw, n, +- (u8 *)&recipe_bits, NULL); ++ &recipe_bits, NULL); + if (err) + continue; + + if (recipe_bits & BIT(ICE_SW_LKUP_DFLT)) { + recipe_bits |= BIT(lag->pf_recipe); + ice_aq_map_recipe_to_profile(&pf->hw, n, +- (u8 *)&recipe_bits, NULL); ++ recipe_bits, NULL); + } + } + +diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c +index 7f4bc110ead44..2004120a58acd 100644 +--- a/drivers/net/ethernet/intel/ice/ice_lib.c ++++ b/drivers/net/ethernet/intel/ice/ice_lib.c +@@ -3084,27 +3084,26 @@ ice_vsi_rebuild_set_coalesce(struct ice_vsi *vsi, + } + + /** +- * ice_vsi_realloc_stat_arrays - Frees unused stat structures ++ * ice_vsi_realloc_stat_arrays - Frees unused stat structures or alloc new ones + * @vsi: VSI pointer +- * @prev_txq: Number of Tx rings before ring reallocation +- * @prev_rxq: Number of Rx rings before ring reallocation + */ +-static void +-ice_vsi_realloc_stat_arrays(struct ice_vsi *vsi, int prev_txq, int prev_rxq) ++static int ++ice_vsi_realloc_stat_arrays(struct ice_vsi *vsi) + { ++ u16 req_txq = vsi->req_txq ? vsi->req_txq : vsi->alloc_txq; ++ u16 req_rxq = vsi->req_rxq ? 
vsi->req_rxq : vsi->alloc_rxq;
++ struct ice_ring_stats **tx_ring_stats;
++ struct ice_ring_stats **rx_ring_stats;
+ struct ice_vsi_stats *vsi_stat;
+ struct ice_pf *pf = vsi->back;
++ u16 prev_txq = vsi->alloc_txq;
++ u16 prev_rxq = vsi->alloc_rxq;
+ int i;
+
+- if (!prev_txq || !prev_rxq)
+- return;
+- if (vsi->type == ICE_VSI_CHNL)
+- return;
+-
+ vsi_stat = pf->vsi_stats[vsi->idx];
+
+- if (vsi->num_txq < prev_txq) {
+- for (i = vsi->num_txq; i < prev_txq; i++) {
++ if (req_txq < prev_txq) {
++ for (i = req_txq; i < prev_txq; i++) {
+ if (vsi_stat->tx_ring_stats[i]) {
+ kfree_rcu(vsi_stat->tx_ring_stats[i], rcu);
+ WRITE_ONCE(vsi_stat->tx_ring_stats[i], NULL);
+@@ -3112,14 +3111,36 @@ ice_vsi_realloc_stat_arrays(struct ice_vsi *vsi, int prev_txq, int prev_rxq)
+ }
+ }
+
+- if (vsi->num_rxq < prev_rxq) {
+- for (i = vsi->num_rxq; i < prev_rxq; i++) {
++ tx_ring_stats = vsi_stat->tx_ring_stats;
++ vsi_stat->tx_ring_stats =
++ krealloc_array(vsi_stat->tx_ring_stats, req_txq,
++ sizeof(*vsi_stat->tx_ring_stats),
++ GFP_KERNEL | __GFP_ZERO);
++ if (!vsi_stat->tx_ring_stats) {
++ vsi_stat->tx_ring_stats = tx_ring_stats;
++ return -ENOMEM;
++ }
++
++ if (req_rxq < prev_rxq) {
++ for (i = req_rxq; i < prev_rxq; i++) {
+ if (vsi_stat->rx_ring_stats[i]) {
+ kfree_rcu(vsi_stat->rx_ring_stats[i], rcu);
+ WRITE_ONCE(vsi_stat->rx_ring_stats[i], NULL);
+ }
+ }
+ }
++
++ rx_ring_stats = vsi_stat->rx_ring_stats;
++ vsi_stat->rx_ring_stats =
++ krealloc_array(vsi_stat->rx_ring_stats, req_rxq,
++ sizeof(*vsi_stat->rx_ring_stats),
++ GFP_KERNEL | __GFP_ZERO);
++ if (!vsi_stat->rx_ring_stats) {
++ vsi_stat->rx_ring_stats = rx_ring_stats;
++ return -ENOMEM;
++ }
++
++ return 0;
+ }
+
+ /**
+@@ -3136,9 +3157,9 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags)
+ {
+ struct ice_vsi_cfg_params params = {};
+ struct ice_coalesce_stored *coalesce;
+- int ret, prev_txq, prev_rxq;
+- int prev_num_q_vectors = 0;
++ int prev_num_q_vectors;
+ struct ice_pf *pf;
++ int ret;
+
+ if (!vsi)
+ return -EINVAL;
+@@ -3150,6 +3171,15 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags)
+ if (WARN_ON(vsi->type == ICE_VSI_VF && !vsi->vf))
+ return -EINVAL;
+
++ ret = ice_vsi_realloc_stat_arrays(vsi);
++ if (ret)
++ goto err_vsi_cfg;
++
++ ice_vsi_decfg(vsi);
++ ret = ice_vsi_cfg_def(vsi, &params);
++ if (ret)
++ goto err_vsi_cfg;
++
+ coalesce = kcalloc(vsi->num_q_vectors,
+ sizeof(struct ice_coalesce_stored), GFP_KERNEL);
+ if (!coalesce)
+@@ -3157,14 +3187,6 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags)
+
+ prev_num_q_vectors = ice_vsi_rebuild_get_coalesce(vsi, coalesce);
+
+- prev_txq = vsi->num_txq;
+- prev_rxq = vsi->num_rxq;
+-
+- ice_vsi_decfg(vsi);
+- ret = ice_vsi_cfg_def(vsi, &params);
+- if (ret)
+- goto err_vsi_cfg;
+-
+ ret = ice_vsi_cfg_tc_lan(pf, vsi);
+ if (ret) {
+ if (vsi_flags & ICE_VSI_FLAG_INIT) {
+@@ -3176,8 +3198,6 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags)
+ return ice_schedule_reset(pf, ICE_RESET_PFR);
+ }
+
+- ice_vsi_realloc_stat_arrays(vsi, prev_txq, prev_rxq);
+-
+ ice_vsi_rebuild_set_coalesce(vsi, coalesce, prev_num_q_vectors);
+ kfree(coalesce);
+
+@@ -3185,8 +3205,8 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags)
+
+ err_vsi_cfg_tc_lan:
+ ice_vsi_decfg(vsi);
+-err_vsi_cfg:
+ kfree(coalesce);
++err_vsi_cfg:
+ return ret;
+ }
+
+diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c
+index 2f77b684ff765..4c6d58bb2690d 100644
+--- a/drivers/net/ethernet/intel/ice/ice_switch.c
++++ 
b/drivers/net/ethernet/intel/ice/ice_switch.c +@@ -2032,12 +2032,12 @@ ice_update_recipe_lkup_idx(struct ice_hw *hw, + * ice_aq_map_recipe_to_profile - Map recipe to packet profile + * @hw: pointer to the HW struct + * @profile_id: package profile ID to associate the recipe with +- * @r_bitmap: Recipe bitmap filled in and need to be returned as response ++ * @r_assoc: Recipe bitmap filled in and need to be returned as response + * @cd: pointer to command details structure or NULL + * Recipe to profile association (0x0291) + */ + int +-ice_aq_map_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap, ++ice_aq_map_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u64 r_assoc, + struct ice_sq_cd *cd) + { + struct ice_aqc_recipe_to_profile *cmd; +@@ -2049,7 +2049,7 @@ ice_aq_map_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap, + /* Set the recipe ID bit in the bitmask to let the device know which + * profile we are associating the recipe to + */ +- memcpy(cmd->recipe_assoc, r_bitmap, sizeof(cmd->recipe_assoc)); ++ cmd->recipe_assoc = cpu_to_le64(r_assoc); + + return ice_aq_send_cmd(hw, &desc, NULL, 0, cd); + } +@@ -2058,12 +2058,12 @@ ice_aq_map_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap, + * ice_aq_get_recipe_to_profile - Map recipe to packet profile + * @hw: pointer to the HW struct + * @profile_id: package profile ID to associate the recipe with +- * @r_bitmap: Recipe bitmap filled in and need to be returned as response ++ * @r_assoc: Recipe bitmap filled in and need to be returned as response + * @cd: pointer to command details structure or NULL + * Associate profile ID with given recipe (0x0293) + */ + int +-ice_aq_get_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap, ++ice_aq_get_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u64 *r_assoc, + struct ice_sq_cd *cd) + { + struct ice_aqc_recipe_to_profile *cmd; +@@ -2076,7 +2076,7 @@ ice_aq_get_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap, + + status = ice_aq_send_cmd(hw, &desc, NULL, 0, cd); + if (!status) +- memcpy(r_bitmap, cmd->recipe_assoc, sizeof(cmd->recipe_assoc)); ++ *r_assoc = le64_to_cpu(cmd->recipe_assoc); + + return status; + } +@@ -2121,6 +2121,7 @@ int ice_alloc_recipe(struct ice_hw *hw, u16 *rid) + static void ice_get_recp_to_prof_map(struct ice_hw *hw) + { + DECLARE_BITMAP(r_bitmap, ICE_MAX_NUM_RECIPES); ++ u64 recp_assoc; + u16 i; + + for (i = 0; i < hw->switch_info->max_used_prof_index + 1; i++) { +@@ -2128,8 +2129,9 @@ static void ice_get_recp_to_prof_map(struct ice_hw *hw) + + bitmap_zero(profile_to_recipe[i], ICE_MAX_NUM_RECIPES); + bitmap_zero(r_bitmap, ICE_MAX_NUM_RECIPES); +- if (ice_aq_get_recipe_to_profile(hw, i, (u8 *)r_bitmap, NULL)) ++ if (ice_aq_get_recipe_to_profile(hw, i, &recp_assoc, NULL)) + continue; ++ bitmap_from_arr64(r_bitmap, &recp_assoc, ICE_MAX_NUM_RECIPES); + bitmap_copy(profile_to_recipe[i], r_bitmap, + ICE_MAX_NUM_RECIPES); + for_each_set_bit(j, r_bitmap, ICE_MAX_NUM_RECIPES) +@@ -5431,22 +5433,24 @@ ice_add_adv_recipe(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups, + */ + list_for_each_entry(fvit, &rm->fv_list, list_entry) { + DECLARE_BITMAP(r_bitmap, ICE_MAX_NUM_RECIPES); ++ u64 recp_assoc; + u16 j; + + status = ice_aq_get_recipe_to_profile(hw, fvit->profile_id, +- (u8 *)r_bitmap, NULL); ++ &recp_assoc, NULL); + if (status) + goto err_unroll; + ++ bitmap_from_arr64(r_bitmap, &recp_assoc, ICE_MAX_NUM_RECIPES); + bitmap_or(r_bitmap, r_bitmap, rm->r_bitmap, + ICE_MAX_NUM_RECIPES); + status = 
ice_acquire_change_lock(hw, ICE_RES_WRITE); + if (status) + goto err_unroll; + ++ bitmap_to_arr64(&recp_assoc, r_bitmap, ICE_MAX_NUM_RECIPES); + status = ice_aq_map_recipe_to_profile(hw, fvit->profile_id, +- (u8 *)r_bitmap, +- NULL); ++ recp_assoc, NULL); + ice_release_change_lock(hw); + + if (status) +diff --git a/drivers/net/ethernet/intel/ice/ice_switch.h b/drivers/net/ethernet/intel/ice/ice_switch.h +index db7e501b7e0a4..89ffa1b51b5ad 100644 +--- a/drivers/net/ethernet/intel/ice/ice_switch.h ++++ b/drivers/net/ethernet/intel/ice/ice_switch.h +@@ -424,10 +424,10 @@ int ice_aq_add_recipe(struct ice_hw *hw, + struct ice_aqc_recipe_data_elem *s_recipe_list, + u16 num_recipes, struct ice_sq_cd *cd); + int +-ice_aq_get_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap, ++ice_aq_get_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u64 *r_assoc, + struct ice_sq_cd *cd); + int +-ice_aq_map_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap, ++ice_aq_map_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u64 r_assoc, + struct ice_sq_cd *cd); + + #endif /* _ICE_SWITCH_H_ */ +diff --git a/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c b/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c +index 80dc4bcdd3a41..b3e1bdcb80f84 100644 +--- a/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c ++++ b/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c +@@ -26,24 +26,22 @@ static void ice_port_vlan_on(struct ice_vsi *vsi) + struct ice_vsi_vlan_ops *vlan_ops; + struct ice_pf *pf = vsi->back; + +- if (ice_is_dvm_ena(&pf->hw)) { +- vlan_ops = &vsi->outer_vlan_ops; +- +- /* setup outer VLAN ops */ +- vlan_ops->set_port_vlan = ice_vsi_set_outer_port_vlan; +- vlan_ops->clear_port_vlan = ice_vsi_clear_outer_port_vlan; ++ /* setup inner VLAN ops */ ++ vlan_ops = &vsi->inner_vlan_ops; + +- /* setup inner VLAN ops */ +- vlan_ops = &vsi->inner_vlan_ops; ++ if (ice_is_dvm_ena(&pf->hw)) { + vlan_ops->add_vlan = noop_vlan_arg; + vlan_ops->del_vlan = noop_vlan_arg; + vlan_ops->ena_stripping = ice_vsi_ena_inner_stripping; + vlan_ops->dis_stripping = ice_vsi_dis_inner_stripping; + vlan_ops->ena_insertion = ice_vsi_ena_inner_insertion; + vlan_ops->dis_insertion = ice_vsi_dis_inner_insertion; +- } else { +- vlan_ops = &vsi->inner_vlan_ops; + ++ /* setup outer VLAN ops */ ++ vlan_ops = &vsi->outer_vlan_ops; ++ vlan_ops->set_port_vlan = ice_vsi_set_outer_port_vlan; ++ vlan_ops->clear_port_vlan = ice_vsi_clear_outer_port_vlan; ++ } else { + vlan_ops->set_port_vlan = ice_vsi_set_inner_port_vlan; + vlan_ops->clear_port_vlan = ice_vsi_clear_inner_port_vlan; + } +diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.c b/drivers/net/ethernet/intel/igb/e1000_82575.c +index 8d6e44ee1895a..64dfc362d1dc4 100644 +--- a/drivers/net/ethernet/intel/igb/e1000_82575.c ++++ b/drivers/net/ethernet/intel/igb/e1000_82575.c +@@ -222,8 +222,7 @@ static s32 igb_init_phy_params_82575(struct e1000_hw *hw) + } + + /* set lan id */ +- hw->bus.func = (rd32(E1000_STATUS) & E1000_STATUS_FUNC_MASK) >> +- E1000_STATUS_FUNC_SHIFT; ++ hw->bus.func = FIELD_GET(E1000_STATUS_FUNC_MASK, rd32(E1000_STATUS)); + + /* Set phy->phy_addr and phy->id. 
*/ + ret_val = igb_get_phy_id_82575(hw); +@@ -262,8 +261,8 @@ static s32 igb_init_phy_params_82575(struct e1000_hw *hw) + if (ret_val) + goto out; + +- data = (data & E1000_M88E1112_MAC_CTRL_1_MODE_MASK) >> +- E1000_M88E1112_MAC_CTRL_1_MODE_SHIFT; ++ data = FIELD_GET(E1000_M88E1112_MAC_CTRL_1_MODE_MASK, ++ data); + if (data == E1000_M88E1112_AUTO_COPPER_SGMII || + data == E1000_M88E1112_AUTO_COPPER_BASEX) + hw->mac.ops.check_for_link = +@@ -330,8 +329,7 @@ static s32 igb_init_nvm_params_82575(struct e1000_hw *hw) + u32 eecd = rd32(E1000_EECD); + u16 size; + +- size = (u16)((eecd & E1000_EECD_SIZE_EX_MASK) >> +- E1000_EECD_SIZE_EX_SHIFT); ++ size = FIELD_GET(E1000_EECD_SIZE_EX_MASK, eecd); + + /* Added to a constant, "size" becomes the left-shift value + * for setting word_size. +@@ -2798,7 +2796,7 @@ static s32 igb_get_thermal_sensor_data_generic(struct e1000_hw *hw) + return 0; + + hw->nvm.ops.read(hw, ets_offset, 1, &ets_cfg); +- if (((ets_cfg & NVM_ETS_TYPE_MASK) >> NVM_ETS_TYPE_SHIFT) ++ if (FIELD_GET(NVM_ETS_TYPE_MASK, ets_cfg) + != NVM_ETS_TYPE_EMC) + return E1000_NOT_IMPLEMENTED; + +@@ -2808,10 +2806,8 @@ static s32 igb_get_thermal_sensor_data_generic(struct e1000_hw *hw) + + for (i = 1; i < num_sensors; i++) { + hw->nvm.ops.read(hw, (ets_offset + i), 1, &ets_sensor); +- sensor_index = ((ets_sensor & NVM_ETS_DATA_INDEX_MASK) >> +- NVM_ETS_DATA_INDEX_SHIFT); +- sensor_location = ((ets_sensor & NVM_ETS_DATA_LOC_MASK) >> +- NVM_ETS_DATA_LOC_SHIFT); ++ sensor_index = FIELD_GET(NVM_ETS_DATA_INDEX_MASK, ets_sensor); ++ sensor_location = FIELD_GET(NVM_ETS_DATA_LOC_MASK, ets_sensor); + + if (sensor_location != 0) + hw->phy.ops.read_i2c_byte(hw, +@@ -2859,20 +2855,17 @@ static s32 igb_init_thermal_sensor_thresh_generic(struct e1000_hw *hw) + return 0; + + hw->nvm.ops.read(hw, ets_offset, 1, &ets_cfg); +- if (((ets_cfg & NVM_ETS_TYPE_MASK) >> NVM_ETS_TYPE_SHIFT) ++ if (FIELD_GET(NVM_ETS_TYPE_MASK, ets_cfg) + != NVM_ETS_TYPE_EMC) + return E1000_NOT_IMPLEMENTED; + +- low_thresh_delta = ((ets_cfg & NVM_ETS_LTHRES_DELTA_MASK) >> +- NVM_ETS_LTHRES_DELTA_SHIFT); ++ low_thresh_delta = FIELD_GET(NVM_ETS_LTHRES_DELTA_MASK, ets_cfg); + num_sensors = (ets_cfg & NVM_ETS_NUM_SENSORS_MASK); + + for (i = 1; i <= num_sensors; i++) { + hw->nvm.ops.read(hw, (ets_offset + i), 1, &ets_sensor); +- sensor_index = ((ets_sensor & NVM_ETS_DATA_INDEX_MASK) >> +- NVM_ETS_DATA_INDEX_SHIFT); +- sensor_location = ((ets_sensor & NVM_ETS_DATA_LOC_MASK) >> +- NVM_ETS_DATA_LOC_SHIFT); ++ sensor_index = FIELD_GET(NVM_ETS_DATA_INDEX_MASK, ets_sensor); ++ sensor_location = FIELD_GET(NVM_ETS_DATA_LOC_MASK, ets_sensor); + therm_limit = ets_sensor & NVM_ETS_DATA_HTHRESH_MASK; + + hw->phy.ops.write_i2c_byte(hw, +diff --git a/drivers/net/ethernet/intel/igb/e1000_i210.c b/drivers/net/ethernet/intel/igb/e1000_i210.c +index b9b9d35494d27..503b239868e8e 100644 +--- a/drivers/net/ethernet/intel/igb/e1000_i210.c ++++ b/drivers/net/ethernet/intel/igb/e1000_i210.c +@@ -5,9 +5,9 @@ + * e1000_i211 + */ + +-#include <linux/types.h> ++#include <linux/bitfield.h> + #include <linux/if_ether.h> +- ++#include <linux/types.h> + #include "e1000_hw.h" + #include "e1000_i210.h" + +@@ -473,7 +473,7 @@ s32 igb_read_invm_version(struct e1000_hw *hw, + /* Check if we have second version location used */ + else if ((i == 1) && + ((*record & E1000_INVM_VER_FIELD_TWO) == 0)) { +- version = (*record & E1000_INVM_VER_FIELD_ONE) >> 3; ++ version = FIELD_GET(E1000_INVM_VER_FIELD_ONE, *record); + status = 0; + break; + } +@@ -483,8 +483,8 @@ s32 
igb_read_invm_version(struct e1000_hw *hw, + else if ((((*record & E1000_INVM_VER_FIELD_ONE) == 0) && + ((*record & 0x3) == 0)) || (((*record & 0x3) != 0) && + (i != 1))) { +- version = (*next_record & E1000_INVM_VER_FIELD_TWO) +- >> 13; ++ version = FIELD_GET(E1000_INVM_VER_FIELD_TWO, ++ *next_record); + status = 0; + break; + } +@@ -493,15 +493,15 @@ s32 igb_read_invm_version(struct e1000_hw *hw, + */ + else if (((*record & E1000_INVM_VER_FIELD_TWO) == 0) && + ((*record & 0x3) == 0)) { +- version = (*record & E1000_INVM_VER_FIELD_ONE) >> 3; ++ version = FIELD_GET(E1000_INVM_VER_FIELD_ONE, *record); + status = 0; + break; + } + } + + if (!status) { +- invm_ver->invm_major = (version & E1000_INVM_MAJOR_MASK) +- >> E1000_INVM_MAJOR_SHIFT; ++ invm_ver->invm_major = FIELD_GET(E1000_INVM_MAJOR_MASK, ++ version); + invm_ver->invm_minor = version & E1000_INVM_MINOR_MASK; + } + /* Read Image Type */ +@@ -520,7 +520,8 @@ s32 igb_read_invm_version(struct e1000_hw *hw, + ((*record & E1000_INVM_IMGTYPE_FIELD) == 0)) || + ((((*record & 0x3) != 0) && (i != 1)))) { + invm_ver->invm_img_type = +- (*next_record & E1000_INVM_IMGTYPE_FIELD) >> 23; ++ FIELD_GET(E1000_INVM_IMGTYPE_FIELD, ++ *next_record); + status = 0; + break; + } +diff --git a/drivers/net/ethernet/intel/igb/e1000_mac.c b/drivers/net/ethernet/intel/igb/e1000_mac.c +index caf91c6f52b4d..ceaec2cf08a43 100644 +--- a/drivers/net/ethernet/intel/igb/e1000_mac.c ++++ b/drivers/net/ethernet/intel/igb/e1000_mac.c +@@ -56,7 +56,7 @@ s32 igb_get_bus_info_pcie(struct e1000_hw *hw) + } + + reg = rd32(E1000_STATUS); +- bus->func = (reg & E1000_STATUS_FUNC_MASK) >> E1000_STATUS_FUNC_SHIFT; ++ bus->func = FIELD_GET(E1000_STATUS_FUNC_MASK, reg); + + return 0; + } +diff --git a/drivers/net/ethernet/intel/igb/e1000_nvm.c b/drivers/net/ethernet/intel/igb/e1000_nvm.c +index fa136e6e93285..2dcd64d6dec31 100644 +--- a/drivers/net/ethernet/intel/igb/e1000_nvm.c ++++ b/drivers/net/ethernet/intel/igb/e1000_nvm.c +@@ -1,9 +1,9 @@ + // SPDX-License-Identifier: GPL-2.0 + /* Copyright(c) 2007 - 2018 Intel Corporation. 
*/ + +-#include <linux/if_ether.h> ++#include <linux/bitfield.h> + #include <linux/delay.h> +- ++#include <linux/if_ether.h> + #include "e1000_mac.h" + #include "e1000_nvm.h" + +@@ -708,10 +708,10 @@ void igb_get_fw_version(struct e1000_hw *hw, struct e1000_fw_version *fw_vers) + */ + if ((etrack_test & NVM_MAJOR_MASK) != NVM_ETRACK_VALID) { + hw->nvm.ops.read(hw, NVM_VERSION, 1, &fw_version); +- fw_vers->eep_major = (fw_version & NVM_MAJOR_MASK) +- >> NVM_MAJOR_SHIFT; +- fw_vers->eep_minor = (fw_version & NVM_MINOR_MASK) +- >> NVM_MINOR_SHIFT; ++ fw_vers->eep_major = FIELD_GET(NVM_MAJOR_MASK, ++ fw_version); ++ fw_vers->eep_minor = FIELD_GET(NVM_MINOR_MASK, ++ fw_version); + fw_vers->eep_build = (fw_version & NVM_IMAGE_ID_MASK); + goto etrack_id; + } +@@ -753,15 +753,13 @@ void igb_get_fw_version(struct e1000_hw *hw, struct e1000_fw_version *fw_vers) + return; + } + hw->nvm.ops.read(hw, NVM_VERSION, 1, &fw_version); +- fw_vers->eep_major = (fw_version & NVM_MAJOR_MASK) +- >> NVM_MAJOR_SHIFT; ++ fw_vers->eep_major = FIELD_GET(NVM_MAJOR_MASK, fw_version); + + /* check for old style version format in newer images*/ + if ((fw_version & NVM_NEW_DEC_MASK) == 0x0) { + eeprom_verl = (fw_version & NVM_COMB_VER_MASK); + } else { +- eeprom_verl = (fw_version & NVM_MINOR_MASK) +- >> NVM_MINOR_SHIFT; ++ eeprom_verl = FIELD_GET(NVM_MINOR_MASK, fw_version); + } + /* Convert minor value to hex before assigning to output struct + * Val to be converted will not be higher than 99, per tool output +diff --git a/drivers/net/ethernet/intel/igb/e1000_phy.c b/drivers/net/ethernet/intel/igb/e1000_phy.c +index a018000f7db92..bed94e50a6693 100644 +--- a/drivers/net/ethernet/intel/igb/e1000_phy.c ++++ b/drivers/net/ethernet/intel/igb/e1000_phy.c +@@ -1,9 +1,9 @@ + // SPDX-License-Identifier: GPL-2.0 + /* Copyright(c) 2007 - 2018 Intel Corporation. 
*/ + +-#include <linux/if_ether.h> ++#include <linux/bitfield.h> + #include <linux/delay.h> +- ++#include <linux/if_ether.h> + #include "e1000_mac.h" + #include "e1000_phy.h" + +@@ -1682,8 +1682,7 @@ s32 igb_get_cable_length_m88(struct e1000_hw *hw) + if (ret_val) + goto out; + +- index = (phy_data & M88E1000_PSSR_CABLE_LENGTH) >> +- M88E1000_PSSR_CABLE_LENGTH_SHIFT; ++ index = FIELD_GET(M88E1000_PSSR_CABLE_LENGTH, phy_data); + if (index >= ARRAY_SIZE(e1000_m88_cable_length_table) - 1) { + ret_val = -E1000_ERR_PHY; + goto out; +@@ -1796,8 +1795,7 @@ s32 igb_get_cable_length_m88_gen2(struct e1000_hw *hw) + if (ret_val) + goto out; + +- index = (phy_data & M88E1000_PSSR_CABLE_LENGTH) >> +- M88E1000_PSSR_CABLE_LENGTH_SHIFT; ++ index = FIELD_GET(M88E1000_PSSR_CABLE_LENGTH, phy_data); + if (index >= ARRAY_SIZE(e1000_m88_cable_length_table) - 1) { + ret_val = -E1000_ERR_PHY; + goto out; +@@ -2578,8 +2576,7 @@ s32 igb_get_cable_length_82580(struct e1000_hw *hw) + if (ret_val) + goto out; + +- length = (phy_data & I82580_DSTATUS_CABLE_LENGTH) >> +- I82580_DSTATUS_CABLE_LENGTH_SHIFT; ++ length = FIELD_GET(I82580_DSTATUS_CABLE_LENGTH, phy_data); + + if (length == E1000_CABLE_LENGTH_UNDEFINED) + ret_val = -E1000_ERR_PHY; +diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c +index 4ee849985e2b8..92b2be06a6e93 100644 +--- a/drivers/net/ethernet/intel/igb/igb_ethtool.c ++++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c +@@ -2434,7 +2434,7 @@ static int igb_get_ts_info(struct net_device *dev, + } + } + +-#define ETHER_TYPE_FULL_MASK ((__force __be16)~0) ++#define ETHER_TYPE_FULL_MASK cpu_to_be16(FIELD_MAX(U16_MAX)) + static int igb_get_ethtool_nfc_entry(struct igb_adapter *adapter, + struct ethtool_rxnfc *cmd) + { +@@ -2733,8 +2733,8 @@ static int igb_rxnfc_write_vlan_prio_filter(struct igb_adapter *adapter, + u32 vlapqf; + + vlapqf = rd32(E1000_VLAPQF); +- vlan_priority = (ntohs(input->filter.vlan_tci) & VLAN_PRIO_MASK) +- >> VLAN_PRIO_SHIFT; ++ vlan_priority = FIELD_GET(VLAN_PRIO_MASK, ++ ntohs(input->filter.vlan_tci)); + queue_index = (vlapqf >> (vlan_priority * 4)) & E1000_VLAPQF_QUEUE_MASK; + + /* check whether this vlan prio is already set */ +@@ -2817,7 +2817,7 @@ static void igb_clear_vlan_prio_filter(struct igb_adapter *adapter, + u8 vlan_priority; + u32 vlapqf; + +- vlan_priority = (vlan_tci & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; ++ vlan_priority = FIELD_GET(VLAN_PRIO_MASK, vlan_tci); + + vlapqf = rd32(E1000_VLAPQF); + vlapqf &= ~E1000_VLAPQF_P_VALID(vlan_priority); +diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c +index 11921141b6079..4431e7693d45f 100644 +--- a/drivers/net/ethernet/intel/igb/igb_main.c ++++ b/drivers/net/ethernet/intel/igb/igb_main.c +@@ -7283,7 +7283,7 @@ static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf) + static int igb_set_vf_multicasts(struct igb_adapter *adapter, + u32 *msgbuf, u32 vf) + { +- int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT; ++ int n = FIELD_GET(E1000_VT_MSGINFO_MASK, msgbuf[0]); + u16 *hash_list = (u16 *)&msgbuf[1]; + struct vf_data_storage *vf_data = &adapter->vf_data[vf]; + int i; +@@ -7543,7 +7543,7 @@ static int igb_ndo_set_vf_vlan(struct net_device *netdev, int vf, + + static int igb_set_vf_vlan_msg(struct igb_adapter *adapter, u32 *msgbuf, u32 vf) + { +- int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT; ++ int add = FIELD_GET(E1000_VT_MSGINFO_MASK, msgbuf[0]); + int vid = 
(msgbuf[1] & E1000_VLVF_VLANID_MASK); + int ret; + +diff --git a/drivers/net/ethernet/intel/igbvf/mbx.c b/drivers/net/ethernet/intel/igbvf/mbx.c +index a3cd7ac48d4b6..d15282ee5ea8f 100644 +--- a/drivers/net/ethernet/intel/igbvf/mbx.c ++++ b/drivers/net/ethernet/intel/igbvf/mbx.c +@@ -1,6 +1,7 @@ + // SPDX-License-Identifier: GPL-2.0 + /* Copyright(c) 2009 - 2018 Intel Corporation. */ + ++#include <linux/bitfield.h> + #include "mbx.h" + + /** +diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c +index 7ff2752dd763a..c5012fa36af2f 100644 +--- a/drivers/net/ethernet/intel/igbvf/netdev.c ++++ b/drivers/net/ethernet/intel/igbvf/netdev.c +@@ -3,25 +3,25 @@ + + #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +-#include <linux/module.h> +-#include <linux/types.h> +-#include <linux/init.h> +-#include <linux/pci.h> +-#include <linux/vmalloc.h> +-#include <linux/pagemap.h> ++#include <linux/bitfield.h> + #include <linux/delay.h> +-#include <linux/netdevice.h> +-#include <linux/tcp.h> +-#include <linux/ipv6.h> +-#include <linux/slab.h> +-#include <net/checksum.h> +-#include <net/ip6_checksum.h> +-#include <linux/mii.h> + #include <linux/ethtool.h> + #include <linux/if_vlan.h> ++#include <linux/init.h> ++#include <linux/ipv6.h> ++#include <linux/mii.h> ++#include <linux/module.h> ++#include <linux/netdevice.h> ++#include <linux/pagemap.h> ++#include <linux/pci.h> + #include <linux/prefetch.h> + #include <linux/sctp.h> +- ++#include <linux/slab.h> ++#include <linux/tcp.h> ++#include <linux/types.h> ++#include <linux/vmalloc.h> ++#include <net/checksum.h> ++#include <net/ip6_checksum.h> + #include "igbvf.h" + + char igbvf_driver_name[] = "igbvf"; +@@ -273,9 +273,8 @@ static bool igbvf_clean_rx_irq(struct igbvf_adapter *adapter, + * that case, it fills the header buffer and spills the rest + * into the page. + */ +- hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hs_rss.hdr_info) +- & E1000_RXDADV_HDRBUFLEN_MASK) >> +- E1000_RXDADV_HDRBUFLEN_SHIFT; ++ hlen = le16_get_bits(rx_desc->wb.lower.lo_dword.hs_rss.hdr_info, ++ E1000_RXDADV_HDRBUFLEN_MASK); + if (hlen > adapter->rx_ps_hdr_size) + hlen = adapter->rx_ps_hdr_size; + +diff --git a/drivers/net/ethernet/intel/igc/igc_i225.c b/drivers/net/ethernet/intel/igc/igc_i225.c +index 17546a035ab19..d2562c8e8015e 100644 +--- a/drivers/net/ethernet/intel/igc/igc_i225.c ++++ b/drivers/net/ethernet/intel/igc/igc_i225.c +@@ -1,6 +1,7 @@ + // SPDX-License-Identifier: GPL-2.0 + /* Copyright (c) 2018 Intel Corporation */ + ++#include <linux/bitfield.h> + #include <linux/delay.h> + + #include "igc_hw.h" +diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c +index fc1de116d5548..e83700ad7e622 100644 +--- a/drivers/net/ethernet/intel/igc/igc_main.c ++++ b/drivers/net/ethernet/intel/igc/igc_main.c +@@ -1640,10 +1640,6 @@ static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb, + + if (unlikely(test_bit(IGC_RING_FLAG_TX_HWTSTAMP, &tx_ring->flags) && + skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { +- /* FIXME: add support for retrieving timestamps from +- * the other timer registers before skipping the +- * timestamping request. 
+- */ + unsigned long flags; + u32 tstamp_flags; + +diff --git a/drivers/net/ethernet/intel/igc/igc_phy.c b/drivers/net/ethernet/intel/igc/igc_phy.c +index 53b77c969c857..d0d9e7170154c 100644 +--- a/drivers/net/ethernet/intel/igc/igc_phy.c ++++ b/drivers/net/ethernet/intel/igc/igc_phy.c +@@ -1,6 +1,7 @@ + // SPDX-License-Identifier: GPL-2.0 + /* Copyright (c) 2018 Intel Corporation */ + ++#include <linux/bitfield.h> + #include "igc_phy.h" + + /** +diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c +index b2a0f2aaa05be..2e6e0365154a1 100644 +--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c ++++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c +@@ -684,7 +684,7 @@ void ixgbe_set_lan_id_multi_port_pcie(struct ixgbe_hw *hw) + u32 reg; + + reg = IXGBE_READ_REG(hw, IXGBE_STATUS); +- bus->func = (reg & IXGBE_STATUS_LAN_ID) >> IXGBE_STATUS_LAN_ID_SHIFT; ++ bus->func = FIELD_GET(IXGBE_STATUS_LAN_ID, reg); + bus->lan_id = bus->func; + + /* check for a port swap */ +@@ -695,8 +695,8 @@ void ixgbe_set_lan_id_multi_port_pcie(struct ixgbe_hw *hw) + /* Get MAC instance from EEPROM for configuring CS4227 */ + if (hw->device_id == IXGBE_DEV_ID_X550EM_A_SFP) { + hw->eeprom.ops.read(hw, IXGBE_EEPROM_CTRL_4, &ee_ctrl_4); +- bus->instance_id = (ee_ctrl_4 & IXGBE_EE_CTRL_4_INST_ID) >> +- IXGBE_EE_CTRL_4_INST_ID_SHIFT; ++ bus->instance_id = FIELD_GET(IXGBE_EE_CTRL_4_INST_ID, ++ ee_ctrl_4); + } + } + +@@ -870,10 +870,9 @@ s32 ixgbe_init_eeprom_params_generic(struct ixgbe_hw *hw) + * SPI EEPROM is assumed here. This code would need to + * change if a future EEPROM is not SPI. + */ +- eeprom_size = (u16)((eec & IXGBE_EEC_SIZE) >> +- IXGBE_EEC_SIZE_SHIFT); ++ eeprom_size = FIELD_GET(IXGBE_EEC_SIZE, eec); + eeprom->word_size = BIT(eeprom_size + +- IXGBE_EEPROM_WORD_SIZE_SHIFT); ++ IXGBE_EEPROM_WORD_SIZE_SHIFT); + } + + if (eec & IXGBE_EEC_ADDR_SIZE) +@@ -3935,10 +3934,10 @@ s32 ixgbe_get_thermal_sensor_data_generic(struct ixgbe_hw *hw) + if (status) + return status; + +- sensor_index = ((ets_sensor & IXGBE_ETS_DATA_INDEX_MASK) >> +- IXGBE_ETS_DATA_INDEX_SHIFT); +- sensor_location = ((ets_sensor & IXGBE_ETS_DATA_LOC_MASK) >> +- IXGBE_ETS_DATA_LOC_SHIFT); ++ sensor_index = FIELD_GET(IXGBE_ETS_DATA_INDEX_MASK, ++ ets_sensor); ++ sensor_location = FIELD_GET(IXGBE_ETS_DATA_LOC_MASK, ++ ets_sensor); + + if (sensor_location != 0) { + status = hw->phy.ops.read_i2c_byte(hw, +@@ -3982,8 +3981,7 @@ s32 ixgbe_init_thermal_sensor_thresh_generic(struct ixgbe_hw *hw) + if (status) + return status; + +- low_thresh_delta = ((ets_cfg & IXGBE_ETS_LTHRES_DELTA_MASK) >> +- IXGBE_ETS_LTHRES_DELTA_SHIFT); ++ low_thresh_delta = FIELD_GET(IXGBE_ETS_LTHRES_DELTA_MASK, ets_cfg); + num_sensors = (ets_cfg & IXGBE_ETS_NUM_SENSORS_MASK); + if (num_sensors > IXGBE_MAX_SENSORS) + num_sensors = IXGBE_MAX_SENSORS; +@@ -3997,10 +3995,10 @@ s32 ixgbe_init_thermal_sensor_thresh_generic(struct ixgbe_hw *hw) + ets_offset + 1 + i); + continue; + } +- sensor_index = ((ets_sensor & IXGBE_ETS_DATA_INDEX_MASK) >> +- IXGBE_ETS_DATA_INDEX_SHIFT); +- sensor_location = ((ets_sensor & IXGBE_ETS_DATA_LOC_MASK) >> +- IXGBE_ETS_DATA_LOC_SHIFT); ++ sensor_index = FIELD_GET(IXGBE_ETS_DATA_INDEX_MASK, ++ ets_sensor); ++ sensor_location = FIELD_GET(IXGBE_ETS_DATA_LOC_MASK, ++ ets_sensor); + therm_limit = ets_sensor & IXGBE_ETS_DATA_HTHRESH_MASK; + + hw->phy.ops.write_i2c_byte(hw, +diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c +index 
13a6fca31004a..866024f2b9eeb 100644 +--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c ++++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c +@@ -914,7 +914,13 @@ int ixgbe_ipsec_vf_add_sa(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf) + goto err_out; + } + +- xs = kzalloc(sizeof(*xs), GFP_KERNEL); ++ algo = xfrm_aead_get_byname(aes_gcm_name, IXGBE_IPSEC_AUTH_BITS, 1); ++ if (unlikely(!algo)) { ++ err = -ENOENT; ++ goto err_out; ++ } ++ ++ xs = kzalloc(sizeof(*xs), GFP_ATOMIC); + if (unlikely(!xs)) { + err = -ENOMEM; + goto err_out; +@@ -930,14 +936,8 @@ int ixgbe_ipsec_vf_add_sa(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf) + memcpy(&xs->id.daddr.a4, sam->addr, sizeof(xs->id.daddr.a4)); + xs->xso.dev = adapter->netdev; + +- algo = xfrm_aead_get_byname(aes_gcm_name, IXGBE_IPSEC_AUTH_BITS, 1); +- if (unlikely(!algo)) { +- err = -ENOENT; +- goto err_xs; +- } +- + aead_len = sizeof(*xs->aead) + IXGBE_IPSEC_KEY_BITS / 8; +- xs->aead = kzalloc(aead_len, GFP_KERNEL); ++ xs->aead = kzalloc(aead_len, GFP_ATOMIC); + if (unlikely(!xs->aead)) { + err = -ENOMEM; + goto err_xs; +diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +index cb23aad5953b0..f245f3df40fca 100644 +--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c ++++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +@@ -11409,7 +11409,7 @@ static pci_ers_result_t ixgbe_io_error_detected(struct pci_dev *pdev, + if ((pf_func & 1) == (pdev->devfn & 1)) { + unsigned int device_id; + +- vf = (req_id & 0x7F) >> 1; ++ vf = FIELD_GET(0x7F, req_id); + e_dev_err("VF %d has caused a PCIe error\n", vf); + e_dev_err("TLP: dw0: %8.8x\tdw1: %8.8x\tdw2: " + "%8.8x\tdw3: %8.8x\n", +diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c +index 930dc50719364..f28140a05f091 100644 +--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c ++++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c +@@ -276,9 +276,8 @@ s32 ixgbe_identify_phy_generic(struct ixgbe_hw *hw) + return 0; + + if (hw->phy.nw_mng_if_sel) { +- phy_addr = (hw->phy.nw_mng_if_sel & +- IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD) >> +- IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD_SHIFT; ++ phy_addr = FIELD_GET(IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD, ++ hw->phy.nw_mng_if_sel); + if (ixgbe_probe_phy(hw, phy_addr)) + return 0; + else +@@ -1447,8 +1446,7 @@ s32 ixgbe_reset_phy_nl(struct ixgbe_hw *hw) + ret_val = hw->eeprom.ops.read(hw, data_offset, &eword); + if (ret_val) + goto err_eeprom; +- control = (eword & IXGBE_CONTROL_MASK_NL) >> +- IXGBE_CONTROL_SHIFT_NL; ++ control = FIELD_GET(IXGBE_CONTROL_MASK_NL, eword); + edata = eword & IXGBE_DATA_MASK_NL; + switch (control) { + case IXGBE_DELAY_NL: +diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +index 198ab9d97618c..d0a6c220a12ac 100644 +--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c ++++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +@@ -363,8 +363,7 @@ int ixgbe_pci_sriov_configure(struct pci_dev *dev, int num_vfs) + static int ixgbe_set_vf_multicasts(struct ixgbe_adapter *adapter, + u32 *msgbuf, u32 vf) + { +- int entries = (msgbuf[0] & IXGBE_VT_MSGINFO_MASK) +- >> IXGBE_VT_MSGINFO_SHIFT; ++ int entries = FIELD_GET(IXGBE_VT_MSGINFO_MASK, msgbuf[0]); + u16 *hash_list = (u16 *)&msgbuf[1]; + struct vf_data_storage *vfinfo = &adapter->vfinfo[vf]; + struct ixgbe_hw *hw = &adapter->hw; +@@ -971,7 +970,7 @@ static int ixgbe_set_vf_mac_addr(struct ixgbe_adapter *adapter, + static int 
ixgbe_set_vf_vlan_msg(struct ixgbe_adapter *adapter, + u32 *msgbuf, u32 vf) + { +- u32 add = (msgbuf[0] & IXGBE_VT_MSGINFO_MASK) >> IXGBE_VT_MSGINFO_SHIFT; ++ u32 add = FIELD_GET(IXGBE_VT_MSGINFO_MASK, msgbuf[0]); + u32 vid = (msgbuf[1] & IXGBE_VLVF_VLANID_MASK); + u8 tcs = adapter->hw_tcs; + +@@ -994,8 +993,7 @@ static int ixgbe_set_vf_macvlan_msg(struct ixgbe_adapter *adapter, + u32 *msgbuf, u32 vf) + { + u8 *new_mac = ((u8 *)(&msgbuf[1])); +- int index = (msgbuf[0] & IXGBE_VT_MSGINFO_MASK) >> +- IXGBE_VT_MSGINFO_SHIFT; ++ int index = FIELD_GET(IXGBE_VT_MSGINFO_MASK, msgbuf[0]); + int err; + + if (adapter->vfinfo[vf].pf_set_mac && !adapter->vfinfo[vf].trusted && +diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c +index 15325c549d9b5..57a912e4653fc 100644 +--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c ++++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c +@@ -187,16 +187,16 @@ s32 ixgbe_start_hw_X540(struct ixgbe_hw *hw) + s32 ixgbe_init_eeprom_params_X540(struct ixgbe_hw *hw) + { + struct ixgbe_eeprom_info *eeprom = &hw->eeprom; +- u32 eec; +- u16 eeprom_size; + + if (eeprom->type == ixgbe_eeprom_uninitialized) { ++ u16 eeprom_size; ++ u32 eec; ++ + eeprom->semaphore_delay = 10; + eeprom->type = ixgbe_flash; + + eec = IXGBE_READ_REG(hw, IXGBE_EEC(hw)); +- eeprom_size = (u16)((eec & IXGBE_EEC_SIZE) >> +- IXGBE_EEC_SIZE_SHIFT); ++ eeprom_size = FIELD_GET(IXGBE_EEC_SIZE, eec); + eeprom->word_size = BIT(eeprom_size + + IXGBE_EEPROM_WORD_SIZE_SHIFT); + +diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c +index cdc912bba8089..c1adc94a5a657 100644 +--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c ++++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c +@@ -630,16 +630,16 @@ static s32 ixgbe_fc_autoneg_fw(struct ixgbe_hw *hw) + static s32 ixgbe_init_eeprom_params_X550(struct ixgbe_hw *hw) + { + struct ixgbe_eeprom_info *eeprom = &hw->eeprom; +- u32 eec; +- u16 eeprom_size; + + if (eeprom->type == ixgbe_eeprom_uninitialized) { ++ u16 eeprom_size; ++ u32 eec; ++ + eeprom->semaphore_delay = 10; + eeprom->type = ixgbe_flash; + + eec = IXGBE_READ_REG(hw, IXGBE_EEC(hw)); +- eeprom_size = (u16)((eec & IXGBE_EEC_SIZE) >> +- IXGBE_EEC_SIZE_SHIFT); ++ eeprom_size = FIELD_GET(IXGBE_EEC_SIZE, eec); + eeprom->word_size = BIT(eeprom_size + + IXGBE_EEPROM_WORD_SIZE_SHIFT); + +@@ -714,8 +714,7 @@ static s32 ixgbe_read_iosf_sb_reg_x550(struct ixgbe_hw *hw, u32 reg_addr, + ret = ixgbe_iosf_wait(hw, &command); + + if ((command & IXGBE_SB_IOSF_CTRL_RESP_STAT_MASK) != 0) { +- error = (command & IXGBE_SB_IOSF_CTRL_CMPL_ERR_MASK) >> +- IXGBE_SB_IOSF_CTRL_CMPL_ERR_SHIFT; ++ error = FIELD_GET(IXGBE_SB_IOSF_CTRL_CMPL_ERR_MASK, command); + hw_dbg(hw, "Failed to read, error %x\n", error); + ret = -EIO; + goto out; +@@ -1415,8 +1414,7 @@ static s32 ixgbe_write_iosf_sb_reg_x550(struct ixgbe_hw *hw, u32 reg_addr, + ret = ixgbe_iosf_wait(hw, &command); + + if ((command & IXGBE_SB_IOSF_CTRL_RESP_STAT_MASK) != 0) { +- error = (command & IXGBE_SB_IOSF_CTRL_CMPL_ERR_MASK) >> +- IXGBE_SB_IOSF_CTRL_CMPL_ERR_SHIFT; ++ error = FIELD_GET(IXGBE_SB_IOSF_CTRL_CMPL_ERR_MASK, command); + hw_dbg(hw, "Failed to write, error %x\n", error); + return -EIO; + } +@@ -3229,9 +3227,8 @@ static void ixgbe_read_mng_if_sel_x550em(struct ixgbe_hw *hw) + */ + if (hw->mac.type == ixgbe_mac_x550em_a && + hw->phy.nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_MDIO_ACT) { +- hw->phy.mdio.prtad = (hw->phy.nw_mng_if_sel & +- 
IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD) >> +- IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD_SHIFT; ++ hw->phy.mdio.prtad = FIELD_GET(IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD, ++ hw->phy.nw_mng_if_sel); + } + } + +diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c +index 6c18d3d2442eb..2539c985f695a 100644 +--- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c ++++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c +@@ -808,6 +808,11 @@ static int cgx_lmac_enadis_pause_frm(void *cgxd, int lmac_id, + if (!is_lmac_valid(cgx, lmac_id)) + return -ENODEV; + ++ cfg = cgx_read(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL); ++ cfg &= ~CGX_GMP_GMI_RXX_FRM_CTL_CTL_BCK; ++ cfg |= rx_pause ? CGX_GMP_GMI_RXX_FRM_CTL_CTL_BCK : 0x0; ++ cgx_write(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL, cfg); ++ + cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL); + cfg &= ~CGX_SMUX_RX_FRM_CTL_CTL_BCK; + cfg |= rx_pause ? CGX_SMUX_RX_FRM_CTL_CTL_BCK : 0x0; +diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c +index 9181ac5de912a..19075f217d00c 100644 +--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c ++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c +@@ -160,6 +160,8 @@ static int rvu_map_cgx_lmac_pf(struct rvu *rvu) + continue; + lmac_bmap = cgx_get_lmac_bmap(rvu_cgx_pdata(cgx, rvu)); + for_each_set_bit(iter, &lmac_bmap, rvu->hw->lmac_per_cgx) { ++ if (iter >= MAX_LMAC_COUNT) ++ continue; + lmac = cgx_get_lmacid(rvu_cgx_pdata(cgx, rvu), + iter); + rvu->pf2cgxlmac_map[pf] = cgxlmac_id_to_bmap(cgx, lmac); +diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +index 55639c133dd02..91a4ea529d077 100644 +--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c ++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +@@ -1669,7 +1669,7 @@ static int npc_fwdb_detect_load_prfl_img(struct rvu *rvu, uint64_t prfl_sz, + struct npc_coalesced_kpu_prfl *img_data = NULL; + int i = 0, rc = -EINVAL; + void __iomem *kpu_prfl_addr; +- u16 offset; ++ u32 offset; + + img_data = (struct npc_coalesced_kpu_prfl __force *)rvu->kpu_prfl_addr; + if (le64_to_cpu(img_data->signature) == KPU_SIGN && +diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +index b40bd0e467514..3f46d5e0fb2ec 100644 +--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c ++++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +@@ -1933,7 +1933,7 @@ int otx2_open(struct net_device *netdev) + * mcam entries are enabled to receive the packets. Hence disable the + * packet I/O. 
+ */ +- if (err == EIO) ++ if (err == -EIO) + goto err_disable_rxtx; + else if (err) + goto err_tx_stop_queues; +diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c +index aaf1faed4133e..7bb92e2dacda6 100644 +--- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c ++++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c +@@ -14,6 +14,7 @@ + #include <linux/module.h> + #include <linux/phy.h> + #include <linux/platform_device.h> ++#include <linux/rtnetlink.h> + #include <linux/skbuff.h> + + #include "mlxbf_gige.h" +@@ -139,13 +140,10 @@ static int mlxbf_gige_open(struct net_device *netdev) + control |= MLXBF_GIGE_CONTROL_PORT_EN; + writeq(control, priv->base + MLXBF_GIGE_CONTROL); + +- err = mlxbf_gige_request_irqs(priv); +- if (err) +- return err; + mlxbf_gige_cache_stats(priv); + err = mlxbf_gige_clean_port(priv); + if (err) +- goto free_irqs; ++ return err; + + /* Clear driver's valid_polarity to match hardware, + * since the above call to clean_port() resets the +@@ -157,7 +155,7 @@ static int mlxbf_gige_open(struct net_device *netdev) + + err = mlxbf_gige_tx_init(priv); + if (err) +- goto free_irqs; ++ goto phy_deinit; + err = mlxbf_gige_rx_init(priv); + if (err) + goto tx_deinit; +@@ -166,6 +164,10 @@ static int mlxbf_gige_open(struct net_device *netdev) + napi_enable(&priv->napi); + netif_start_queue(netdev); + ++ err = mlxbf_gige_request_irqs(priv); ++ if (err) ++ goto napi_deinit; ++ + /* Set bits in INT_EN that we care about */ + int_en = MLXBF_GIGE_INT_EN_HW_ACCESS_ERROR | + MLXBF_GIGE_INT_EN_TX_CHECKSUM_INPUTS | +@@ -182,11 +184,17 @@ static int mlxbf_gige_open(struct net_device *netdev) + + return 0; + ++napi_deinit: ++ netif_stop_queue(netdev); ++ napi_disable(&priv->napi); ++ netif_napi_del(&priv->napi); ++ mlxbf_gige_rx_deinit(priv); ++ + tx_deinit: + mlxbf_gige_tx_deinit(priv); + +-free_irqs: +- mlxbf_gige_free_irqs(priv); ++phy_deinit: ++ phy_stop(phydev); + return err; + } + +@@ -487,8 +495,13 @@ static void mlxbf_gige_shutdown(struct platform_device *pdev) + { + struct mlxbf_gige *priv = platform_get_drvdata(pdev); + +- writeq(0, priv->base + MLXBF_GIGE_INT_EN); +- mlxbf_gige_clean_port(priv); ++ rtnl_lock(); ++ netif_device_detach(priv->netdev); ++ ++ if (netif_running(priv->netdev)) ++ dev_close(priv->netdev); ++ ++ rtnl_unlock(); + } + + static const struct acpi_device_id __maybe_unused mlxbf_gige_acpi_match[] = { +diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c +index c81cdeb4d4e7e..0b6174748d2b4 100644 +--- a/drivers/net/ethernet/microchip/lan743x_main.c ++++ b/drivers/net/ethernet/microchip/lan743x_main.c +@@ -25,6 +25,8 @@ + #define PCS_POWER_STATE_DOWN 0x6 + #define PCS_POWER_STATE_UP 0x4 + ++#define RFE_RD_FIFO_TH_3_DWORDS 0x3 ++ + static void pci11x1x_strap_get_status(struct lan743x_adapter *adapter) + { + u32 chip_rev; +@@ -3223,6 +3225,21 @@ static void lan743x_full_cleanup(struct lan743x_adapter *adapter) + lan743x_pci_cleanup(adapter); + } + ++static void pci11x1x_set_rfe_rd_fifo_threshold(struct lan743x_adapter *adapter) ++{ ++ u16 rev = adapter->csr.id_rev & ID_REV_CHIP_REV_MASK_; ++ ++ if (rev == ID_REV_CHIP_REV_PCI11X1X_B0_) { ++ u32 misc_ctl; ++ ++ misc_ctl = lan743x_csr_read(adapter, MISC_CTL_0); ++ misc_ctl &= ~MISC_CTL_0_RFE_READ_FIFO_MASK_; ++ misc_ctl |= FIELD_PREP(MISC_CTL_0_RFE_READ_FIFO_MASK_, ++ RFE_RD_FIFO_TH_3_DWORDS); ++ lan743x_csr_write(adapter, MISC_CTL_0, misc_ctl); ++ } ++} ++ + 
static int lan743x_hardware_init(struct lan743x_adapter *adapter, + struct pci_dev *pdev) + { +@@ -3238,6 +3255,7 @@ static int lan743x_hardware_init(struct lan743x_adapter *adapter, + pci11x1x_strap_get_status(adapter); + spin_lock_init(&adapter->eth_syslock_spinlock); + mutex_init(&adapter->sgmii_rw_lock); ++ pci11x1x_set_rfe_rd_fifo_threshold(adapter); + } else { + adapter->max_tx_channels = LAN743X_MAX_TX_CHANNELS; + adapter->used_tx_channels = LAN743X_USED_TX_CHANNELS; +diff --git a/drivers/net/ethernet/microchip/lan743x_main.h b/drivers/net/ethernet/microchip/lan743x_main.h +index 52609fc13ad95..f0b486f85450e 100644 +--- a/drivers/net/ethernet/microchip/lan743x_main.h ++++ b/drivers/net/ethernet/microchip/lan743x_main.h +@@ -26,6 +26,7 @@ + #define ID_REV_CHIP_REV_MASK_ (0x0000FFFF) + #define ID_REV_CHIP_REV_A0_ (0x00000000) + #define ID_REV_CHIP_REV_B0_ (0x00000010) ++#define ID_REV_CHIP_REV_PCI11X1X_B0_ (0x000000B0) + + #define FPGA_REV (0x04) + #define FPGA_REV_GET_MINOR_(fpga_rev) (((fpga_rev) >> 8) & 0x000000FF) +@@ -311,6 +312,9 @@ + #define SGMII_CTL_LINK_STATUS_SOURCE_ BIT(8) + #define SGMII_CTL_SGMII_POWER_DN_ BIT(1) + ++#define MISC_CTL_0 (0x920) ++#define MISC_CTL_0_RFE_READ_FIFO_MASK_ GENMASK(6, 4) ++ + /* Vendor Specific SGMII MMD details */ + #define SR_VSMMD_PCS_ID1 0x0004 + #define SR_VSMMD_PCS_ID2 0x0005 +diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c +index 48ea4aeeea5d4..e443d69e39511 100644 +--- a/drivers/net/ethernet/microsoft/mana/mana_en.c ++++ b/drivers/net/ethernet/microsoft/mana/mana_en.c +@@ -601,7 +601,7 @@ static void mana_get_rxbuf_cfg(int mtu, u32 *datasize, u32 *alloc_size, + + *alloc_size = mtu + MANA_RXBUF_PAD + *headroom; + +- *datasize = ALIGN(mtu + ETH_HLEN, MANA_RX_DATA_ALIGN); ++ *datasize = mtu + ETH_HLEN; + } + + static int mana_pre_alloc_rxbufs(struct mana_port_context *mpc, int new_mtu) +diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c +index 81fd31f6fac46..e6f1da66c4500 100644 +--- a/drivers/net/ethernet/realtek/r8169_main.c ++++ b/drivers/net/ethernet/realtek/r8169_main.c +@@ -1201,17 +1201,40 @@ static void rtl8168ep_stop_cmac(struct rtl8169_private *tp) + RTL_W8(tp, IBCR0, RTL_R8(tp, IBCR0) & ~0x01); + } + ++static void rtl_dash_loop_wait(struct rtl8169_private *tp, ++ const struct rtl_cond *c, ++ unsigned long usecs, int n, bool high) ++{ ++ if (!tp->dash_enabled) ++ return; ++ rtl_loop_wait(tp, c, usecs, n, high); ++} ++ ++static void rtl_dash_loop_wait_high(struct rtl8169_private *tp, ++ const struct rtl_cond *c, ++ unsigned long d, int n) ++{ ++ rtl_dash_loop_wait(tp, c, d, n, true); ++} ++ ++static void rtl_dash_loop_wait_low(struct rtl8169_private *tp, ++ const struct rtl_cond *c, ++ unsigned long d, int n) ++{ ++ rtl_dash_loop_wait(tp, c, d, n, false); ++} ++ + static void rtl8168dp_driver_start(struct rtl8169_private *tp) + { + r8168dp_oob_notify(tp, OOB_CMD_DRIVER_START); +- rtl_loop_wait_high(tp, &rtl_dp_ocp_read_cond, 10000, 10); ++ rtl_dash_loop_wait_high(tp, &rtl_dp_ocp_read_cond, 10000, 10); + } + + static void rtl8168ep_driver_start(struct rtl8169_private *tp) + { + r8168ep_ocp_write(tp, 0x01, 0x180, OOB_CMD_DRIVER_START); + r8168ep_ocp_write(tp, 0x01, 0x30, r8168ep_ocp_read(tp, 0x30) | 0x01); +- rtl_loop_wait_high(tp, &rtl_ep_ocp_read_cond, 10000, 30); ++ rtl_dash_loop_wait_high(tp, &rtl_ep_ocp_read_cond, 10000, 30); + } + + static void rtl8168_driver_start(struct rtl8169_private *tp) +@@ -1225,7 +1248,7 @@ static 
void rtl8168_driver_start(struct rtl8169_private *tp) + static void rtl8168dp_driver_stop(struct rtl8169_private *tp) + { + r8168dp_oob_notify(tp, OOB_CMD_DRIVER_STOP); +- rtl_loop_wait_low(tp, &rtl_dp_ocp_read_cond, 10000, 10); ++ rtl_dash_loop_wait_low(tp, &rtl_dp_ocp_read_cond, 10000, 10); + } + + static void rtl8168ep_driver_stop(struct rtl8169_private *tp) +@@ -1233,7 +1256,7 @@ static void rtl8168ep_driver_stop(struct rtl8169_private *tp) + rtl8168ep_stop_cmac(tp); + r8168ep_ocp_write(tp, 0x01, 0x180, OOB_CMD_DRIVER_STOP); + r8168ep_ocp_write(tp, 0x01, 0x30, r8168ep_ocp_read(tp, 0x30) | 0x01); +- rtl_loop_wait_low(tp, &rtl_ep_ocp_read_cond, 10000, 10); ++ rtl_dash_loop_wait_low(tp, &rtl_ep_ocp_read_cond, 10000, 10); + } + + static void rtl8168_driver_stop(struct rtl8169_private *tp) +@@ -5055,6 +5078,15 @@ static int r8169_mdio_register(struct rtl8169_private *tp) + struct mii_bus *new_bus; + int ret; + ++ /* On some boards with this chip version the BIOS is buggy and misses ++ * to reset the PHY page selector. This results in the PHY ID read ++ * accessing registers on a different page, returning a more or ++ * less random value. Fix this by resetting the page selector first. ++ */ ++ if (tp->mac_version == RTL_GIGA_MAC_VER_25 || ++ tp->mac_version == RTL_GIGA_MAC_VER_26) ++ r8169_mdio_write(tp, 0x1f, 0); ++ + new_bus = devm_mdiobus_alloc(&pdev->dev); + if (!new_bus) + return -ENOMEM; +diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c +index 8fec0dbbbe7bb..c6897e6ea362d 100644 +--- a/drivers/net/ethernet/renesas/ravb_main.c ++++ b/drivers/net/ethernet/renesas/ravb_main.c +@@ -1288,25 +1288,16 @@ static int ravb_poll(struct napi_struct *napi, int budget) + struct net_device *ndev = napi->dev; + struct ravb_private *priv = netdev_priv(ndev); + const struct ravb_hw_info *info = priv->info; +- bool gptp = info->gptp || info->ccc_gac; +- struct ravb_rx_desc *desc; + unsigned long flags; + int q = napi - priv->napi; + int mask = BIT(q); + int quota = budget; +- unsigned int entry; ++ bool unmask; + +- if (!gptp) { +- entry = priv->cur_rx[q] % priv->num_rx_ring[q]; +- desc = &priv->gbeth_rx_ring[entry]; +- } + /* Processing RX Descriptor Ring */ + /* Clear RX interrupt */ + ravb_write(ndev, ~(mask | RIS0_RESERVED), RIS0); +- if (gptp || desc->die_dt != DT_FEMPTY) { +- if (ravb_rx(ndev, &quota, q)) +- goto out; +- } ++ unmask = !ravb_rx(ndev, &quota, q); + + /* Processing TX Descriptor Ring */ + spin_lock_irqsave(&priv->lock, flags); +@@ -1316,6 +1307,18 @@ static int ravb_poll(struct napi_struct *napi, int budget) + netif_wake_subqueue(ndev, q); + spin_unlock_irqrestore(&priv->lock, flags); + ++ /* Receive error message handling */ ++ priv->rx_over_errors = priv->stats[RAVB_BE].rx_over_errors; ++ if (info->nc_queues) ++ priv->rx_over_errors += priv->stats[RAVB_NC].rx_over_errors; ++ if (priv->rx_over_errors != ndev->stats.rx_over_errors) ++ ndev->stats.rx_over_errors = priv->rx_over_errors; ++ if (priv->rx_fifo_errors != ndev->stats.rx_fifo_errors) ++ ndev->stats.rx_fifo_errors = priv->rx_fifo_errors; ++ ++ if (!unmask) ++ goto out; ++ + napi_complete(napi); + + /* Re-enable RX/TX interrupts */ +@@ -1329,14 +1332,6 @@ static int ravb_poll(struct napi_struct *napi, int budget) + } + spin_unlock_irqrestore(&priv->lock, flags); + +- /* Receive error message handling */ +- priv->rx_over_errors = priv->stats[RAVB_BE].rx_over_errors; +- if (info->nc_queues) +- priv->rx_over_errors += priv->stats[RAVB_NC].rx_over_errors; +- if (priv->rx_over_errors !=
ndev->stats.rx_over_errors) +- ndev->stats.rx_over_errors = priv->rx_over_errors; +- if (priv->rx_fifo_errors != ndev->stats.rx_fifo_errors) +- ndev->stats.rx_fifo_errors = priv->rx_fifo_errors; + out: + return budget - quota; + } +diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +index c6ff1fa0e04d8..683c34e609638 100644 +--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c ++++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +@@ -92,19 +92,41 @@ static void dwmac4_rx_queue_priority(struct mac_device_info *hw, + u32 prio, u32 queue) + { + void __iomem *ioaddr = hw->pcsr; +- u32 base_register; +- u32 value; ++ u32 clear_mask = 0; ++ u32 ctrl2, ctrl3; ++ int i; + +- base_register = (queue < 4) ? GMAC_RXQ_CTRL2 : GMAC_RXQ_CTRL3; +- if (queue >= 4) +- queue -= 4; ++ ctrl2 = readl(ioaddr + GMAC_RXQ_CTRL2); ++ ctrl3 = readl(ioaddr + GMAC_RXQ_CTRL3); + +- value = readl(ioaddr + base_register); ++ /* The software must ensure that the same priority ++ * is not mapped to multiple Rx queues ++ */ ++ for (i = 0; i < 4; i++) ++ clear_mask |= ((prio << GMAC_RXQCTRL_PSRQX_SHIFT(i)) & ++ GMAC_RXQCTRL_PSRQX_MASK(i)); ++ ++ ctrl2 &= ~clear_mask; ++ ctrl3 &= ~clear_mask; ++ ++ /* First assign new priorities to a queue, then ++ * clear them from others queues ++ */ ++ if (queue < 4) { ++ ctrl2 |= (prio << GMAC_RXQCTRL_PSRQX_SHIFT(queue)) & ++ GMAC_RXQCTRL_PSRQX_MASK(queue); + +- value &= ~GMAC_RXQCTRL_PSRQX_MASK(queue); +- value |= (prio << GMAC_RXQCTRL_PSRQX_SHIFT(queue)) & ++ writel(ctrl2, ioaddr + GMAC_RXQ_CTRL2); ++ writel(ctrl3, ioaddr + GMAC_RXQ_CTRL3); ++ } else { ++ queue -= 4; ++ ++ ctrl3 |= (prio << GMAC_RXQCTRL_PSRQX_SHIFT(queue)) & + GMAC_RXQCTRL_PSRQX_MASK(queue); +- writel(value, ioaddr + base_register); ++ ++ writel(ctrl3, ioaddr + GMAC_RXQ_CTRL3); ++ writel(ctrl2, ioaddr + GMAC_RXQ_CTRL2); ++ } + } + + static void dwmac4_tx_queue_priority(struct mac_device_info *hw, +diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c +index b5509f244ecd1..24c53b7255a2e 100644 +--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c ++++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c +@@ -105,17 +105,41 @@ static void dwxgmac2_rx_queue_prio(struct mac_device_info *hw, u32 prio, + u32 queue) + { + void __iomem *ioaddr = hw->pcsr; +- u32 value, reg; ++ u32 clear_mask = 0; ++ u32 ctrl2, ctrl3; ++ int i; + +- reg = (queue < 4) ? 
XGMAC_RXQ_CTRL2 : XGMAC_RXQ_CTRL3; +- if (queue >= 4) ++ ctrl2 = readl(ioaddr + XGMAC_RXQ_CTRL2); ++ ctrl3 = readl(ioaddr + XGMAC_RXQ_CTRL3); ++ ++ /* The software must ensure that the same priority ++ * is not mapped to multiple Rx queues ++ */ ++ for (i = 0; i < 4; i++) ++ clear_mask |= ((prio << XGMAC_PSRQ_SHIFT(i)) & ++ XGMAC_PSRQ(i)); ++ ++ ctrl2 &= ~clear_mask; ++ ctrl3 &= ~clear_mask; ++ ++ /* First assign new priorities to a queue, then ++ * clear them from others queues ++ */ ++ if (queue < 4) { ++ ctrl2 |= (prio << XGMAC_PSRQ_SHIFT(queue)) & ++ XGMAC_PSRQ(queue); ++ ++ writel(ctrl2, ioaddr + XGMAC_RXQ_CTRL2); ++ writel(ctrl3, ioaddr + XGMAC_RXQ_CTRL3); ++ } else { + queue -= 4; + +- value = readl(ioaddr + reg); +- value &= ~XGMAC_PSRQ(queue); +- value |= (prio << XGMAC_PSRQ_SHIFT(queue)) & XGMAC_PSRQ(queue); ++ ctrl3 |= (prio << XGMAC_PSRQ_SHIFT(queue)) & ++ XGMAC_PSRQ(queue); + +- writel(value, ioaddr + reg); ++ writel(ctrl3, ioaddr + XGMAC_RXQ_CTRL3); ++ writel(ctrl2, ioaddr + XGMAC_RXQ_CTRL2); ++ } + } + + static void dwxgmac2_tx_queue_prio(struct mac_device_info *hw, u32 prio, +diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c +index e457ac9ae6d88..ad5c213dac077 100644 +--- a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c ++++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c +@@ -20,6 +20,8 @@ + #include "txgbe_phy.h" + #include "txgbe_hw.h" + ++#define TXGBE_I2C_CLK_DEV_NAME "i2c_dw" ++ + static int txgbe_swnodes_register(struct txgbe *txgbe) + { + struct txgbe_nodes *nodes = &txgbe->nodes; +@@ -551,8 +553,8 @@ static int txgbe_clock_register(struct txgbe *txgbe) + char clk_name[32]; + struct clk *clk; + +- snprintf(clk_name, sizeof(clk_name), "i2c_dw.%d", +- pci_dev_id(pdev)); ++ snprintf(clk_name, sizeof(clk_name), "%s.%d", ++ TXGBE_I2C_CLK_DEV_NAME, pci_dev_id(pdev)); + + clk = clk_register_fixed_rate(NULL, clk_name, NULL, 0, 156250000); + if (IS_ERR(clk)) +@@ -614,7 +616,7 @@ static int txgbe_i2c_register(struct txgbe *txgbe) + + info.parent = &pdev->dev; + info.fwnode = software_node_fwnode(txgbe->nodes.group[SWNODE_I2C]); +- info.name = "i2c_designware"; ++ info.name = TXGBE_I2C_CLK_DEV_NAME; + info.id = pci_dev_id(pdev); + + info.res = &DEFINE_RES_IRQ(pdev->irq); +diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c +index f81c4bcd85a2a..cbd98ea4a84af 100644 +--- a/drivers/net/phy/micrel.c ++++ b/drivers/net/phy/micrel.c +@@ -2388,6 +2388,7 @@ static int lan8814_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr) + struct hwtstamp_config config; + int txcfg = 0, rxcfg = 0; + int pkt_ts_enable; ++ int tx_mod; + + if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) + return -EFAULT; +@@ -2437,9 +2438,14 @@ static int lan8814_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr) + lanphy_write_page_reg(ptp_priv->phydev, 5, PTP_RX_TIMESTAMP_EN, pkt_ts_enable); + lanphy_write_page_reg(ptp_priv->phydev, 5, PTP_TX_TIMESTAMP_EN, pkt_ts_enable); + +- if (ptp_priv->hwts_tx_type == HWTSTAMP_TX_ONESTEP_SYNC) ++ tx_mod = lanphy_read_page_reg(ptp_priv->phydev, 5, PTP_TX_MOD); ++ if (ptp_priv->hwts_tx_type == HWTSTAMP_TX_ONESTEP_SYNC) { + lanphy_write_page_reg(ptp_priv->phydev, 5, PTP_TX_MOD, +- PTP_TX_MOD_TX_PTP_SYNC_TS_INSERT_); ++ tx_mod | PTP_TX_MOD_TX_PTP_SYNC_TS_INSERT_); ++ } else if (ptp_priv->hwts_tx_type == HWTSTAMP_TX_ON) { ++ lanphy_write_page_reg(ptp_priv->phydev, 5, PTP_TX_MOD, ++ tx_mod & ~PTP_TX_MOD_TX_PTP_SYNC_TS_INSERT_); ++ } + + if (config.rx_filter != 
HWTSTAMP_FILTER_NONE) + lan8814_config_ts_intr(ptp_priv->phydev, true); +@@ -2497,7 +2503,7 @@ static void lan8814_txtstamp(struct mii_timestamper *mii_ts, + } + } + +-static void lan8814_get_sig_rx(struct sk_buff *skb, u16 *sig) ++static bool lan8814_get_sig_rx(struct sk_buff *skb, u16 *sig) + { + struct ptp_header *ptp_header; + u32 type; +@@ -2507,7 +2513,11 @@ static void lan8814_get_sig_rx(struct sk_buff *skb, u16 *sig) + ptp_header = ptp_parse_header(skb, type); + skb_pull_inline(skb, ETH_HLEN); + ++ if (!ptp_header) ++ return false; ++ + *sig = (__force u16)(ntohs(ptp_header->sequence_id)); ++ return true; + } + + static bool lan8814_match_rx_skb(struct kszphy_ptp_priv *ptp_priv, +@@ -2519,7 +2529,8 @@ static bool lan8814_match_rx_skb(struct kszphy_ptp_priv *ptp_priv, + bool ret = false; + u16 skb_sig; + +- lan8814_get_sig_rx(skb, &skb_sig); ++ if (!lan8814_get_sig_rx(skb, &skb_sig)) ++ return ret; + + /* Iterate over all RX timestamps and match it with the received skbs */ + spin_lock_irqsave(&ptp_priv->rx_ts_lock, flags); +@@ -2799,7 +2810,7 @@ static int lan8814_ptpci_adjfine(struct ptp_clock_info *ptpci, long scaled_ppm) + return 0; + } + +-static void lan8814_get_sig_tx(struct sk_buff *skb, u16 *sig) ++static bool lan8814_get_sig_tx(struct sk_buff *skb, u16 *sig) + { + struct ptp_header *ptp_header; + u32 type; +@@ -2807,7 +2818,11 @@ static void lan8814_get_sig_tx(struct sk_buff *skb, u16 *sig) + type = ptp_classify_raw(skb); + ptp_header = ptp_parse_header(skb, type); + ++ if (!ptp_header) ++ return false; ++ + *sig = (__force u16)(ntohs(ptp_header->sequence_id)); ++ return true; + } + + static void lan8814_match_tx_skb(struct kszphy_ptp_priv *ptp_priv, +@@ -2821,7 +2836,8 @@ static void lan8814_match_tx_skb(struct kszphy_ptp_priv *ptp_priv, + + spin_lock_irqsave(&ptp_priv->tx_queue.lock, flags); + skb_queue_walk_safe(&ptp_priv->tx_queue, skb, skb_tmp) { +- lan8814_get_sig_tx(skb, &skb_sig); ++ if (!lan8814_get_sig_tx(skb, &skb_sig)) ++ continue; + + if (memcmp(&skb_sig, &seq_id, sizeof(seq_id))) + continue; +@@ -2875,7 +2891,8 @@ static bool lan8814_match_skb(struct kszphy_ptp_priv *ptp_priv, + + spin_lock_irqsave(&ptp_priv->rx_queue.lock, flags); + skb_queue_walk_safe(&ptp_priv->rx_queue, skb, skb_tmp) { +- lan8814_get_sig_rx(skb, &skb_sig); ++ if (!lan8814_get_sig_rx(skb, &skb_sig)) ++ continue; + + if (memcmp(&skb_sig, &rx_ts->seq_id, sizeof(rx_ts->seq_id))) + continue; +diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c +index d837c18874161..e0e9b4c53cb02 100644 +--- a/drivers/net/usb/ax88179_178a.c ++++ b/drivers/net/usb/ax88179_178a.c +@@ -1273,6 +1273,8 @@ static void ax88179_get_mac_addr(struct usbnet *dev) + + if (is_valid_ether_addr(mac)) { + eth_hw_addr_set(dev->net, mac); ++ if (!is_local_ether_addr(mac)) ++ dev->net->addr_assign_type = NET_ADDR_PERM; + } else { + netdev_info(dev->net, "invalid MAC address, using random\n"); + eth_hw_addr_random(dev->net); +diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h +index 168eda2132fb8..9dcc1506bd0b0 100644 +--- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h ++++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h +@@ -278,7 +278,7 @@ static inline void iwl_free_rxb(struct iwl_rx_cmd_buffer *r) + #define IWL_MGMT_TID 15 + #define IWL_FRAME_LIMIT 64 + #define IWL_MAX_RX_HW_QUEUES 16 +-#define IWL_9000_MAX_RX_HW_QUEUES 6 ++#define IWL_9000_MAX_RX_HW_QUEUES 1 + + /** + * enum iwl_wowlan_status - WoWLAN image/device status +diff --git 
a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +index aaa9840d0d4c5..ee9d14250a261 100644 +--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c ++++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +@@ -352,7 +352,9 @@ int iwl_mvm_mac_setup_register(struct iwl_mvm *mvm) + ieee80211_hw_set(hw, HAS_RATE_CONTROL); + } + +- if (iwl_mvm_has_new_rx_api(mvm)) ++ /* We want to use the mac80211's reorder buffer for 9000 */ ++ if (iwl_mvm_has_new_rx_api(mvm) && ++ mvm->trans->trans_cfg->device_family > IWL_DEVICE_FAMILY_9000) + ieee80211_hw_set(hw, SUPPORTS_REORDERING_BUFFER); + + if (fw_has_capa(&mvm->fw->ucode_capa, +diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rfi.c b/drivers/net/wireless/intel/iwlwifi/mvm/rfi.c +index 2ecd32bed752f..045c862a8fc4f 100644 +--- a/drivers/net/wireless/intel/iwlwifi/mvm/rfi.c ++++ b/drivers/net/wireless/intel/iwlwifi/mvm/rfi.c +@@ -132,14 +132,18 @@ struct iwl_rfi_freq_table_resp_cmd *iwl_rfi_get_freq_table(struct iwl_mvm *mvm) + if (ret) + return ERR_PTR(ret); + +- if (WARN_ON_ONCE(iwl_rx_packet_payload_len(cmd.resp_pkt) != resp_size)) ++ if (WARN_ON_ONCE(iwl_rx_packet_payload_len(cmd.resp_pkt) != ++ resp_size)) { ++ iwl_free_resp(&cmd); + return ERR_PTR(-EIO); ++ } + + resp = kmemdup(cmd.resp_pkt->data, resp_size, GFP_KERNEL); ++ iwl_free_resp(&cmd); ++ + if (!resp) + return ERR_PTR(-ENOMEM); + +- iwl_free_resp(&cmd); + return resp; + } + +diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +index bac0228b8c866..e9360b555ac93 100644 +--- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c ++++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +@@ -236,21 +236,13 @@ static void iwl_mvm_add_rtap_sniffer_config(struct iwl_mvm *mvm, + static void iwl_mvm_pass_packet_to_mac80211(struct iwl_mvm *mvm, + struct napi_struct *napi, + struct sk_buff *skb, int queue, +- struct ieee80211_sta *sta, +- struct ieee80211_link_sta *link_sta) ++ struct ieee80211_sta *sta) + { + if (unlikely(iwl_mvm_check_pn(mvm, skb, queue, sta))) { + kfree_skb(skb); + return; + } + +- if (sta && sta->valid_links && link_sta) { +- struct ieee80211_rx_status *rx_status = IEEE80211_SKB_RXCB(skb); +- +- rx_status->link_valid = 1; +- rx_status->link_id = link_sta->link_id; +- } +- + ieee80211_rx_napi(mvm->hw, sta, skb, napi); + } + +@@ -636,7 +628,7 @@ static void iwl_mvm_release_frames(struct iwl_mvm *mvm, + while ((skb = __skb_dequeue(skb_list))) { + iwl_mvm_pass_packet_to_mac80211(mvm, napi, skb, + reorder_buf->queue, +- sta, NULL /* FIXME */); ++ sta); + reorder_buf->num_stored--; + } + } +@@ -963,6 +955,9 @@ static bool iwl_mvm_reorder(struct iwl_mvm *mvm, + baid = (reorder & IWL_RX_MPDU_REORDER_BAID_MASK) >> + IWL_RX_MPDU_REORDER_BAID_SHIFT; + ++ if (mvm->trans->trans_cfg->device_family == IWL_DEVICE_FAMILY_9000) ++ return false; ++ + /* + * This also covers the case of receiving a Block Ack Request + * outside a BA session; we'll pass it to mac80211 and that +@@ -2486,6 +2481,11 @@ void iwl_mvm_rx_mpdu_mq(struct iwl_mvm *mvm, struct napi_struct *napi, + if (IS_ERR(sta)) + sta = NULL; + link_sta = rcu_dereference(mvm->fw_id_to_link_sta[id]); ++ ++ if (sta && sta->valid_links && link_sta) { ++ rx_status->link_valid = 1; ++ rx_status->link_id = link_sta->link_id; ++ } + } + } else if (!is_multicast_ether_addr(hdr->addr2)) { + /* +@@ -2621,9 +2621,14 @@ void iwl_mvm_rx_mpdu_mq(struct iwl_mvm *mvm, struct napi_struct *napi, + + if (!iwl_mvm_reorder(mvm, napi, queue, sta, skb, desc) && 
+ likely(!iwl_mvm_time_sync_frame(mvm, skb, hdr->addr2)) && +- likely(!iwl_mvm_mei_filter_scan(mvm, skb))) +- iwl_mvm_pass_packet_to_mac80211(mvm, napi, skb, queue, sta, +- link_sta); ++ likely(!iwl_mvm_mei_filter_scan(mvm, skb))) { ++ if (mvm->trans->trans_cfg->device_family == IWL_DEVICE_FAMILY_9000 && ++ (desc->mac_flags2 & IWL_RX_MPDU_MFLG2_AMSDU) && ++ !(desc->amsdu_info & IWL_RX_MPDU_AMSDU_LAST_SUBFRAME)) ++ rx_status->flag |= RX_FLAG_AMSDU_MORE; ++ ++ iwl_mvm_pass_packet_to_mac80211(mvm, napi, skb, queue, sta); ++ } + out: + rcu_read_unlock(); + } +diff --git a/drivers/net/wwan/t7xx/t7xx_cldma.c b/drivers/net/wwan/t7xx/t7xx_cldma.c +index 9f43f256db1d0..f0a4783baf1f3 100644 +--- a/drivers/net/wwan/t7xx/t7xx_cldma.c ++++ b/drivers/net/wwan/t7xx/t7xx_cldma.c +@@ -106,7 +106,7 @@ bool t7xx_cldma_tx_addr_is_set(struct t7xx_cldma_hw *hw_info, unsigned int qno) + { + u32 offset = REG_CLDMA_UL_START_ADDRL_0 + qno * ADDR_SIZE; + +- return ioread64(hw_info->ap_pdn_base + offset); ++ return ioread64_lo_hi(hw_info->ap_pdn_base + offset); + } + + void t7xx_cldma_hw_set_start_addr(struct t7xx_cldma_hw *hw_info, unsigned int qno, u64 address, +@@ -117,7 +117,7 @@ void t7xx_cldma_hw_set_start_addr(struct t7xx_cldma_hw *hw_info, unsigned int qn + + reg = tx_rx == MTK_RX ? hw_info->ap_ao_base + REG_CLDMA_DL_START_ADDRL_0 : + hw_info->ap_pdn_base + REG_CLDMA_UL_START_ADDRL_0; +- iowrite64(address, reg + offset); ++ iowrite64_lo_hi(address, reg + offset); + } + + void t7xx_cldma_hw_resume_queue(struct t7xx_cldma_hw *hw_info, unsigned int qno, +diff --git a/drivers/net/wwan/t7xx/t7xx_hif_cldma.c b/drivers/net/wwan/t7xx/t7xx_hif_cldma.c +index cc70360364b7d..554ba4669cc8d 100644 +--- a/drivers/net/wwan/t7xx/t7xx_hif_cldma.c ++++ b/drivers/net/wwan/t7xx/t7xx_hif_cldma.c +@@ -139,8 +139,9 @@ static int t7xx_cldma_gpd_rx_from_q(struct cldma_queue *queue, int budget, bool + return -ENODEV; + } + +- gpd_addr = ioread64(hw_info->ap_pdn_base + REG_CLDMA_DL_CURRENT_ADDRL_0 + +- queue->index * sizeof(u64)); ++ gpd_addr = ioread64_lo_hi(hw_info->ap_pdn_base + ++ REG_CLDMA_DL_CURRENT_ADDRL_0 + ++ queue->index * sizeof(u64)); + if (req->gpd_addr == gpd_addr || hwo_polling_count++ >= 100) + return 0; + +@@ -318,8 +319,8 @@ static void t7xx_cldma_txq_empty_hndl(struct cldma_queue *queue) + struct t7xx_cldma_hw *hw_info = &md_ctrl->hw_info; + + /* Check current processing TGPD, 64-bit address is in a table by Q index */ +- ul_curr_addr = ioread64(hw_info->ap_pdn_base + REG_CLDMA_UL_CURRENT_ADDRL_0 + +- queue->index * sizeof(u64)); ++ ul_curr_addr = ioread64_lo_hi(hw_info->ap_pdn_base + REG_CLDMA_UL_CURRENT_ADDRL_0 + ++ queue->index * sizeof(u64)); + if (req->gpd_addr != ul_curr_addr) { + spin_unlock_irqrestore(&md_ctrl->cldma_lock, flags); + dev_err(md_ctrl->dev, "CLDMA%d queue %d is not empty\n", +diff --git a/drivers/net/wwan/t7xx/t7xx_pcie_mac.c b/drivers/net/wwan/t7xx/t7xx_pcie_mac.c +index 76da4c15e3de1..f071ec7ff23d5 100644 +--- a/drivers/net/wwan/t7xx/t7xx_pcie_mac.c ++++ b/drivers/net/wwan/t7xx/t7xx_pcie_mac.c +@@ -75,7 +75,7 @@ static void t7xx_pcie_mac_atr_tables_dis(void __iomem *pbase, enum t7xx_atr_src_ + for (i = 0; i < ATR_TABLE_NUM_PER_ATR; i++) { + offset = ATR_PORT_OFFSET * port + ATR_TABLE_OFFSET * i; + reg = pbase + ATR_PCIE_WIN0_T0_ATR_PARAM_SRC_ADDR + offset; +- iowrite64(0, reg); ++ iowrite64_lo_hi(0, reg); + } + } + +@@ -112,17 +112,17 @@ static int t7xx_pcie_mac_atr_cfg(struct t7xx_pci_dev *t7xx_dev, struct t7xx_atr_ + + reg = pbase + ATR_PCIE_WIN0_T0_TRSL_ADDR + offset; + value = 
cfg->trsl_addr & ATR_PCIE_WIN0_ADDR_ALGMT; +- iowrite64(value, reg); ++ iowrite64_lo_hi(value, reg); + + reg = pbase + ATR_PCIE_WIN0_T0_TRSL_PARAM + offset; + iowrite32(cfg->trsl_id, reg); + + reg = pbase + ATR_PCIE_WIN0_T0_ATR_PARAM_SRC_ADDR + offset; + value = (cfg->src_addr & ATR_PCIE_WIN0_ADDR_ALGMT) | (atr_size << 1) | BIT(0); +- iowrite64(value, reg); ++ iowrite64_lo_hi(value, reg); + + /* Ensure ATR is set */ +- ioread64(reg); ++ ioread64_lo_hi(reg); + return 0; + } + +diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c +index ad29f370034e4..8d2aee88526c6 100644 +--- a/drivers/net/xen-netfront.c ++++ b/drivers/net/xen-netfront.c +@@ -285,6 +285,7 @@ static struct sk_buff *xennet_alloc_one_rx_buffer(struct netfront_queue *queue) + return NULL; + } + skb_add_rx_frag(skb, 0, page, 0, 0, PAGE_SIZE); ++ skb_mark_for_recycle(skb); + + /* Align ip header to a 16 bytes boundary */ + skb_reserve(skb, NET_IP_ALIGN); +diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c +index 3bf27052832f3..4d57a4e341054 100644 +--- a/drivers/of/dynamic.c ++++ b/drivers/of/dynamic.c +@@ -9,6 +9,7 @@ + + #define pr_fmt(fmt) "OF: " fmt + ++#include <linux/device.h> + #include <linux/of.h> + #include <linux/spinlock.h> + #include <linux/slab.h> +@@ -667,6 +668,17 @@ void of_changeset_destroy(struct of_changeset *ocs) + { + struct of_changeset_entry *ce, *cen; + ++ /* ++ * When a device is deleted, the device links to/from it are also queued ++ * for deletion. Until these device links are freed, the devices ++ * themselves aren't freed. If the device being deleted is due to an ++ * overlay change, this device might be holding a reference to a device ++ * node that will be freed. So, wait until all already pending device ++ * links are deleted before freeing a device node. This ensures we don't ++ * free any device node that has a non-zero reference count. ++ */ ++ device_link_wait_removal(); ++ + list_for_each_entry_safe_reverse(ce, cen, &ocs->entries, node) + __of_changeset_entry_destroy(ce); + } +diff --git a/drivers/of/module.c b/drivers/of/module.c +index 0e8aa974f0f2b..f58e624953a20 100644 +--- a/drivers/of/module.c ++++ b/drivers/of/module.c +@@ -16,6 +16,14 @@ ssize_t of_modalias(const struct device_node *np, char *str, ssize_t len) + ssize_t csize; + ssize_t tsize; + ++ /* ++ * Prevent a kernel oops in vsnprintf() -- it only allows passing a ++ * NULL ptr when the length is also 0. Also filter out the negative ++ * lengths... 
++ */ ++ if ((len > 0 && !str) || len < 0) ++ return -EINVAL; ++ + /* Name & Type */ + /* %p eats all alphanum characters, so %c must be used here */ + csize = snprintf(str, len, "of:N%pOFn%c%s", np, 'T', +diff --git a/drivers/perf/riscv_pmu.c b/drivers/perf/riscv_pmu.c +index c78a6fd6c57f6..b4efdddb2ad91 100644 +--- a/drivers/perf/riscv_pmu.c ++++ b/drivers/perf/riscv_pmu.c +@@ -313,6 +313,10 @@ static int riscv_pmu_event_init(struct perf_event *event) + u64 event_config = 0; + uint64_t cmask; + ++ /* driver does not support branch stack sampling */ ++ if (has_branch_stack(event)) ++ return -EOPNOTSUPP; ++ + hwc->flags = 0; + mapped_event = rvpmu->event_map(event, &event_config); + if (mapped_event < 0) { +diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c +index cd783290bde5e..1148b4ecabdde 100644 +--- a/drivers/s390/net/qeth_core_main.c ++++ b/drivers/s390/net/qeth_core_main.c +@@ -1179,6 +1179,20 @@ static int qeth_check_irb_error(struct qeth_card *card, struct ccw_device *cdev, + } + } + ++/** ++ * qeth_irq() - qeth interrupt handler ++ * @cdev: ccw device ++ * @intparm: expect pointer to iob ++ * @irb: Interruption Response Block ++ * ++ * In the good path: ++ * corresponding qeth channel is locked with last used iob as active_cmd. ++ * But this function is also called for error interrupts. ++ * ++ * Caller ensures that: ++ * Interrupts are disabled; ccw device lock is held; ++ * ++ */ + static void qeth_irq(struct ccw_device *cdev, unsigned long intparm, + struct irb *irb) + { +@@ -1220,11 +1234,10 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm, + iob = (struct qeth_cmd_buffer *) (addr_t)intparm; + } + +- qeth_unlock_channel(card, channel); +- + rc = qeth_check_irb_error(card, cdev, irb); + if (rc) { + /* IO was terminated, free its resources. */ ++ qeth_unlock_channel(card, channel); + if (iob) + qeth_cancel_cmd(iob, rc); + return; +@@ -1268,6 +1281,7 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm, + rc = qeth_get_problem(card, cdev, irb); + if (rc) { + card->read_or_write_problem = 1; ++ qeth_unlock_channel(card, channel); + if (iob) + qeth_cancel_cmd(iob, rc); + qeth_clear_ipacmd_list(card); +@@ -1276,6 +1290,26 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm, + } + } + ++ if (scsw_cmd_is_valid_cc(&irb->scsw) && irb->scsw.cmd.cc == 1 && iob) { ++ /* channel command hasn't started: retry. 
++ * active_cmd is still set to last iob ++ */ ++ QETH_CARD_TEXT(card, 2, "irqcc1"); ++ rc = ccw_device_start_timeout(cdev, __ccw_from_cmd(iob), ++ (addr_t)iob, 0, 0, iob->timeout); ++ if (rc) { ++ QETH_DBF_MESSAGE(2, ++ "ccw retry on %x failed, rc = %i\n", ++ CARD_DEVID(card), rc); ++ QETH_CARD_TEXT_(card, 2, " err%d", rc); ++ qeth_unlock_channel(card, channel); ++ qeth_cancel_cmd(iob, rc); ++ } ++ return; ++ } ++ ++ qeth_unlock_channel(card, channel); ++ + if (iob) { + /* sanity check: */ + if (irb->scsw.cmd.count > iob->length) { +diff --git a/drivers/scsi/myrb.c b/drivers/scsi/myrb.c +index ca2e932dd9b70..f684eb5e04898 100644 +--- a/drivers/scsi/myrb.c ++++ b/drivers/scsi/myrb.c +@@ -1775,9 +1775,9 @@ static ssize_t raid_state_show(struct device *dev, + + name = myrb_devstate_name(ldev_info->state); + if (name) +- ret = snprintf(buf, 32, "%s\n", name); ++ ret = snprintf(buf, 64, "%s\n", name); + else +- ret = snprintf(buf, 32, "Invalid (%02X)\n", ++ ret = snprintf(buf, 64, "Invalid (%02X)\n", + ldev_info->state); + } else { + struct myrb_pdev_state *pdev_info = sdev->hostdata; +@@ -1796,9 +1796,9 @@ static ssize_t raid_state_show(struct device *dev, + else + name = myrb_devstate_name(pdev_info->state); + if (name) +- ret = snprintf(buf, 32, "%s\n", name); ++ ret = snprintf(buf, 64, "%s\n", name); + else +- ret = snprintf(buf, 32, "Invalid (%02X)\n", ++ ret = snprintf(buf, 64, "Invalid (%02X)\n", + pdev_info->state); + } + return ret; +@@ -1886,11 +1886,11 @@ static ssize_t raid_level_show(struct device *dev, + + name = myrb_raidlevel_name(ldev_info->raid_level); + if (!name) +- return snprintf(buf, 32, "Invalid (%02X)\n", ++ return snprintf(buf, 64, "Invalid (%02X)\n", + ldev_info->state); +- return snprintf(buf, 32, "%s\n", name); ++ return snprintf(buf, 64, "%s\n", name); + } +- return snprintf(buf, 32, "Physical Drive\n"); ++ return snprintf(buf, 64, "Physical Drive\n"); + } + static DEVICE_ATTR_RO(raid_level); + +@@ -1903,15 +1903,15 @@ static ssize_t rebuild_show(struct device *dev, + unsigned char status; + + if (sdev->channel < myrb_logical_channel(sdev->host)) +- return snprintf(buf, 32, "physical device - not rebuilding\n"); ++ return snprintf(buf, 64, "physical device - not rebuilding\n"); + + status = myrb_get_rbld_progress(cb, &rbld_buf); + + if (rbld_buf.ldev_num != sdev->id || + status != MYRB_STATUS_SUCCESS) +- return snprintf(buf, 32, "not rebuilding\n"); ++ return snprintf(buf, 64, "not rebuilding\n"); + +- return snprintf(buf, 32, "rebuilding block %u of %u\n", ++ return snprintf(buf, 64, "rebuilding block %u of %u\n", + rbld_buf.ldev_size - rbld_buf.blocks_left, + rbld_buf.ldev_size); + } +diff --git a/drivers/scsi/myrs.c b/drivers/scsi/myrs.c +index a1eec65a9713f..e824be9d9bbb9 100644 +--- a/drivers/scsi/myrs.c ++++ b/drivers/scsi/myrs.c +@@ -947,9 +947,9 @@ static ssize_t raid_state_show(struct device *dev, + + name = myrs_devstate_name(ldev_info->dev_state); + if (name) +- ret = snprintf(buf, 32, "%s\n", name); ++ ret = snprintf(buf, 64, "%s\n", name); + else +- ret = snprintf(buf, 32, "Invalid (%02X)\n", ++ ret = snprintf(buf, 64, "Invalid (%02X)\n", + ldev_info->dev_state); + } else { + struct myrs_pdev_info *pdev_info; +@@ -958,9 +958,9 @@ static ssize_t raid_state_show(struct device *dev, + pdev_info = sdev->hostdata; + name = myrs_devstate_name(pdev_info->dev_state); + if (name) +- ret = snprintf(buf, 32, "%s\n", name); ++ ret = snprintf(buf, 64, "%s\n", name); + else +- ret = snprintf(buf, 32, "Invalid (%02X)\n", ++ ret = snprintf(buf, 64, "Invalid 
(%02X)\n", + pdev_info->dev_state); + } + return ret; +@@ -1066,13 +1066,13 @@ static ssize_t raid_level_show(struct device *dev, + ldev_info = sdev->hostdata; + name = myrs_raid_level_name(ldev_info->raid_level); + if (!name) +- return snprintf(buf, 32, "Invalid (%02X)\n", ++ return snprintf(buf, 64, "Invalid (%02X)\n", + ldev_info->dev_state); + + } else + name = myrs_raid_level_name(MYRS_RAID_PHYSICAL); + +- return snprintf(buf, 32, "%s\n", name); ++ return snprintf(buf, 64, "%s\n", name); + } + static DEVICE_ATTR_RO(raid_level); + +@@ -1086,7 +1086,7 @@ static ssize_t rebuild_show(struct device *dev, + unsigned char status; + + if (sdev->channel < cs->ctlr_info->physchan_present) +- return snprintf(buf, 32, "physical device - not rebuilding\n"); ++ return snprintf(buf, 64, "physical device - not rebuilding\n"); + + ldev_info = sdev->hostdata; + ldev_num = ldev_info->ldev_num; +@@ -1098,11 +1098,11 @@ static ssize_t rebuild_show(struct device *dev, + return -EIO; + } + if (ldev_info->rbld_active) { +- return snprintf(buf, 32, "rebuilding block %zu of %zu\n", ++ return snprintf(buf, 64, "rebuilding block %zu of %zu\n", + (size_t)ldev_info->rbld_lba, + (size_t)ldev_info->cfg_devsize); + } else +- return snprintf(buf, 32, "not rebuilding\n"); ++ return snprintf(buf, 64, "not rebuilding\n"); + } + + static ssize_t rebuild_store(struct device *dev, +@@ -1190,7 +1190,7 @@ static ssize_t consistency_check_show(struct device *dev, + unsigned short ldev_num; + + if (sdev->channel < cs->ctlr_info->physchan_present) +- return snprintf(buf, 32, "physical device - not checking\n"); ++ return snprintf(buf, 64, "physical device - not checking\n"); + + ldev_info = sdev->hostdata; + if (!ldev_info) +@@ -1198,11 +1198,11 @@ static ssize_t consistency_check_show(struct device *dev, + ldev_num = ldev_info->ldev_num; + myrs_get_ldev_info(cs, ldev_num, ldev_info); + if (ldev_info->cc_active) +- return snprintf(buf, 32, "checking block %zu of %zu\n", ++ return snprintf(buf, 64, "checking block %zu of %zu\n", + (size_t)ldev_info->cc_lba, + (size_t)ldev_info->cfg_devsize); + else +- return snprintf(buf, 32, "not checking\n"); ++ return snprintf(buf, 64, "not checking\n"); + } + + static ssize_t consistency_check_store(struct device *dev, +diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c +index e80c33cdad2b9..c62f677084b4c 100644 +--- a/drivers/scsi/sd.c ++++ b/drivers/scsi/sd.c +@@ -3754,7 +3754,7 @@ static int sd_probe(struct device *dev) + + error = device_add_disk(dev, gd, NULL); + if (error) { +- put_device(&sdkp->disk_dev); ++ device_unregister(&sdkp->disk_dev); + put_disk(gd); + goto out; + } +diff --git a/drivers/spi/spi-pci1xxxx.c b/drivers/spi/spi-pci1xxxx.c +index 3638e974f5d49..06bf58b7e5d72 100644 +--- a/drivers/spi/spi-pci1xxxx.c ++++ b/drivers/spi/spi-pci1xxxx.c +@@ -275,6 +275,8 @@ static int pci1xxxx_spi_probe(struct pci_dev *pdev, const struct pci_device_id * + spi_bus->spi_int[iter] = devm_kzalloc(&pdev->dev, + sizeof(struct pci1xxxx_spi_internal), + GFP_KERNEL); ++ if (!spi_bus->spi_int[iter]) ++ return -ENOMEM; + spi_sub_ptr = spi_bus->spi_int[iter]; + spi_sub_ptr->spi_host = devm_spi_alloc_host(dev, sizeof(struct spi_controller)); + if (!spi_sub_ptr->spi_host) +diff --git a/drivers/spi/spi-s3c64xx.c b/drivers/spi/spi-s3c64xx.c +index 0e48ffd499b9f..652eadbefe24c 100644 +--- a/drivers/spi/spi-s3c64xx.c ++++ b/drivers/spi/spi-s3c64xx.c +@@ -3,19 +3,20 @@ + // Copyright (c) 2009 Samsung Electronics Co., Ltd. 
+ // Jaswinder Singh <jassi.brar@samsung.com> + +-#include <linux/init.h> +-#include <linux/module.h> +-#include <linux/interrupt.h> +-#include <linux/delay.h> ++#include <linux/bitops.h> ++#include <linux/bits.h> + #include <linux/clk.h> ++#include <linux/delay.h> + #include <linux/dma-mapping.h> + #include <linux/dmaengine.h> ++#include <linux/init.h> ++#include <linux/interrupt.h> ++#include <linux/module.h> ++#include <linux/of.h> ++#include <linux/platform_data/spi-s3c64xx.h> + #include <linux/platform_device.h> + #include <linux/pm_runtime.h> + #include <linux/spi/spi.h> +-#include <linux/of.h> +- +-#include <linux/platform_data/spi-s3c64xx.h> + + #define MAX_SPI_PORTS 12 + #define S3C64XX_SPI_QUIRK_CS_AUTO (1 << 1) +@@ -76,6 +77,7 @@ + #define S3C64XX_SPI_INT_RX_FIFORDY_EN (1<<1) + #define S3C64XX_SPI_INT_TX_FIFORDY_EN (1<<0) + ++#define S3C64XX_SPI_ST_TX_FIFO_LVL_SHIFT 6 + #define S3C64XX_SPI_ST_RX_OVERRUN_ERR (1<<5) + #define S3C64XX_SPI_ST_RX_UNDERRUN_ERR (1<<4) + #define S3C64XX_SPI_ST_TX_OVERRUN_ERR (1<<3) +@@ -106,9 +108,11 @@ + #define FIFO_LVL_MASK(i) ((i)->port_conf->fifo_lvl_mask[i->port_id]) + #define S3C64XX_SPI_ST_TX_DONE(v, i) (((v) & \ + (1 << (i)->port_conf->tx_st_done)) ? 1 : 0) +-#define TX_FIFO_LVL(v, i) (((v) >> 6) & FIFO_LVL_MASK(i)) +-#define RX_FIFO_LVL(v, i) (((v) >> (i)->port_conf->rx_lvl_offset) & \ +- FIFO_LVL_MASK(i)) ++#define TX_FIFO_LVL(v, sdd) (((v) & (sdd)->tx_fifomask) >> \ ++ __ffs((sdd)->tx_fifomask)) ++#define RX_FIFO_LVL(v, sdd) (((v) & (sdd)->rx_fifomask) >> \ ++ __ffs((sdd)->rx_fifomask)) ++#define FIFO_DEPTH(i) ((FIFO_LVL_MASK(i) >> 1) + 1) + + #define S3C64XX_SPI_MAX_TRAILCNT 0x3ff + #define S3C64XX_SPI_TRAILCNT_OFF 19 +@@ -133,6 +137,10 @@ struct s3c64xx_spi_dma_data { + * struct s3c64xx_spi_port_config - SPI Controller hardware info + * @fifo_lvl_mask: Bit-mask for {TX|RX}_FIFO_LVL bits in SPI_STATUS register. + * @rx_lvl_offset: Bit offset of RX_FIFO_LVL bits in SPI_STATUS regiter. ++ * @rx_fifomask: SPI_STATUS.RX_FIFO_LVL mask. Shifted mask defining the field's ++ * length and position. ++ * @tx_fifomask: SPI_STATUS.TX_FIFO_LVL mask. Shifted mask defining the field's ++ * length and position. + * @tx_st_done: Bit offset of TX_DONE bit in SPI_STATUS regiter. + * @clk_div: Internal clock divider + * @quirks: Bitmask of known quirks +@@ -150,6 +158,8 @@ struct s3c64xx_spi_dma_data { + struct s3c64xx_spi_port_config { + int fifo_lvl_mask[MAX_SPI_PORTS]; + int rx_lvl_offset; ++ u32 rx_fifomask; ++ u32 tx_fifomask; + int tx_st_done; + int quirks; + int clk_div; +@@ -179,6 +189,11 @@ struct s3c64xx_spi_port_config { + * @tx_dma: Local transmit DMA data (e.g. chan and direction) + * @port_conf: Local SPI port configuartion data + * @port_id: Port identification number ++ * @fifo_depth: depth of the FIFO. ++ * @rx_fifomask: SPI_STATUS.RX_FIFO_LVL mask. Shifted mask defining the field's ++ * length and position. ++ * @tx_fifomask: SPI_STATUS.TX_FIFO_LVL mask. Shifted mask defining the field's ++ * length and position. 
+ */ + struct s3c64xx_spi_driver_data { + void __iomem *regs; +@@ -198,6 +213,9 @@ struct s3c64xx_spi_driver_data { + struct s3c64xx_spi_dma_data tx_dma; + const struct s3c64xx_spi_port_config *port_conf; + unsigned int port_id; ++ unsigned int fifo_depth; ++ u32 rx_fifomask; ++ u32 tx_fifomask; + }; + + static void s3c64xx_flush_fifo(struct s3c64xx_spi_driver_data *sdd) +@@ -405,12 +423,10 @@ static bool s3c64xx_spi_can_dma(struct spi_controller *host, + { + struct s3c64xx_spi_driver_data *sdd = spi_controller_get_devdata(host); + +- if (sdd->rx_dma.ch && sdd->tx_dma.ch) { +- return xfer->len > (FIFO_LVL_MASK(sdd) >> 1) + 1; +- } else { +- return false; +- } ++ if (sdd->rx_dma.ch && sdd->tx_dma.ch) ++ return xfer->len >= sdd->fifo_depth; + ++ return false; + } + + static int s3c64xx_enable_datapath(struct s3c64xx_spi_driver_data *sdd, +@@ -495,9 +511,7 @@ static u32 s3c64xx_spi_wait_for_timeout(struct s3c64xx_spi_driver_data *sdd, + void __iomem *regs = sdd->regs; + unsigned long val = 1; + u32 status; +- +- /* max fifo depth available */ +- u32 max_fifo = (FIFO_LVL_MASK(sdd) >> 1) + 1; ++ u32 max_fifo = sdd->fifo_depth; + + if (timeout_ms) + val = msecs_to_loops(timeout_ms); +@@ -604,7 +618,7 @@ static int s3c64xx_wait_for_pio(struct s3c64xx_spi_driver_data *sdd, + * For any size less than the fifo size the below code is + * executed atleast once. + */ +- loops = xfer->len / ((FIFO_LVL_MASK(sdd) >> 1) + 1); ++ loops = xfer->len / sdd->fifo_depth; + buf = xfer->rx_buf; + do { + /* wait for data to be received in the fifo */ +@@ -741,7 +755,7 @@ static int s3c64xx_spi_transfer_one(struct spi_controller *host, + struct spi_transfer *xfer) + { + struct s3c64xx_spi_driver_data *sdd = spi_controller_get_devdata(host); +- const unsigned int fifo_len = (FIFO_LVL_MASK(sdd) >> 1) + 1; ++ const unsigned int fifo_len = sdd->fifo_depth; + const void *tx_buf = NULL; + void *rx_buf = NULL; + int target_len = 0, origin_len = 0; +@@ -769,10 +783,9 @@ static int s3c64xx_spi_transfer_one(struct spi_controller *host, + return status; + } + +- if (!is_polling(sdd) && (xfer->len > fifo_len) && ++ if (!is_polling(sdd) && xfer->len >= fifo_len && + sdd->rx_dma.ch && sdd->tx_dma.ch) { + use_dma = 1; +- + } else if (xfer->len >= fifo_len) { + tx_buf = xfer->tx_buf; + rx_buf = xfer->rx_buf; +@@ -1146,6 +1159,23 @@ static inline const struct s3c64xx_spi_port_config *s3c64xx_spi_get_port_config( + return (const struct s3c64xx_spi_port_config *)platform_get_device_id(pdev)->driver_data; + } + ++static void s3c64xx_spi_set_fifomask(struct s3c64xx_spi_driver_data *sdd) ++{ ++ const struct s3c64xx_spi_port_config *port_conf = sdd->port_conf; ++ ++ if (port_conf->rx_fifomask) ++ sdd->rx_fifomask = port_conf->rx_fifomask; ++ else ++ sdd->rx_fifomask = FIFO_LVL_MASK(sdd) << ++ port_conf->rx_lvl_offset; ++ ++ if (port_conf->tx_fifomask) ++ sdd->tx_fifomask = port_conf->tx_fifomask; ++ else ++ sdd->tx_fifomask = FIFO_LVL_MASK(sdd) << ++ S3C64XX_SPI_ST_TX_FIFO_LVL_SHIFT; ++} ++ + static int s3c64xx_spi_probe(struct platform_device *pdev) + { + struct resource *mem_res; +@@ -1191,6 +1221,10 @@ static int s3c64xx_spi_probe(struct platform_device *pdev) + sdd->port_id = pdev->id; + } + ++ sdd->fifo_depth = FIFO_DEPTH(sdd); ++ ++ s3c64xx_spi_set_fifomask(sdd); ++ + sdd->cur_bpw = 8; + + sdd->tx_dma.direction = DMA_MEM_TO_DEV; +@@ -1280,7 +1314,7 @@ static int s3c64xx_spi_probe(struct platform_device *pdev) + dev_dbg(&pdev->dev, "Samsung SoC SPI Driver loaded for Bus SPI-%d with %d Targets attached\n", + sdd->port_id, 
host->num_chipselect); + dev_dbg(&pdev->dev, "\tIOmem=[%pR]\tFIFO %dbytes\n", +- mem_res, (FIFO_LVL_MASK(sdd) >> 1) + 1); ++ mem_res, sdd->fifo_depth); + + pm_runtime_mark_last_busy(&pdev->dev); + pm_runtime_put_autosuspend(&pdev->dev); +diff --git a/drivers/usb/typec/ucsi/ucsi_glink.c b/drivers/usb/typec/ucsi/ucsi_glink.c +index 4853141cd10c8..894622b6556a6 100644 +--- a/drivers/usb/typec/ucsi/ucsi_glink.c ++++ b/drivers/usb/typec/ucsi/ucsi_glink.c +@@ -254,6 +254,20 @@ static void pmic_glink_ucsi_notify(struct work_struct *work) + static void pmic_glink_ucsi_register(struct work_struct *work) + { + struct pmic_glink_ucsi *ucsi = container_of(work, struct pmic_glink_ucsi, register_work); ++ int orientation; ++ int i; ++ ++ for (i = 0; i < PMIC_GLINK_MAX_PORTS; i++) { ++ if (!ucsi->port_orientation[i]) ++ continue; ++ orientation = gpiod_get_value(ucsi->port_orientation[i]); ++ ++ if (orientation >= 0) { ++ typec_switch_set(ucsi->port_switch[i], ++ orientation ? TYPEC_ORIENTATION_REVERSE ++ : TYPEC_ORIENTATION_NORMAL); ++ } ++ } + + ucsi_register(ucsi->ucsi); + } +diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c +index fc8eb8d86ca25..5acb2cb79d4bf 100644 +--- a/fs/btrfs/extent_io.c ++++ b/fs/btrfs/extent_io.c +@@ -2410,12 +2410,65 @@ int try_release_extent_mapping(struct page *page, gfp_t mask) + return try_release_extent_state(tree, page, mask); + } + ++struct btrfs_fiemap_entry { ++ u64 offset; ++ u64 phys; ++ u64 len; ++ u32 flags; ++}; ++ ++/* ++ * Indicate the caller of emit_fiemap_extent() that it needs to unlock the file ++ * range from the inode's io tree, unlock the subvolume tree search path, flush ++ * the fiemap cache and relock the file range and research the subvolume tree. ++ * The value here is something negative that can't be confused with a valid ++ * errno value and different from 1 because that's also a return value from ++ * fiemap_fill_next_extent() and also it's often used to mean some btree search ++ * did not find a key, so make it some distinct negative value. ++ */ ++#define BTRFS_FIEMAP_FLUSH_CACHE (-(MAX_ERRNO + 1)) ++ + /* +- * To cache previous fiemap extent ++ * Used to: + * +- * Will be used for merging fiemap extent ++ * - Cache the next entry to be emitted to the fiemap buffer, so that we can ++ * merge extents that are contiguous and can be grouped as a single one; ++ * ++ * - Store extents ready to be written to the fiemap buffer in an intermediary ++ * buffer. This intermediary buffer is to ensure that in case the fiemap ++ * buffer is memory mapped to the fiemap target file, we don't deadlock ++ * during btrfs_page_mkwrite(). This is because during fiemap we are locking ++ * an extent range in order to prevent races with delalloc flushing and ++ * ordered extent completion, which is needed in order to reliably detect ++ * delalloc in holes and prealloc extents. And this can lead to a deadlock ++ * if the fiemap buffer is memory mapped to the file we are running fiemap ++ * against (a silly, useless in practice scenario, but possible) because ++ * btrfs_page_mkwrite() will try to lock the same extent range. + */ + struct fiemap_cache { ++ /* An array of ready fiemap entries. */ ++ struct btrfs_fiemap_entry *entries; ++ /* Number of entries in the entries array. */ ++ int entries_size; ++ /* Index of the next entry in the entries array to write to. 
*/ ++ int entries_pos; ++ /* ++ * Once the entries array is full, this indicates what's the offset for ++ * the next file extent item we must search for in the inode's subvolume ++ * tree after unlocking the extent range in the inode's io tree and ++ * releasing the search path. ++ */ ++ u64 next_search_offset; ++ /* ++ * This matches struct fiemap_extent_info::fi_mapped_extents, we use it ++ * to count ourselves emitted extents and stop instead of relying on ++ * fiemap_fill_next_extent() because we buffer ready fiemap entries at ++ * the @entries array, and we want to stop as soon as we hit the max ++ * amount of extents to map, not just to save time but also to make the ++ * logic at extent_fiemap() simpler. ++ */ ++ unsigned int extents_mapped; ++ /* Fields for the cached extent (unsubmitted, not ready, extent). */ + u64 offset; + u64 phys; + u64 len; +@@ -2423,6 +2476,28 @@ struct fiemap_cache { + bool cached; + }; + ++static int flush_fiemap_cache(struct fiemap_extent_info *fieinfo, ++ struct fiemap_cache *cache) ++{ ++ for (int i = 0; i < cache->entries_pos; i++) { ++ struct btrfs_fiemap_entry *entry = &cache->entries[i]; ++ int ret; ++ ++ ret = fiemap_fill_next_extent(fieinfo, entry->offset, ++ entry->phys, entry->len, ++ entry->flags); ++ /* ++ * Ignore 1 (reached max entries) because we keep track of that ++ * ourselves in emit_fiemap_extent(). ++ */ ++ if (ret < 0) ++ return ret; ++ } ++ cache->entries_pos = 0; ++ ++ return 0; ++} ++ + /* + * Helper to submit fiemap extent. + * +@@ -2437,8 +2512,8 @@ static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo, + struct fiemap_cache *cache, + u64 offset, u64 phys, u64 len, u32 flags) + { ++ struct btrfs_fiemap_entry *entry; + u64 cache_end; +- int ret = 0; + + /* Set at the end of extent_fiemap(). */ + ASSERT((flags & FIEMAP_EXTENT_LAST) == 0); +@@ -2451,7 +2526,9 @@ static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo, + * find an extent that starts at an offset behind the end offset of the + * previous extent we processed. This happens if fiemap is called + * without FIEMAP_FLAG_SYNC and there are ordered extents completing +- * while we call btrfs_next_leaf() (through fiemap_next_leaf_item()). ++ * after we had to unlock the file range, release the search path, emit ++ * the fiemap extents stored in the buffer (cache->entries array) and ++ * the lock the remainder of the range and re-search the btree. + * + * For example we are in leaf X processing its last item, which is the + * file extent item for file range [512K, 1M[, and after +@@ -2564,11 +2641,35 @@ static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo, + + emit: + /* Not mergeable, need to submit cached one */ +- ret = fiemap_fill_next_extent(fieinfo, cache->offset, cache->phys, +- cache->len, cache->flags); +- cache->cached = false; +- if (ret) +- return ret; ++ ++ if (cache->entries_pos == cache->entries_size) { ++ /* ++ * We will need to research for the end offset of the last ++ * stored extent and not from the current offset, because after ++ * unlocking the range and releasing the path, if there's a hole ++ * between that end offset and this current offset, a new extent ++ * may have been inserted due to a new write, so we don't want ++ * to miss it. 
++ */ ++ entry = &cache->entries[cache->entries_size - 1]; ++ cache->next_search_offset = entry->offset + entry->len; ++ cache->cached = false; ++ ++ return BTRFS_FIEMAP_FLUSH_CACHE; ++ } ++ ++ entry = &cache->entries[cache->entries_pos]; ++ entry->offset = cache->offset; ++ entry->phys = cache->phys; ++ entry->len = cache->len; ++ entry->flags = cache->flags; ++ cache->entries_pos++; ++ cache->extents_mapped++; ++ ++ if (cache->extents_mapped == fieinfo->fi_extents_max) { ++ cache->cached = false; ++ return 1; ++ } + assign: + cache->cached = true; + cache->offset = offset; +@@ -2694,8 +2795,8 @@ static int fiemap_search_slot(struct btrfs_inode *inode, struct btrfs_path *path + * neighbour leaf). + * We also need the private clone because holding a read lock on an + * extent buffer of the subvolume's b+tree will make lockdep unhappy +- * when we call fiemap_fill_next_extent(), because that may cause a page +- * fault when filling the user space buffer with fiemap data. ++ * when we check if extents are shared, as backref walking may need to ++ * lock the same leaf we are processing. + */ + clone = btrfs_clone_extent_buffer(path->nodes[0]); + if (!clone) +@@ -2735,34 +2836,16 @@ static int fiemap_process_hole(struct btrfs_inode *inode, + * it beyond i_size. + */ + while (cur_offset < end && cur_offset < i_size) { +- struct extent_state *cached_state = NULL; + u64 delalloc_start; + u64 delalloc_end; + u64 prealloc_start; +- u64 lockstart; +- u64 lockend; + u64 prealloc_len = 0; + bool delalloc; + +- lockstart = round_down(cur_offset, inode->root->fs_info->sectorsize); +- lockend = round_up(end, inode->root->fs_info->sectorsize); +- +- /* +- * We are only locking for the delalloc range because that's the +- * only thing that can change here. With fiemap we have a lock +- * on the inode, so no buffered or direct writes can happen. +- * +- * However mmaps and normal page writeback will cause this to +- * change arbitrarily. We have to lock the extent lock here to +- * make sure that nobody messes with the tree while we're doing +- * btrfs_find_delalloc_in_range. 
+- */ +- lock_extent(&inode->io_tree, lockstart, lockend, &cached_state); + delalloc = btrfs_find_delalloc_in_range(inode, cur_offset, end, + delalloc_cached_state, + &delalloc_start, + &delalloc_end); +- unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state); + if (!delalloc) + break; + +@@ -2930,6 +3013,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, + u64 start, u64 len) + { + const u64 ino = btrfs_ino(inode); ++ struct extent_state *cached_state = NULL; + struct extent_state *delalloc_cached_state = NULL; + struct btrfs_path *path; + struct fiemap_cache cache = { 0 }; +@@ -2942,18 +3026,23 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, + bool stopped = false; + int ret; + ++ cache.entries_size = PAGE_SIZE / sizeof(struct btrfs_fiemap_entry); ++ cache.entries = kmalloc_array(cache.entries_size, ++ sizeof(struct btrfs_fiemap_entry), ++ GFP_KERNEL); + backref_ctx = btrfs_alloc_backref_share_check_ctx(); + path = btrfs_alloc_path(); +- if (!backref_ctx || !path) { ++ if (!cache.entries || !backref_ctx || !path) { + ret = -ENOMEM; + goto out; + } + ++restart: + range_start = round_down(start, sectorsize); + range_end = round_up(start + len, sectorsize); + prev_extent_end = range_start; + +- btrfs_inode_lock(inode, BTRFS_ILOCK_SHARED); ++ lock_extent(&inode->io_tree, range_start, range_end, &cached_state); + + ret = fiemap_find_last_extent_offset(inode, path, &last_extent_end); + if (ret < 0) +@@ -3079,7 +3168,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, + if (ret < 0) { + goto out_unlock; + } else if (ret > 0) { +- /* fiemap_fill_next_extent() told us to stop. */ ++ /* emit_fiemap_extent() told us to stop. */ + stopped = true; + break; + } +@@ -3102,16 +3191,6 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, + } + + check_eof_delalloc: +- /* +- * Release (and free) the path before emitting any final entries to +- * fiemap_fill_next_extent() to keep lockdep happy. This is because +- * once we find no more file extent items exist, we may have a +- * non-cloned leaf, and fiemap_fill_next_extent() can trigger page +- * faults when copying data to the user space buffer. +- */ +- btrfs_free_path(path); +- path = NULL; +- + if (!stopped && prev_extent_end < range_end) { + ret = fiemap_process_hole(inode, fieinfo, &cache, + &delalloc_cached_state, backref_ctx, +@@ -3125,28 +3204,16 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, + const u64 i_size = i_size_read(&inode->vfs_inode); + + if (prev_extent_end < i_size) { +- struct extent_state *cached_state = NULL; + u64 delalloc_start; + u64 delalloc_end; +- u64 lockstart; +- u64 lockend; + bool delalloc; + +- lockstart = round_down(prev_extent_end, sectorsize); +- lockend = round_up(i_size, sectorsize); +- +- /* +- * See the comment in fiemap_process_hole as to why +- * we're doing the locking here. 
+- */ +- lock_extent(&inode->io_tree, lockstart, lockend, &cached_state); + delalloc = btrfs_find_delalloc_in_range(inode, + prev_extent_end, + i_size - 1, + &delalloc_cached_state, + &delalloc_start, + &delalloc_end); +- unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state); + if (!delalloc) + cache.flags |= FIEMAP_EXTENT_LAST; + } else { +@@ -3154,12 +3221,39 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, + } + } + +- ret = emit_last_fiemap_cache(fieinfo, &cache); +- + out_unlock: +- btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED); ++ unlock_extent(&inode->io_tree, range_start, range_end, &cached_state); ++ ++ if (ret == BTRFS_FIEMAP_FLUSH_CACHE) { ++ btrfs_release_path(path); ++ ret = flush_fiemap_cache(fieinfo, &cache); ++ if (ret) ++ goto out; ++ len -= cache.next_search_offset - start; ++ start = cache.next_search_offset; ++ goto restart; ++ } else if (ret < 0) { ++ goto out; ++ } ++ ++ /* ++ * Must free the path before emitting to the fiemap buffer because we ++ * may have a non-cloned leaf and if the fiemap buffer is memory mapped ++ * to a file, a write into it (through btrfs_page_mkwrite()) may trigger ++ * waiting for an ordered extent that in order to complete needs to ++ * modify that leaf, therefore leading to a deadlock. ++ */ ++ btrfs_free_path(path); ++ path = NULL; ++ ++ ret = flush_fiemap_cache(fieinfo, &cache); ++ if (ret) ++ goto out; ++ ++ ret = emit_last_fiemap_cache(fieinfo, &cache); + out: + free_extent_state(delalloc_cached_state); ++ kfree(cache.entries); + btrfs_free_backref_share_ctx(backref_ctx); + btrfs_free_path(path); + return ret; +diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c +index ca79c2b8adc46..1ac14223ffb50 100644 +--- a/fs/btrfs/inode.c ++++ b/fs/btrfs/inode.c +@@ -7813,6 +7813,7 @@ struct iomap_dio *btrfs_dio_write(struct kiocb *iocb, struct iov_iter *iter, + static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, + u64 start, u64 len) + { ++ struct btrfs_inode *btrfs_inode = BTRFS_I(inode); + int ret; + + ret = fiemap_prep(inode, fieinfo, start, &len, 0); +@@ -7838,7 +7839,26 @@ static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, + return ret; + } + +- return extent_fiemap(BTRFS_I(inode), fieinfo, start, len); ++ btrfs_inode_lock(btrfs_inode, BTRFS_ILOCK_SHARED); ++ ++ /* ++ * We did an initial flush to avoid holding the inode's lock while ++ * triggering writeback and waiting for the completion of IO and ordered ++ * extents. Now after we locked the inode we do it again, because it's ++ * possible a new write may have happened in between those two steps. 
++ */ ++ if (fieinfo->fi_flags & FIEMAP_FLAG_SYNC) { ++ ret = btrfs_wait_ordered_range(inode, 0, LLONG_MAX); ++ if (ret) { ++ btrfs_inode_unlock(btrfs_inode, BTRFS_ILOCK_SHARED); ++ return ret; ++ } ++ } ++ ++ ret = extent_fiemap(btrfs_inode, fieinfo, start, len); ++ btrfs_inode_unlock(btrfs_inode, BTRFS_ILOCK_SHARED); ++ ++ return ret; + } + + static int btrfs_writepages(struct address_space *mapping, +diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c +index 522596060252f..c7e52d980cd75 100644 +--- a/fs/nfsd/nfs4state.c ++++ b/fs/nfsd/nfs4state.c +@@ -2886,12 +2886,9 @@ static void + nfsd4_cb_recall_any_release(struct nfsd4_callback *cb) + { + struct nfs4_client *clp = cb->cb_clp; +- struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + +- spin_lock(&nn->client_lock); + clear_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags); +- put_client_renew_locked(clp); +- spin_unlock(&nn->client_lock); ++ drop_client(clp); + } + + static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = { +@@ -6273,7 +6270,7 @@ deleg_reaper(struct nfsd_net *nn) + list_add(&clp->cl_ra_cblist, &cblist); + + /* release in nfsd4_cb_recall_any_release */ +- atomic_inc(&clp->cl_rpc_users); ++ kref_get(&clp->cl_nfsdfs.cl_ref); + set_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags); + clp->cl_ra_time = ktime_get_boottime_seconds(); + } +diff --git a/fs/pipe.c b/fs/pipe.c +index a234035cc375d..ba4376341ddd2 100644 +--- a/fs/pipe.c ++++ b/fs/pipe.c +@@ -425,6 +425,18 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) + bool was_empty = false; + bool wake_next_writer = false; + ++ /* ++ * Reject writing to watch queue pipes before the point where we lock ++ * the pipe. ++ * Otherwise, lockdep would be unhappy if the caller already has another ++ * pipe locked. ++ * If we had to support locking a normal pipe and a notification pipe at ++ * the same time, we could set up lockdep annotations for that, but ++ * since we don't actually need that, it's simpler to just bail here. ++ */ ++ if (pipe_has_watch_queue(pipe)) ++ return -EXDEV; ++ + /* Null write succeeds. */ + if (unlikely(total_len == 0)) + return 0; +@@ -437,11 +449,6 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) + goto out; + } + +- if (pipe_has_watch_queue(pipe)) { +- ret = -EXDEV; +- goto out; +- } +- + /* + * If it wasn't empty we try to merge new data into + * the last buffer. 
+diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c +index 15e1215bc4e5a..1a9e705d65002 100644 +--- a/fs/smb/client/cached_dir.c ++++ b/fs/smb/client/cached_dir.c +@@ -401,6 +401,7 @@ smb2_close_cached_fid(struct kref *ref) + { + struct cached_fid *cfid = container_of(ref, struct cached_fid, + refcount); ++ int rc; + + spin_lock(&cfid->cfids->cfid_list_lock); + if (cfid->on_list) { +@@ -414,9 +415,10 @@ smb2_close_cached_fid(struct kref *ref) + cfid->dentry = NULL; + + if (cfid->is_open) { +- SMB2_close(0, cfid->tcon, cfid->fid.persistent_fid, ++ rc = SMB2_close(0, cfid->tcon, cfid->fid.persistent_fid, + cfid->fid.volatile_fid); +- atomic_dec(&cfid->tcon->num_remote_opens); ++ if (rc != -EBUSY && rc != -EAGAIN) ++ atomic_dec(&cfid->tcon->num_remote_opens); + } + + free_cached_dir(cfid); +diff --git a/fs/smb/client/cifs_debug.c b/fs/smb/client/cifs_debug.c +index 7206167f4184a..6c85edb8635d0 100644 +--- a/fs/smb/client/cifs_debug.c ++++ b/fs/smb/client/cifs_debug.c +@@ -250,6 +250,8 @@ static int cifs_debug_files_proc_show(struct seq_file *m, void *v) + spin_lock(&cifs_tcp_ses_lock); + list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) { + list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { ++ if (cifs_ses_exiting(ses)) ++ continue; + list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { + spin_lock(&tcon->open_file_lock); + list_for_each_entry(cfile, &tcon->openFileList, tlist) { +@@ -654,6 +656,8 @@ static ssize_t cifs_stats_proc_write(struct file *file, + } + #endif /* CONFIG_CIFS_STATS2 */ + list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { ++ if (cifs_ses_exiting(ses)) ++ continue; + list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { + atomic_set(&tcon->num_smbs_sent, 0); + spin_lock(&tcon->stat_lock); +@@ -732,6 +736,8 @@ static int cifs_stats_proc_show(struct seq_file *m, void *v) + } + #endif /* STATS2 */ + list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { ++ if (cifs_ses_exiting(ses)) ++ continue; + list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { + i++; + seq_printf(m, "\n%d) %s", i, tcon->tree_name); +diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c +index 2131638f26d0b..fcb93a66e47cb 100644 +--- a/fs/smb/client/cifsfs.c ++++ b/fs/smb/client/cifsfs.c +@@ -159,6 +159,7 @@ struct workqueue_struct *decrypt_wq; + struct workqueue_struct *fileinfo_put_wq; + struct workqueue_struct *cifsoplockd_wq; + struct workqueue_struct *deferredclose_wq; ++struct workqueue_struct *serverclose_wq; + __u32 cifs_lock_secret; + + /* +@@ -1877,6 +1878,13 @@ init_cifs(void) + goto out_destroy_cifsoplockd_wq; + } + ++ serverclose_wq = alloc_workqueue("serverclose", ++ WQ_FREEZABLE|WQ_MEM_RECLAIM, 0); ++ if (!serverclose_wq) { ++ rc = -ENOMEM; ++ goto out_destroy_serverclose_wq; ++ } ++ + rc = cifs_init_inodecache(); + if (rc) + goto out_destroy_deferredclose_wq; +@@ -1951,6 +1959,8 @@ init_cifs(void) + destroy_workqueue(decrypt_wq); + out_destroy_cifsiod_wq: + destroy_workqueue(cifsiod_wq); ++out_destroy_serverclose_wq: ++ destroy_workqueue(serverclose_wq); + out_clean_proc: + cifs_proc_clean(); + return rc; +@@ -1980,6 +1990,7 @@ exit_cifs(void) + destroy_workqueue(cifsoplockd_wq); + destroy_workqueue(decrypt_wq); + destroy_workqueue(fileinfo_put_wq); ++ destroy_workqueue(serverclose_wq); + destroy_workqueue(cifsiod_wq); + cifs_proc_clean(); + } +diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h +index 35a12413bbee6..a878b1e5aa313 100644 +--- a/fs/smb/client/cifsglob.h ++++ 
b/fs/smb/client/cifsglob.h +@@ -425,10 +425,10 @@ struct smb_version_operations { + /* set fid protocol-specific info */ + void (*set_fid)(struct cifsFileInfo *, struct cifs_fid *, __u32); + /* close a file */ +- void (*close)(const unsigned int, struct cifs_tcon *, ++ int (*close)(const unsigned int, struct cifs_tcon *, + struct cifs_fid *); + /* close a file, returning file attributes and timestamps */ +- void (*close_getattr)(const unsigned int xid, struct cifs_tcon *tcon, ++ int (*close_getattr)(const unsigned int xid, struct cifs_tcon *tcon, + struct cifsFileInfo *pfile_info); + /* send a flush request to the server */ + int (*flush)(const unsigned int, struct cifs_tcon *, struct cifs_fid *); +@@ -1408,6 +1408,7 @@ struct cifsFileInfo { + bool invalidHandle:1; /* file closed via session abend */ + bool swapfile:1; + bool oplock_break_cancelled:1; ++ bool offload:1; /* offload final part of _put to a wq */ + unsigned int oplock_epoch; /* epoch from the lease break */ + __u32 oplock_level; /* oplock/lease level from the lease break */ + int count; +@@ -1416,6 +1417,7 @@ struct cifsFileInfo { + struct cifs_search_info srch_inf; + struct work_struct oplock_break; /* work for oplock breaks */ + struct work_struct put; /* work for the final part of _put */ ++ struct work_struct serverclose; /* work for serverclose */ + struct delayed_work deferred; + bool deferred_close_scheduled; /* Flag to indicate close is scheduled */ + char *symlink_target; +@@ -2073,6 +2075,7 @@ extern struct workqueue_struct *decrypt_wq; + extern struct workqueue_struct *fileinfo_put_wq; + extern struct workqueue_struct *cifsoplockd_wq; + extern struct workqueue_struct *deferredclose_wq; ++extern struct workqueue_struct *serverclose_wq; + extern __u32 cifs_lock_secret; + + extern mempool_t *cifs_mid_poolp; +@@ -2278,4 +2281,14 @@ struct smb2_compound_vars { + struct smb2_file_link_info link_info; + }; + ++static inline bool cifs_ses_exiting(struct cifs_ses *ses) ++{ ++ bool ret; ++ ++ spin_lock(&ses->ses_lock); ++ ret = ses->ses_status == SES_EXITING; ++ spin_unlock(&ses->ses_lock); ++ return ret; ++} ++ + #endif /* _CIFS_GLOB_H */ +diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c +index 4c958129181d3..97776dd12b6b8 100644 +--- a/fs/smb/client/connect.c ++++ b/fs/smb/client/connect.c +@@ -178,6 +178,8 @@ cifs_signal_cifsd_for_reconnect(struct TCP_Server_Info *server, + + spin_lock(&cifs_tcp_ses_lock); + list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { ++ if (cifs_ses_exiting(ses)) ++ continue; + spin_lock(&ses->chan_lock); + for (i = 0; i < ses->chan_count; i++) { + if (!ses->chans[i].server) +@@ -3981,13 +3983,14 @@ cifs_set_vol_auth(struct smb3_fs_context *ctx, struct cifs_ses *ses) + } + + static struct cifs_tcon * +-cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid) ++__cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid) + { + int rc; + struct cifs_tcon *master_tcon = cifs_sb_master_tcon(cifs_sb); + struct cifs_ses *ses; + struct cifs_tcon *tcon = NULL; + struct smb3_fs_context *ctx; ++ char *origin_fullpath = NULL; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (ctx == NULL) +@@ -4011,6 +4014,7 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid) + ctx->sign = master_tcon->ses->sign; + ctx->seal = master_tcon->seal; + ctx->witness = master_tcon->use_witness; ++ ctx->dfs_root_ses = master_tcon->ses->dfs_root_ses; + + rc = cifs_set_vol_auth(ctx, master_tcon->ses); + if (rc) { +@@ -4030,12 +4034,39 @@ cifs_construct_tcon(struct cifs_sb_info 
*cifs_sb, kuid_t fsuid) + goto out; + } + ++#ifdef CONFIG_CIFS_DFS_UPCALL ++ spin_lock(&master_tcon->tc_lock); ++ if (master_tcon->origin_fullpath) { ++ spin_unlock(&master_tcon->tc_lock); ++ origin_fullpath = dfs_get_path(cifs_sb, cifs_sb->ctx->source); ++ if (IS_ERR(origin_fullpath)) { ++ tcon = ERR_CAST(origin_fullpath); ++ origin_fullpath = NULL; ++ cifs_put_smb_ses(ses); ++ goto out; ++ } ++ } else { ++ spin_unlock(&master_tcon->tc_lock); ++ } ++#endif ++ + tcon = cifs_get_tcon(ses, ctx); + if (IS_ERR(tcon)) { + cifs_put_smb_ses(ses); + goto out; + } + ++#ifdef CONFIG_CIFS_DFS_UPCALL ++ if (origin_fullpath) { ++ spin_lock(&tcon->tc_lock); ++ tcon->origin_fullpath = origin_fullpath; ++ spin_unlock(&tcon->tc_lock); ++ origin_fullpath = NULL; ++ queue_delayed_work(dfscache_wq, &tcon->dfs_cache_work, ++ dfs_cache_get_ttl() * HZ); ++ } ++#endif ++ + #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY + if (cap_unix(ses)) + reset_cifs_unix_caps(0, tcon, NULL, ctx); +@@ -4044,11 +4075,23 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid) + out: + kfree(ctx->username); + kfree_sensitive(ctx->password); ++ kfree(origin_fullpath); + kfree(ctx); + + return tcon; + } + ++static struct cifs_tcon * ++cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid) ++{ ++ struct cifs_tcon *ret; ++ ++ cifs_mount_lock(); ++ ret = __cifs_construct_tcon(cifs_sb, fsuid); ++ cifs_mount_unlock(); ++ return ret; ++} ++ + struct cifs_tcon * + cifs_sb_master_tcon(struct cifs_sb_info *cifs_sb) + { +diff --git a/fs/smb/client/dir.c b/fs/smb/client/dir.c +index 580a27a3a7e62..855468a32904e 100644 +--- a/fs/smb/client/dir.c ++++ b/fs/smb/client/dir.c +@@ -189,6 +189,7 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned + int disposition; + struct TCP_Server_Info *server = tcon->ses->server; + struct cifs_open_parms oparms; ++ int rdwr_for_fscache = 0; + + *oplock = 0; + if (tcon->ses->server->oplocks) +@@ -200,6 +201,10 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned + return PTR_ERR(full_path); + } + ++ /* If we're caching, we need to be able to fill in around partial writes. */ ++ if (cifs_fscache_enabled(inode) && (oflags & O_ACCMODE) == O_WRONLY) ++ rdwr_for_fscache = 1; ++ + #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY + if (tcon->unix_ext && cap_unix(tcon->ses) && !tcon->broken_posix_open && + (CIFS_UNIX_POSIX_PATH_OPS_CAP & +@@ -276,6 +281,8 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned + desired_access |= GENERIC_READ; /* is this too little? 
*/ + if (OPEN_FMODE(oflags) & FMODE_WRITE) + desired_access |= GENERIC_WRITE; ++ if (rdwr_for_fscache == 1) ++ desired_access |= GENERIC_READ; + + disposition = FILE_OVERWRITE_IF; + if ((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) +@@ -304,6 +311,7 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned + if (!tcon->unix_ext && (mode & S_IWUGO) == 0) + create_options |= CREATE_OPTION_READONLY; + ++retry_open: + oparms = (struct cifs_open_parms) { + .tcon = tcon, + .cifs_sb = cifs_sb, +@@ -317,8 +325,15 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned + rc = server->ops->open(xid, &oparms, oplock, buf); + if (rc) { + cifs_dbg(FYI, "cifs_create returned 0x%x\n", rc); ++ if (rc == -EACCES && rdwr_for_fscache == 1) { ++ desired_access &= ~GENERIC_READ; ++ rdwr_for_fscache = 2; ++ goto retry_open; ++ } + goto out; + } ++ if (rdwr_for_fscache == 2) ++ cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE); + + #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY + /* +diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c +index c711d5eb2987e..53a8c633221b9 100644 +--- a/fs/smb/client/file.c ++++ b/fs/smb/client/file.c +@@ -206,12 +206,12 @@ cifs_mark_open_files_invalid(struct cifs_tcon *tcon) + */ + } + +-static inline int cifs_convert_flags(unsigned int flags) ++static inline int cifs_convert_flags(unsigned int flags, int rdwr_for_fscache) + { + if ((flags & O_ACCMODE) == O_RDONLY) + return GENERIC_READ; + else if ((flags & O_ACCMODE) == O_WRONLY) +- return GENERIC_WRITE; ++ return rdwr_for_fscache == 1 ? (GENERIC_READ | GENERIC_WRITE) : GENERIC_WRITE; + else if ((flags & O_ACCMODE) == O_RDWR) { + /* GENERIC_ALL is too much permission to request + can cause unnecessary access denied on create */ +@@ -348,11 +348,16 @@ static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_ + int create_options = CREATE_NOT_DIR; + struct TCP_Server_Info *server = tcon->ses->server; + struct cifs_open_parms oparms; ++ int rdwr_for_fscache = 0; + + if (!server->ops->open) + return -ENOSYS; + +- desired_access = cifs_convert_flags(f_flags); ++ /* If we're caching, we need to be able to fill in around partial writes. 
*/ ++ if (cifs_fscache_enabled(inode) && (f_flags & O_ACCMODE) == O_WRONLY) ++ rdwr_for_fscache = 1; ++ ++ desired_access = cifs_convert_flags(f_flags, rdwr_for_fscache); + + /********************************************************************* + * open flag mapping table: +@@ -389,6 +394,7 @@ static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_ + if (f_flags & O_DIRECT) + create_options |= CREATE_NO_BUFFER; + ++retry_open: + oparms = (struct cifs_open_parms) { + .tcon = tcon, + .cifs_sb = cifs_sb, +@@ -400,8 +406,16 @@ static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_ + }; + + rc = server->ops->open(xid, &oparms, oplock, buf); +- if (rc) ++ if (rc) { ++ if (rc == -EACCES && rdwr_for_fscache == 1) { ++ desired_access = cifs_convert_flags(f_flags, 0); ++ rdwr_for_fscache = 2; ++ goto retry_open; ++ } + return rc; ++ } ++ if (rdwr_for_fscache == 2) ++ cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE); + + /* TODO: Add support for calling posix query info but with passing in fid */ + if (tcon->unix_ext) +@@ -445,6 +459,7 @@ cifs_down_write(struct rw_semaphore *sem) + } + + static void cifsFileInfo_put_work(struct work_struct *work); ++void serverclose_work(struct work_struct *work); + + struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file, + struct tcon_link *tlink, __u32 oplock, +@@ -491,6 +506,7 @@ struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file, + cfile->tlink = cifs_get_tlink(tlink); + INIT_WORK(&cfile->oplock_break, cifs_oplock_break); + INIT_WORK(&cfile->put, cifsFileInfo_put_work); ++ INIT_WORK(&cfile->serverclose, serverclose_work); + INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close); + mutex_init(&cfile->fh_mutex); + spin_lock_init(&cfile->file_info_lock); +@@ -582,6 +598,40 @@ static void cifsFileInfo_put_work(struct work_struct *work) + cifsFileInfo_put_final(cifs_file); + } + ++void serverclose_work(struct work_struct *work) ++{ ++ struct cifsFileInfo *cifs_file = container_of(work, ++ struct cifsFileInfo, serverclose); ++ ++ struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink); ++ ++ struct TCP_Server_Info *server = tcon->ses->server; ++ int rc = 0; ++ int retries = 0; ++ int MAX_RETRIES = 4; ++ ++ do { ++ if (server->ops->close_getattr) ++ rc = server->ops->close_getattr(0, tcon, cifs_file); ++ else if (server->ops->close) ++ rc = server->ops->close(0, tcon, &cifs_file->fid); ++ ++ if (rc == -EBUSY || rc == -EAGAIN) { ++ retries++; ++ msleep(250); ++ } ++ } while ((rc == -EBUSY || rc == -EAGAIN) && (retries < MAX_RETRIES) ++ ); ++ ++ if (retries == MAX_RETRIES) ++ pr_warn("Serverclose failed %d times, giving up\n", MAX_RETRIES); ++ ++ if (cifs_file->offload) ++ queue_work(fileinfo_put_wq, &cifs_file->put); ++ else ++ cifsFileInfo_put_final(cifs_file); ++} ++ + /** + * cifsFileInfo_put - release a reference of file priv data + * +@@ -622,10 +672,13 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, + struct cifs_fid fid = {}; + struct cifs_pending_open open; + bool oplock_break_cancelled; ++ bool serverclose_offloaded = false; + + spin_lock(&tcon->open_file_lock); + spin_lock(&cifsi->open_file_lock); + spin_lock(&cifs_file->file_info_lock); ++ ++ cifs_file->offload = offload; + if (--cifs_file->count > 0) { + spin_unlock(&cifs_file->file_info_lock); + spin_unlock(&cifsi->open_file_lock); +@@ -667,13 +720,20 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, + if (!tcon->need_reconnect && !cifs_file->invalidHandle) { + struct 
TCP_Server_Info *server = tcon->ses->server; + unsigned int xid; ++ int rc = 0; + + xid = get_xid(); + if (server->ops->close_getattr) +- server->ops->close_getattr(xid, tcon, cifs_file); ++ rc = server->ops->close_getattr(xid, tcon, cifs_file); + else if (server->ops->close) +- server->ops->close(xid, tcon, &cifs_file->fid); ++ rc = server->ops->close(xid, tcon, &cifs_file->fid); + _free_xid(xid); ++ ++ if (rc == -EBUSY || rc == -EAGAIN) { ++ // Server close failed, hence offloading it as an async op ++ queue_work(serverclose_wq, &cifs_file->serverclose); ++ serverclose_offloaded = true; ++ } + } + + if (oplock_break_cancelled) +@@ -681,10 +741,15 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, + + cifs_del_pending_open(&open); + +- if (offload) +- queue_work(fileinfo_put_wq, &cifs_file->put); +- else +- cifsFileInfo_put_final(cifs_file); ++ // if serverclose has been offloaded to wq (on failure), it will ++ // handle offloading put as well. If serverclose not offloaded, ++ // we need to handle offloading put here. ++ if (!serverclose_offloaded) { ++ if (offload) ++ queue_work(fileinfo_put_wq, &cifs_file->put); ++ else ++ cifsFileInfo_put_final(cifs_file); ++ } + } + + int cifs_open(struct inode *inode, struct file *file) +@@ -834,11 +899,11 @@ int cifs_open(struct inode *inode, struct file *file) + use_cache: + fscache_use_cookie(cifs_inode_cookie(file_inode(file)), + file->f_mode & FMODE_WRITE); +- if (file->f_flags & O_DIRECT && +- (!((file->f_flags & O_ACCMODE) != O_RDONLY) || +- file->f_flags & O_APPEND)) +- cifs_invalidate_cache(file_inode(file), +- FSCACHE_INVAL_DIO_WRITE); ++ if (!(file->f_flags & O_DIRECT)) ++ goto out; ++ if ((file->f_flags & (O_ACCMODE | O_APPEND)) == O_RDONLY) ++ goto out; ++ cifs_invalidate_cache(file_inode(file), FSCACHE_INVAL_DIO_WRITE); + + out: + free_dentry_path(page); +@@ -903,6 +968,7 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush) + int disposition = FILE_OPEN; + int create_options = CREATE_NOT_DIR; + struct cifs_open_parms oparms; ++ int rdwr_for_fscache = 0; + + xid = get_xid(); + mutex_lock(&cfile->fh_mutex); +@@ -966,7 +1032,11 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush) + } + #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ + +- desired_access = cifs_convert_flags(cfile->f_flags); ++ /* If we're caching, we need to be able to fill in around partial writes. 
*/ ++ if (cifs_fscache_enabled(inode) && (cfile->f_flags & O_ACCMODE) == O_WRONLY) ++ rdwr_for_fscache = 1; ++ ++ desired_access = cifs_convert_flags(cfile->f_flags, rdwr_for_fscache); + + /* O_SYNC also has bit for O_DSYNC so following check picks up either */ + if (cfile->f_flags & O_SYNC) +@@ -978,6 +1048,7 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush) + if (server->ops->get_lease_key) + server->ops->get_lease_key(inode, &cfile->fid); + ++retry_open: + oparms = (struct cifs_open_parms) { + .tcon = tcon, + .cifs_sb = cifs_sb, +@@ -1003,6 +1074,11 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush) + /* indicate that we need to relock the file */ + oparms.reconnect = true; + } ++ if (rc == -EACCES && rdwr_for_fscache == 1) { ++ desired_access = cifs_convert_flags(cfile->f_flags, 0); ++ rdwr_for_fscache = 2; ++ goto retry_open; ++ } + + if (rc) { + mutex_unlock(&cfile->fh_mutex); +@@ -1011,6 +1087,9 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush) + goto reopen_error_exit; + } + ++ if (rdwr_for_fscache == 2) ++ cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE); ++ + #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY + reopen_success: + #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ +diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c +index e4a6b240d2263..58567ae617b9f 100644 +--- a/fs/smb/client/fs_context.c ++++ b/fs/smb/client/fs_context.c +@@ -37,7 +37,7 @@ + #include "rfc1002pdu.h" + #include "fs_context.h" + +-static DEFINE_MUTEX(cifs_mount_mutex); ++DEFINE_MUTEX(cifs_mount_mutex); + + static const match_table_t cifs_smb_version_tokens = { + { Smb_1, SMB1_VERSION_STRING }, +@@ -752,9 +752,9 @@ static int smb3_get_tree(struct fs_context *fc) + + if (err) + return err; +- mutex_lock(&cifs_mount_mutex); ++ cifs_mount_lock(); + ret = smb3_get_tree_common(fc); +- mutex_unlock(&cifs_mount_mutex); ++ cifs_mount_unlock(); + return ret; + } + +diff --git a/fs/smb/client/fs_context.h b/fs/smb/client/fs_context.h +index cf46916286d02..8cfc25b609b6b 100644 +--- a/fs/smb/client/fs_context.h ++++ b/fs/smb/client/fs_context.h +@@ -293,4 +293,16 @@ extern void smb3_update_mnt_flags(struct cifs_sb_info *cifs_sb); + #define MAX_CACHED_FIDS 16 + extern char *cifs_sanitize_prepath(char *prepath, gfp_t gfp); + ++extern struct mutex cifs_mount_mutex; ++ ++static inline void cifs_mount_lock(void) ++{ ++ mutex_lock(&cifs_mount_mutex); ++} ++ ++static inline void cifs_mount_unlock(void) ++{ ++ mutex_unlock(&cifs_mount_mutex); ++} ++ + #endif +diff --git a/fs/smb/client/fscache.c b/fs/smb/client/fscache.c +index e5cad149f5a2d..a4ee801b29394 100644 +--- a/fs/smb/client/fscache.c ++++ b/fs/smb/client/fscache.c +@@ -12,6 +12,16 @@ + #include "cifs_fs_sb.h" + #include "cifsproto.h" + ++/* ++ * Key for fscache inode. [!] Contents must match comparisons in cifs_find_inode(). 
++ */ ++struct cifs_fscache_inode_key { ++ ++ __le64 uniqueid; /* server inode number */ ++ __le64 createtime; /* creation time on server */ ++ u8 type; /* S_IFMT file type */ ++} __packed; ++ + static void cifs_fscache_fill_volume_coherency( + struct cifs_tcon *tcon, + struct cifs_fscache_volume_coherency_data *cd) +@@ -97,15 +107,19 @@ void cifs_fscache_release_super_cookie(struct cifs_tcon *tcon) + void cifs_fscache_get_inode_cookie(struct inode *inode) + { + struct cifs_fscache_inode_coherency_data cd; ++ struct cifs_fscache_inode_key key; + struct cifsInodeInfo *cifsi = CIFS_I(inode); + struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); + ++ key.uniqueid = cpu_to_le64(cifsi->uniqueid); ++ key.createtime = cpu_to_le64(cifsi->createtime); ++ key.type = (inode->i_mode & S_IFMT) >> 12; + cifs_fscache_fill_coherency(&cifsi->netfs.inode, &cd); + + cifsi->netfs.cache = + fscache_acquire_cookie(tcon->fscache, 0, +- &cifsi->uniqueid, sizeof(cifsi->uniqueid), ++ &key, sizeof(key), + &cd, sizeof(cd), + i_size_read(&cifsi->netfs.inode)); + if (cifsi->netfs.cache) +diff --git a/fs/smb/client/fscache.h b/fs/smb/client/fscache.h +index a3d73720914f8..1f2ea9f5cc9a8 100644 +--- a/fs/smb/client/fscache.h ++++ b/fs/smb/client/fscache.h +@@ -109,6 +109,11 @@ static inline void cifs_readahead_to_fscache(struct inode *inode, + __cifs_readahead_to_fscache(inode, pos, len); + } + ++static inline bool cifs_fscache_enabled(struct inode *inode) ++{ ++ return fscache_cookie_enabled(cifs_inode_cookie(inode)); ++} ++ + #else /* CONFIG_CIFS_FSCACHE */ + static inline + void cifs_fscache_fill_coherency(struct inode *inode, +@@ -124,6 +129,7 @@ static inline void cifs_fscache_release_inode_cookie(struct inode *inode) {} + static inline void cifs_fscache_unuse_inode_cookie(struct inode *inode, bool update) {} + static inline struct fscache_cookie *cifs_inode_cookie(struct inode *inode) { return NULL; } + static inline void cifs_invalidate_cache(struct inode *inode, unsigned int flags) {} ++static inline bool cifs_fscache_enabled(struct inode *inode) { return false; } + + static inline int cifs_fscache_query_occupancy(struct inode *inode, + pgoff_t first, unsigned int nr_pages, +diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c +index cb9e719e67ae2..fa6330d586e89 100644 +--- a/fs/smb/client/inode.c ++++ b/fs/smb/client/inode.c +@@ -1390,6 +1390,8 @@ cifs_find_inode(struct inode *inode, void *opaque) + { + struct cifs_fattr *fattr = opaque; + ++ /* [!] The compared values must be the same in struct cifs_fscache_inode_key. 
*/ ++ + /* don't match inode with different uniqueid */ + if (CIFS_I(inode)->uniqueid != fattr->cf_uniqueid) + return 0; +diff --git a/fs/smb/client/ioctl.c b/fs/smb/client/ioctl.c +index 73ededa8eba5c..204dd7c47126e 100644 +--- a/fs/smb/client/ioctl.c ++++ b/fs/smb/client/ioctl.c +@@ -246,7 +246,9 @@ static int cifs_dump_full_key(struct cifs_tcon *tcon, struct smb3_full_key_debug + spin_lock(&cifs_tcp_ses_lock); + list_for_each_entry(server_it, &cifs_tcp_ses_list, tcp_ses_list) { + list_for_each_entry(ses_it, &server_it->smb_ses_list, smb_ses_list) { +- if (ses_it->Suid == out.session_id) { ++ spin_lock(&ses_it->ses_lock); ++ if (ses_it->ses_status != SES_EXITING && ++ ses_it->Suid == out.session_id) { + ses = ses_it; + /* + * since we are using the session outside the crit +@@ -254,9 +256,11 @@ static int cifs_dump_full_key(struct cifs_tcon *tcon, struct smb3_full_key_debug + * so increment its refcount + */ + cifs_smb_ses_inc_refcount(ses); ++ spin_unlock(&ses_it->ses_lock); + found = true; + goto search_end; + } ++ spin_unlock(&ses_it->ses_lock); + } + } + search_end: +diff --git a/fs/smb/client/misc.c b/fs/smb/client/misc.c +index c2137ea3c2538..ef573e3f8e52a 100644 +--- a/fs/smb/client/misc.c ++++ b/fs/smb/client/misc.c +@@ -489,6 +489,8 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv) + /* look up tcon based on tid & uid */ + spin_lock(&cifs_tcp_ses_lock); + list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { ++ if (cifs_ses_exiting(ses)) ++ continue; + list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { + if (tcon->tid != buf->Tid) + continue; +diff --git a/fs/smb/client/smb1ops.c b/fs/smb/client/smb1ops.c +index 64e25233e85de..1aebcf95c1951 100644 +--- a/fs/smb/client/smb1ops.c ++++ b/fs/smb/client/smb1ops.c +@@ -753,11 +753,11 @@ cifs_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock) + cinode->can_cache_brlcks = CIFS_CACHE_WRITE(cinode); + } + +-static void ++static int + cifs_close_file(const unsigned int xid, struct cifs_tcon *tcon, + struct cifs_fid *fid) + { +- CIFSSMBClose(xid, tcon, fid->netfid); ++ return CIFSSMBClose(xid, tcon, fid->netfid); + } + + static int +diff --git a/fs/smb/client/smb2misc.c b/fs/smb/client/smb2misc.c +index 82b84a4941dd2..cc72be5a93a93 100644 +--- a/fs/smb/client/smb2misc.c ++++ b/fs/smb/client/smb2misc.c +@@ -622,6 +622,8 @@ smb2_is_valid_lease_break(char *buffer, struct TCP_Server_Info *server) + /* look up tcon based on tid & uid */ + spin_lock(&cifs_tcp_ses_lock); + list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { ++ if (cifs_ses_exiting(ses)) ++ continue; + list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { + spin_lock(&tcon->open_file_lock); + cifs_stats_inc( +@@ -697,6 +699,8 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server) + /* look up tcon based on tid & uid */ + spin_lock(&cifs_tcp_ses_lock); + list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { ++ if (cifs_ses_exiting(ses)) ++ continue; + list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { + + spin_lock(&tcon->open_file_lock); +diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c +index 978a9f409857a..04fea874d0a33 100644 +--- a/fs/smb/client/smb2ops.c ++++ b/fs/smb/client/smb2ops.c +@@ -1392,14 +1392,14 @@ smb2_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock) + memcpy(cfile->fid.create_guid, fid->create_guid, 16); + } + +-static void ++static int + smb2_close_file(const unsigned int xid, struct cifs_tcon *tcon, + struct cifs_fid 
*fid) + { +- SMB2_close(xid, tcon, fid->persistent_fid, fid->volatile_fid); ++ return SMB2_close(xid, tcon, fid->persistent_fid, fid->volatile_fid); + } + +-static void ++static int + smb2_close_getattr(const unsigned int xid, struct cifs_tcon *tcon, + struct cifsFileInfo *cfile) + { +@@ -1410,7 +1410,7 @@ smb2_close_getattr(const unsigned int xid, struct cifs_tcon *tcon, + rc = __SMB2_close(xid, tcon, cfile->fid.persistent_fid, + cfile->fid.volatile_fid, &file_inf); + if (rc) +- return; ++ return rc; + + inode = d_inode(cfile->dentry); + +@@ -1439,6 +1439,7 @@ smb2_close_getattr(const unsigned int xid, struct cifs_tcon *tcon, + + /* End of file and Attributes should not have to be updated on close */ + spin_unlock(&inode->i_lock); ++ return rc; + } + + static int +@@ -2429,6 +2430,8 @@ smb2_is_network_name_deleted(char *buf, struct TCP_Server_Info *server) + + spin_lock(&cifs_tcp_ses_lock); + list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { ++ if (cifs_ses_exiting(ses)) ++ continue; + list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { + if (tcon->tid == le32_to_cpu(shdr->Id.SyncId.TreeId)) { + spin_lock(&tcon->tc_lock); +diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c +index 4d7d0bdf7a472..94bd4c6d2d682 100644 +--- a/fs/smb/client/smb2pdu.c ++++ b/fs/smb/client/smb2pdu.c +@@ -3549,9 +3549,9 @@ __SMB2_close(const unsigned int xid, struct cifs_tcon *tcon, + memcpy(&pbuf->network_open_info, + &rsp->network_open_info, + sizeof(pbuf->network_open_info)); ++ atomic_dec(&tcon->num_remote_opens); + } + +- atomic_dec(&tcon->num_remote_opens); + close_exit: + SMB2_close_free(&rqst); + free_rsp_buf(resp_buftype, rsp); +diff --git a/fs/smb/server/ksmbd_netlink.h b/fs/smb/server/ksmbd_netlink.h +index 0ebf91ffa2361..4464a62228cf3 100644 +--- a/fs/smb/server/ksmbd_netlink.h ++++ b/fs/smb/server/ksmbd_netlink.h +@@ -166,7 +166,8 @@ struct ksmbd_share_config_response { + __u16 force_uid; + __u16 force_gid; + __s8 share_name[KSMBD_REQ_MAX_SHARE_NAME]; +- __u32 reserved[112]; /* Reserved room */ ++ __u32 reserved[111]; /* Reserved room */ ++ __u32 payload_sz; + __u32 veto_list_sz; + __s8 ____payload[]; + }; +diff --git a/fs/smb/server/mgmt/share_config.c b/fs/smb/server/mgmt/share_config.c +index 328a412259dc1..a2f0a2edceb8a 100644 +--- a/fs/smb/server/mgmt/share_config.c ++++ b/fs/smb/server/mgmt/share_config.c +@@ -158,7 +158,12 @@ static struct ksmbd_share_config *share_config_request(struct unicode_map *um, + share->name = kstrdup(name, GFP_KERNEL); + + if (!test_share_config_flag(share, KSMBD_SHARE_FLAG_PIPE)) { +- share->path = kstrdup(ksmbd_share_config_path(resp), ++ int path_len = PATH_MAX; ++ ++ if (resp->payload_sz) ++ path_len = resp->payload_sz - resp->veto_list_sz; ++ ++ share->path = kstrndup(ksmbd_share_config_path(resp), path_len, + GFP_KERNEL); + if (share->path) + share->path_sz = strlen(share->path); +diff --git a/fs/smb/server/smb2ops.c b/fs/smb/server/smb2ops.c +index 27a9dce3e03ab..8600f32c981a1 100644 +--- a/fs/smb/server/smb2ops.c ++++ b/fs/smb/server/smb2ops.c +@@ -228,6 +228,11 @@ void init_smb3_0_server(struct ksmbd_conn *conn) + conn->cli_cap & SMB2_GLOBAL_CAP_ENCRYPTION) + conn->vals->capabilities |= SMB2_GLOBAL_CAP_ENCRYPTION; + ++ if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION || ++ (!(server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION_OFF) && ++ conn->cli_cap & SMB2_GLOBAL_CAP_ENCRYPTION)) ++ conn->vals->capabilities |= SMB2_GLOBAL_CAP_ENCRYPTION; ++ + if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB3_MULTICHANNEL) + 
conn->vals->capabilities |= SMB2_GLOBAL_CAP_MULTI_CHANNEL; + } +@@ -275,11 +280,6 @@ int init_smb3_11_server(struct ksmbd_conn *conn) + conn->vals->capabilities |= SMB2_GLOBAL_CAP_LEASING | + SMB2_GLOBAL_CAP_DIRECTORY_LEASING; + +- if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION || +- (!(server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION_OFF) && +- conn->cli_cap & SMB2_GLOBAL_CAP_ENCRYPTION)) +- conn->vals->capabilities |= SMB2_GLOBAL_CAP_ENCRYPTION; +- + if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB3_MULTICHANNEL) + conn->vals->capabilities |= SMB2_GLOBAL_CAP_MULTI_CHANNEL; + +diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c +index 199c31c275e5b..924f08326eef4 100644 +--- a/fs/smb/server/smb2pdu.c ++++ b/fs/smb/server/smb2pdu.c +@@ -5631,8 +5631,9 @@ static int smb2_rename(struct ksmbd_work *work, + if (!file_info->ReplaceIfExists) + flags = RENAME_NOREPLACE; + +- smb_break_all_levII_oplock(work, fp, 0); + rc = ksmbd_vfs_rename(work, &fp->filp->f_path, new_name, flags); ++ if (!rc) ++ smb_break_all_levII_oplock(work, fp, 0); + out: + kfree(new_name); + return rc; +diff --git a/fs/smb/server/transport_ipc.c b/fs/smb/server/transport_ipc.c +index f29bb03f0dc47..8752ac82c557b 100644 +--- a/fs/smb/server/transport_ipc.c ++++ b/fs/smb/server/transport_ipc.c +@@ -65,6 +65,7 @@ struct ipc_msg_table_entry { + struct hlist_node ipc_table_hlist; + + void *response; ++ unsigned int msg_sz; + }; + + static struct delayed_work ipc_timer_work; +@@ -275,6 +276,7 @@ static int handle_response(int type, void *payload, size_t sz) + } + + memcpy(entry->response, payload, sz); ++ entry->msg_sz = sz; + wake_up_interruptible(&entry->wait); + ret = 0; + break; +@@ -453,6 +455,34 @@ static int ipc_msg_send(struct ksmbd_ipc_msg *msg) + return ret; + } + ++static int ipc_validate_msg(struct ipc_msg_table_entry *entry) ++{ ++ unsigned int msg_sz = entry->msg_sz; ++ ++ if (entry->type == KSMBD_EVENT_RPC_REQUEST) { ++ struct ksmbd_rpc_command *resp = entry->response; ++ ++ msg_sz = sizeof(struct ksmbd_rpc_command) + resp->payload_sz; ++ } else if (entry->type == KSMBD_EVENT_SPNEGO_AUTHEN_REQUEST) { ++ struct ksmbd_spnego_authen_response *resp = entry->response; ++ ++ msg_sz = sizeof(struct ksmbd_spnego_authen_response) + ++ resp->session_key_len + resp->spnego_blob_len; ++ } else if (entry->type == KSMBD_EVENT_SHARE_CONFIG_REQUEST) { ++ struct ksmbd_share_config_response *resp = entry->response; ++ ++ if (resp->payload_sz) { ++ if (resp->payload_sz < resp->veto_list_sz) ++ return -EINVAL; ++ ++ msg_sz = sizeof(struct ksmbd_share_config_response) + ++ resp->payload_sz; ++ } ++ } ++ ++ return entry->msg_sz != msg_sz ? 
-EINVAL : 0; ++} ++ + static void *ipc_msg_send_request(struct ksmbd_ipc_msg *msg, unsigned int handle) + { + struct ipc_msg_table_entry entry; +@@ -477,6 +507,13 @@ static void *ipc_msg_send_request(struct ksmbd_ipc_msg *msg, unsigned int handle + ret = wait_event_interruptible_timeout(entry.wait, + entry.response != NULL, + IPC_WAIT_TIMEOUT); ++ if (entry.response) { ++ ret = ipc_validate_msg(&entry); ++ if (ret) { ++ kvfree(entry.response); ++ entry.response = NULL; ++ } ++ } + out: + down_write(&ipc_msg_table_lock); + hash_del(&entry.ipc_table_hlist); +diff --git a/fs/vboxsf/super.c b/fs/vboxsf/super.c +index 1fb8f4df60cbb..9848af78215bf 100644 +--- a/fs/vboxsf/super.c ++++ b/fs/vboxsf/super.c +@@ -151,7 +151,7 @@ static int vboxsf_fill_super(struct super_block *sb, struct fs_context *fc) + if (!sbi->nls) { + vbg_err("vboxsf: Count not load '%s' nls\n", nls_name); + err = -EINVAL; +- goto fail_free; ++ goto fail_destroy_idr; + } + } + +@@ -224,6 +224,7 @@ static int vboxsf_fill_super(struct super_block *sb, struct fs_context *fc) + ida_simple_remove(&vboxsf_bdi_ida, sbi->bdi_id); + if (sbi->nls) + unload_nls(sbi->nls); ++fail_destroy_idr: + idr_destroy(&sbi->ino_idr); + kfree(sbi); + return err; +diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h +index 31029f4f7be85..c4aabbf002f7c 100644 +--- a/include/kvm/arm_pmu.h ++++ b/include/kvm/arm_pmu.h +@@ -86,7 +86,7 @@ void kvm_vcpu_pmu_resync_el0(void); + */ + #define kvm_pmu_update_vcpu_events(vcpu) \ + do { \ +- if (!has_vhe() && kvm_vcpu_has_pmu(vcpu)) \ ++ if (!has_vhe() && kvm_arm_support_pmu_v3()) \ + vcpu->arch.pmu.events = *kvm_get_pmu_events(); \ + } while (0) + +diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h +index d0807ad43f933..6e950594215a0 100644 +--- a/include/linux/avf/virtchnl.h ++++ b/include/linux/avf/virtchnl.h +@@ -4,6 +4,11 @@ + #ifndef _VIRTCHNL_H_ + #define _VIRTCHNL_H_ + ++#include <linux/bitops.h> ++#include <linux/bits.h> ++#include <linux/overflow.h> ++#include <uapi/linux/if_ether.h> ++ + /* Description: + * This header file describes the Virtual Function (VF) - Physical Function + * (PF) communication protocol used by the drivers for all devices starting +diff --git a/include/linux/bpf.h b/include/linux/bpf.h +index 9b08d792fa95a..2ebb5d4d43dc6 100644 +--- a/include/linux/bpf.h ++++ b/include/linux/bpf.h +@@ -1524,12 +1524,26 @@ struct bpf_link { + enum bpf_link_type type; + const struct bpf_link_ops *ops; + struct bpf_prog *prog; +- struct work_struct work; ++ /* rcu is used before freeing, work can be used to schedule that ++ * RCU-based freeing before that, so they never overlap ++ */ ++ union { ++ struct rcu_head rcu; ++ struct work_struct work; ++ }; + }; + + struct bpf_link_ops { + void (*release)(struct bpf_link *link); ++ /* deallocate link resources callback, called without RCU grace period ++ * waiting ++ */ + void (*dealloc)(struct bpf_link *link); ++ /* deallocate link resources callback, called after RCU grace period; ++ * if underlying BPF program is sleepable we go through tasks trace ++ * RCU GP and then "classic" RCU GP ++ */ ++ void (*dealloc_deferred)(struct bpf_link *link); + int (*detach)(struct bpf_link *link); + int (*update_prog)(struct bpf_link *link, struct bpf_prog *new_prog, + struct bpf_prog *old_prog); +diff --git a/include/linux/device.h b/include/linux/device.h +index 99496a0a5ddb3..a070160fbcb8e 100644 +--- a/include/linux/device.h ++++ b/include/linux/device.h +@@ -1250,6 +1250,7 @@ void device_link_del(struct device_link *link); + void 
device_link_remove(void *consumer, struct device *supplier); + void device_links_supplier_sync_state_pause(void); + void device_links_supplier_sync_state_resume(void); ++void device_link_wait_removal(void); + + /* Create alias, so I can be autoloaded. */ + #define MODULE_ALIAS_CHARDEV(major,minor) \ +diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h +index 731beb3198c4f..8215e193178aa 100644 +--- a/include/linux/io_uring_types.h ++++ b/include/linux/io_uring_types.h +@@ -250,7 +250,6 @@ struct io_ring_ctx { + + struct io_submit_state submit_state; + +- struct io_buffer_list *io_bl; + struct xarray io_bl_xa; + + struct io_hash_table cancel_table_locked; +diff --git a/include/linux/secretmem.h b/include/linux/secretmem.h +index 35f3a4a8ceb1e..acf7e1a3f3def 100644 +--- a/include/linux/secretmem.h ++++ b/include/linux/secretmem.h +@@ -13,10 +13,10 @@ static inline bool folio_is_secretmem(struct folio *folio) + /* + * Using folio_mapping() is quite slow because of the actual call + * instruction. +- * We know that secretmem pages are not compound and LRU so we can ++ * We know that secretmem pages are not compound, so we can + * save a couple of cycles here. + */ +- if (folio_test_large(folio) || !folio_test_lru(folio)) ++ if (folio_test_large(folio)) + return false; + + mapping = (struct address_space *) +diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h +index 2922059908cc5..9e61f6df6bc55 100644 +--- a/include/linux/skbuff.h ++++ b/include/linux/skbuff.h +@@ -736,8 +736,6 @@ typedef unsigned char *sk_buff_data_t; + * @list: queue head + * @ll_node: anchor in an llist (eg socket defer_list) + * @sk: Socket we are owned by +- * @ip_defrag_offset: (aka @sk) alternate use of @sk, used in +- * fragmentation management + * @dev: Device we arrived on/are leaving by + * @dev_scratch: (aka @dev) alternate use of @dev when @dev would be %NULL + * @cb: Control buffer. Free for use by every layer. Put private vars here +@@ -860,10 +858,7 @@ struct sk_buff { + struct llist_node ll_node; + }; + +- union { +- struct sock *sk; +- int ip_defrag_offset; +- }; ++ struct sock *sk; + + union { + ktime_t tstamp; +diff --git a/include/linux/udp.h b/include/linux/udp.h +index d04188714dca1..94e63b2695406 100644 +--- a/include/linux/udp.h ++++ b/include/linux/udp.h +@@ -140,6 +140,24 @@ static inline void udp_cmsg_recv(struct msghdr *msg, struct sock *sk, + } + } + ++DECLARE_STATIC_KEY_FALSE(udp_encap_needed_key); ++#if IS_ENABLED(CONFIG_IPV6) ++DECLARE_STATIC_KEY_FALSE(udpv6_encap_needed_key); ++#endif ++ ++static inline bool udp_encap_needed(void) ++{ ++ if (static_branch_unlikely(&udp_encap_needed_key)) ++ return true; ++ ++#if IS_ENABLED(CONFIG_IPV6) ++ if (static_branch_unlikely(&udpv6_encap_needed_key)) ++ return true; ++#endif ++ ++ return false; ++} ++ + static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb) + { + if (!skb_is_gso(skb)) +@@ -153,6 +171,16 @@ static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb) + !udp_test_bit(ACCEPT_FRAGLIST, sk)) + return true; + ++ /* GSO packets lacking the SKB_GSO_UDP_TUNNEL/_CSUM bits might still ++ * land in a tunnel as the socket check in udp_gro_receive cannot be ++ * foolproof. 
++ */ ++ if (udp_encap_needed() && ++ READ_ONCE(udp_sk(sk)->encap_rcv) && ++ !(skb_shinfo(skb)->gso_type & ++ (SKB_GSO_UDP_TUNNEL | SKB_GSO_UDP_TUNNEL_CSUM))) ++ return true; ++ + return false; + } + +diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h +index 0d231024570a3..03e68a8e229f5 100644 +--- a/include/net/bluetooth/hci.h ++++ b/include/net/bluetooth/hci.h +@@ -176,6 +176,15 @@ enum { + */ + HCI_QUIRK_USE_BDADDR_PROPERTY, + ++ /* When this quirk is set, the Bluetooth Device Address provided by ++ * the 'local-bd-address' fwnode property is incorrectly specified in ++ * big-endian order. ++ * ++ * This quirk can be set before hci_register_dev is called or ++ * during the hdev->setup vendor callback. ++ */ ++ HCI_QUIRK_BDADDR_PROPERTY_BROKEN, ++ + /* When this quirk is set, the duplicate filtering during + * scanning is based on Bluetooth devices addresses. To allow + * RSSI based updates, restart scanning if needed. +diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h +index 01a73bf74fa19..6ecac01115d9c 100644 +--- a/include/net/inet_connection_sock.h ++++ b/include/net/inet_connection_sock.h +@@ -173,6 +173,7 @@ void inet_csk_init_xmit_timers(struct sock *sk, + void (*delack_handler)(struct timer_list *), + void (*keepalive_handler)(struct timer_list *)); + void inet_csk_clear_xmit_timers(struct sock *sk); ++void inet_csk_clear_xmit_timers_sync(struct sock *sk); + + static inline void inet_csk_schedule_ack(struct sock *sk) + { +diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h +index 4d43adf186064..cd526fd31b458 100644 +--- a/include/net/mana/mana.h ++++ b/include/net/mana/mana.h +@@ -39,7 +39,6 @@ enum TRI_STATE { + #define COMP_ENTRY_SIZE 64 + + #define RX_BUFFERS_PER_QUEUE 512 +-#define MANA_RX_DATA_ALIGN 64 + + #define MAX_SEND_BUFFERS_PER_QUEUE 256 + +diff --git a/include/net/sock.h b/include/net/sock.h +index e70c903b04f30..25780942ec8bf 100644 +--- a/include/net/sock.h ++++ b/include/net/sock.h +@@ -1808,6 +1808,13 @@ static inline void sock_owned_by_me(const struct sock *sk) + #endif + } + ++static inline void sock_not_owned_by_me(const struct sock *sk) ++{ ++#ifdef CONFIG_LOCKDEP ++ WARN_ON_ONCE(lockdep_sock_is_held(sk) && debug_locks); ++#endif ++} ++ + static inline bool sock_owned_by_user(const struct sock *sk) + { + sock_owned_by_me(sk); +diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c +index aed10bae50acb..2c0a9a98272ca 100644 +--- a/io_uring/io_uring.c ++++ b/io_uring/io_uring.c +@@ -148,6 +148,7 @@ static bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx, + static void io_queue_sqe(struct io_kiocb *req); + + struct kmem_cache *req_cachep; ++static struct workqueue_struct *iou_wq __ro_after_init; + + static int __read_mostly sysctl_io_uring_disabled; + static int __read_mostly sysctl_io_uring_group = -1; +@@ -343,7 +344,6 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) + err: + kfree(ctx->cancel_table.hbs); + kfree(ctx->cancel_table_locked.hbs); +- kfree(ctx->io_bl); + xa_destroy(&ctx->io_bl_xa); + kfree(ctx); + return NULL; +@@ -2934,7 +2934,6 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx) + io_wq_put_hash(ctx->hash_map); + kfree(ctx->cancel_table.hbs); + kfree(ctx->cancel_table_locked.hbs); +- kfree(ctx->io_bl); + xa_destroy(&ctx->io_bl_xa); + kfree(ctx); + } +@@ -3182,7 +3181,7 @@ static __cold void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx) + * noise and overhead, there's no discernable change in runtime + * over using 
system_wq. + */ +- queue_work(system_unbound_wq, &ctx->exit_work); ++ queue_work(iou_wq, &ctx->exit_work); + } + + static int io_uring_release(struct inode *inode, struct file *file) +@@ -3430,14 +3429,15 @@ static void *io_uring_validate_mmap_request(struct file *file, + ptr = ctx->sq_sqes; + break; + case IORING_OFF_PBUF_RING: { ++ struct io_buffer_list *bl; + unsigned int bgid; + + bgid = (offset & ~IORING_OFF_MMAP_MASK) >> IORING_OFF_PBUF_SHIFT; +- rcu_read_lock(); +- ptr = io_pbuf_get_address(ctx, bgid); +- rcu_read_unlock(); +- if (!ptr) +- return ERR_PTR(-EINVAL); ++ bl = io_pbuf_get_bl(ctx, bgid); ++ if (IS_ERR(bl)) ++ return bl; ++ ptr = bl->buf_ring; ++ io_put_bl(ctx, bl); + break; + } + default: +@@ -4666,6 +4666,8 @@ static int __init io_uring_init(void) + offsetof(struct io_kiocb, cmd.data), + sizeof_field(struct io_kiocb, cmd.data), NULL); + ++ iou_wq = alloc_workqueue("iou_exit", WQ_UNBOUND, 64); ++ + #ifdef CONFIG_SYSCTL + register_sysctl_init("kernel", kernel_io_uring_disabled_table); + #endif +diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c +index e8516f3bbbaaa..26a00920042c4 100644 +--- a/io_uring/kbuf.c ++++ b/io_uring/kbuf.c +@@ -17,8 +17,6 @@ + + #define IO_BUFFER_LIST_BUF_PER_PAGE (PAGE_SIZE / sizeof(struct io_uring_buf)) + +-#define BGID_ARRAY 64 +- + /* BIDs are addressed by a 16-bit field in a CQE */ + #define MAX_BIDS_PER_BGID (1 << 16) + +@@ -31,13 +29,9 @@ struct io_provide_buf { + __u16 bid; + }; + +-static struct io_buffer_list *__io_buffer_get_list(struct io_ring_ctx *ctx, +- struct io_buffer_list *bl, +- unsigned int bgid) ++static inline struct io_buffer_list *__io_buffer_get_list(struct io_ring_ctx *ctx, ++ unsigned int bgid) + { +- if (bl && bgid < BGID_ARRAY) +- return &bl[bgid]; +- + return xa_load(&ctx->io_bl_xa, bgid); + } + +@@ -53,7 +47,7 @@ static inline struct io_buffer_list *io_buffer_get_list(struct io_ring_ctx *ctx, + { + lockdep_assert_held(&ctx->uring_lock); + +- return __io_buffer_get_list(ctx, ctx->io_bl, bgid); ++ return __io_buffer_get_list(ctx, bgid); + } + + static int io_buffer_add_list(struct io_ring_ctx *ctx, +@@ -65,11 +59,7 @@ static int io_buffer_add_list(struct io_ring_ctx *ctx, + * always under the ->uring_lock, but the RCU lookup from mmap does. 
+ */ + bl->bgid = bgid; +- smp_store_release(&bl->is_ready, 1); +- +- if (bgid < BGID_ARRAY) +- return 0; +- ++ atomic_set(&bl->refs, 1); + return xa_err(xa_store(&ctx->io_bl_xa, bgid, bl, GFP_KERNEL)); + } + +@@ -215,24 +205,6 @@ void __user *io_buffer_select(struct io_kiocb *req, size_t *len, + return ret; + } + +-static __cold int io_init_bl_list(struct io_ring_ctx *ctx) +-{ +- struct io_buffer_list *bl; +- int i; +- +- bl = kcalloc(BGID_ARRAY, sizeof(struct io_buffer_list), GFP_KERNEL); +- if (!bl) +- return -ENOMEM; +- +- for (i = 0; i < BGID_ARRAY; i++) { +- INIT_LIST_HEAD(&bl[i].buf_list); +- bl[i].bgid = i; +- } +- +- smp_store_release(&ctx->io_bl, bl); +- return 0; +-} +- + /* + * Mark the given mapped range as free for reuse + */ +@@ -301,22 +273,22 @@ static int __io_remove_buffers(struct io_ring_ctx *ctx, + return i; + } + ++void io_put_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl) ++{ ++ if (atomic_dec_and_test(&bl->refs)) { ++ __io_remove_buffers(ctx, bl, -1U); ++ kfree_rcu(bl, rcu); ++ } ++} ++ + void io_destroy_buffers(struct io_ring_ctx *ctx) + { + struct io_buffer_list *bl; + unsigned long index; +- int i; +- +- for (i = 0; i < BGID_ARRAY; i++) { +- if (!ctx->io_bl) +- break; +- __io_remove_buffers(ctx, &ctx->io_bl[i], -1U); +- } + + xa_for_each(&ctx->io_bl_xa, index, bl) { + xa_erase(&ctx->io_bl_xa, bl->bgid); +- __io_remove_buffers(ctx, bl, -1U); +- kfree_rcu(bl, rcu); ++ io_put_bl(ctx, bl); + } + + while (!list_empty(&ctx->io_buffers_pages)) { +@@ -485,12 +457,6 @@ int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags) + + io_ring_submit_lock(ctx, issue_flags); + +- if (unlikely(p->bgid < BGID_ARRAY && !ctx->io_bl)) { +- ret = io_init_bl_list(ctx); +- if (ret) +- goto err; +- } +- + bl = io_buffer_get_list(ctx, p->bgid); + if (unlikely(!bl)) { + bl = kzalloc(sizeof(*bl), GFP_KERNEL_ACCOUNT); +@@ -503,14 +469,9 @@ int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags) + if (ret) { + /* + * Doesn't need rcu free as it was never visible, but +- * let's keep it consistent throughout. Also can't +- * be a lower indexed array group, as adding one +- * where lookup failed cannot happen. ++ * let's keep it consistent throughout. + */ +- if (p->bgid >= BGID_ARRAY) +- kfree_rcu(bl, rcu); +- else +- WARN_ON_ONCE(1); ++ kfree_rcu(bl, rcu); + goto err; + } + } +@@ -675,12 +636,6 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg) + if (reg.ring_entries >= 65536) + return -EINVAL; + +- if (unlikely(reg.bgid < BGID_ARRAY && !ctx->io_bl)) { +- int ret = io_init_bl_list(ctx); +- if (ret) +- return ret; +- } +- + bl = io_buffer_get_list(ctx, reg.bgid); + if (bl) { + /* if mapped buffer ring OR classic exists, don't allow */ +@@ -729,31 +684,40 @@ int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg) + if (!bl->is_mapped) + return -EINVAL; + +- __io_remove_buffers(ctx, bl, -1U); +- if (bl->bgid >= BGID_ARRAY) { +- xa_erase(&ctx->io_bl_xa, bl->bgid); +- kfree_rcu(bl, rcu); +- } ++ xa_erase(&ctx->io_bl_xa, bl->bgid); ++ io_put_bl(ctx, bl); + return 0; + } + +-void *io_pbuf_get_address(struct io_ring_ctx *ctx, unsigned long bgid) ++struct io_buffer_list *io_pbuf_get_bl(struct io_ring_ctx *ctx, ++ unsigned long bgid) + { + struct io_buffer_list *bl; ++ bool ret; + +- bl = __io_buffer_get_list(ctx, smp_load_acquire(&ctx->io_bl), bgid); +- +- if (!bl || !bl->is_mmap) +- return NULL; + /* +- * Ensure the list is fully setup. 
Only strictly needed for RCU lookup +- * via mmap, and in that case only for the array indexed groups. For +- * the xarray lookups, it's either visible and ready, or not at all. ++ * We have to be a bit careful here - we're inside mmap and cannot grab ++ * the uring_lock. This means the buffer_list could be simultaneously ++ * going away, if someone is trying to be sneaky. Look it up under rcu ++ * so we know it's not going away, and attempt to grab a reference to ++ * it. If the ref is already zero, then fail the mapping. If successful, ++ * the caller will call io_put_bl() to drop the the reference at at the ++ * end. This may then safely free the buffer_list (and drop the pages) ++ * at that point, vm_insert_pages() would've already grabbed the ++ * necessary vma references. + */ +- if (!smp_load_acquire(&bl->is_ready)) +- return NULL; +- +- return bl->buf_ring; ++ rcu_read_lock(); ++ bl = xa_load(&ctx->io_bl_xa, bgid); ++ /* must be a mmap'able buffer ring and have pages */ ++ ret = false; ++ if (bl && bl->is_mmap) ++ ret = atomic_inc_not_zero(&bl->refs); ++ rcu_read_unlock(); ++ ++ if (ret) ++ return bl; ++ ++ return ERR_PTR(-EINVAL); + } + + /* +diff --git a/io_uring/kbuf.h b/io_uring/kbuf.h +index 3d0cb6b8c1ed2..8d7929369501d 100644 +--- a/io_uring/kbuf.h ++++ b/io_uring/kbuf.h +@@ -25,12 +25,12 @@ struct io_buffer_list { + __u16 head; + __u16 mask; + ++ atomic_t refs; ++ + /* ring mapped provided buffers */ + __u8 is_mapped; + /* ring mapped provided buffers, but mmap'ed by application */ + __u8 is_mmap; +- /* bl is visible from an RCU point of view for lookup */ +- __u8 is_ready; + }; + + struct io_buffer { +@@ -60,7 +60,9 @@ unsigned int __io_put_kbuf(struct io_kiocb *req, unsigned issue_flags); + + void io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags); + +-void *io_pbuf_get_address(struct io_ring_ctx *ctx, unsigned long bgid); ++void io_put_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl); ++struct io_buffer_list *io_pbuf_get_bl(struct io_ring_ctx *ctx, ++ unsigned long bgid); + + static inline void io_kbuf_recycle_ring(struct io_kiocb *req) + { +diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c +index 4b7d186c7622d..4902a7487f076 100644 +--- a/kernel/bpf/syscall.c ++++ b/kernel/bpf/syscall.c +@@ -2866,17 +2866,46 @@ void bpf_link_inc(struct bpf_link *link) + atomic64_inc(&link->refcnt); + } + ++static void bpf_link_defer_dealloc_rcu_gp(struct rcu_head *rcu) ++{ ++ struct bpf_link *link = container_of(rcu, struct bpf_link, rcu); ++ ++ /* free bpf_link and its containing memory */ ++ link->ops->dealloc_deferred(link); ++} ++ ++static void bpf_link_defer_dealloc_mult_rcu_gp(struct rcu_head *rcu) ++{ ++ if (rcu_trace_implies_rcu_gp()) ++ bpf_link_defer_dealloc_rcu_gp(rcu); ++ else ++ call_rcu(rcu, bpf_link_defer_dealloc_rcu_gp); ++} ++ + /* bpf_link_free is guaranteed to be called from process context */ + static void bpf_link_free(struct bpf_link *link) + { ++ bool sleepable = false; ++ + bpf_link_free_id(link->id); + if (link->prog) { ++ sleepable = link->prog->aux->sleepable; + /* detach BPF program, clean up used resources */ + link->ops->release(link); + bpf_prog_put(link->prog); + } +- /* free bpf_link and its containing memory */ +- link->ops->dealloc(link); ++ if (link->ops->dealloc_deferred) { ++ /* schedule BPF link deallocation; if underlying BPF program ++ * is sleepable, we need to first wait for RCU tasks trace ++ * sync, then go through "classic" RCU grace period ++ */ ++ if (sleepable) ++ call_rcu_tasks_trace(&link->rcu, 
bpf_link_defer_dealloc_mult_rcu_gp); ++ else ++ call_rcu(&link->rcu, bpf_link_defer_dealloc_rcu_gp); ++ } ++ if (link->ops->dealloc) ++ link->ops->dealloc(link); + } + + static void bpf_link_put_deferred(struct work_struct *work) +@@ -3381,7 +3410,7 @@ static int bpf_raw_tp_link_fill_link_info(const struct bpf_link *link, + + static const struct bpf_link_ops bpf_raw_tp_link_lops = { + .release = bpf_raw_tp_link_release, +- .dealloc = bpf_raw_tp_link_dealloc, ++ .dealloc_deferred = bpf_raw_tp_link_dealloc, + .show_fdinfo = bpf_raw_tp_link_show_fdinfo, + .fill_link_info = bpf_raw_tp_link_fill_link_info, + }; +diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c +index 396c4c66932f2..c9fc734989c68 100644 +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -6637,6 +6637,11 @@ static int check_stack_access_within_bounds( + err = check_stack_slot_within_bounds(env, min_off, state, type); + if (!err && max_off > 0) + err = -EINVAL; /* out of stack access into non-negative offsets */ ++ if (!err && access_size < 0) ++ /* access_size should not be negative (or overflow an int); others checks ++ * along the way should have prevented such an access. ++ */ ++ err = -EFAULT; /* invalid negative access size; integer overflow? */ + + if (err) { + if (tnum_is_const(reg->var_off)) { +diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c +index 1d76f3b014aee..1e79084a9d9d2 100644 +--- a/kernel/trace/bpf_trace.c ++++ b/kernel/trace/bpf_trace.c +@@ -2639,7 +2639,7 @@ static int bpf_kprobe_multi_link_fill_link_info(const struct bpf_link *link, + + static const struct bpf_link_ops bpf_kprobe_multi_link_lops = { + .release = bpf_kprobe_multi_link_release, +- .dealloc = bpf_kprobe_multi_link_dealloc, ++ .dealloc_deferred = bpf_kprobe_multi_link_dealloc, + .fill_link_info = bpf_kprobe_multi_link_fill_link_info, + }; + +@@ -3065,6 +3065,9 @@ static void bpf_uprobe_multi_link_release(struct bpf_link *link) + + umulti_link = container_of(link, struct bpf_uprobe_multi_link, link); + bpf_uprobe_unregister(&umulti_link->path, umulti_link->uprobes, umulti_link->cnt); ++ if (umulti_link->task) ++ put_task_struct(umulti_link->task); ++ path_put(&umulti_link->path); + } + + static void bpf_uprobe_multi_link_dealloc(struct bpf_link *link) +@@ -3072,16 +3075,13 @@ static void bpf_uprobe_multi_link_dealloc(struct bpf_link *link) + struct bpf_uprobe_multi_link *umulti_link; + + umulti_link = container_of(link, struct bpf_uprobe_multi_link, link); +- if (umulti_link->task) +- put_task_struct(umulti_link->task); +- path_put(&umulti_link->path); + kvfree(umulti_link->uprobes); + kfree(umulti_link); + } + + static const struct bpf_link_ops bpf_uprobe_multi_link_lops = { + .release = bpf_uprobe_multi_link_release, +- .dealloc = bpf_uprobe_multi_link_dealloc, ++ .dealloc_deferred = bpf_uprobe_multi_link_dealloc, + }; + + static int uprobe_prog_run(struct bpf_uprobe *uprobe, +diff --git a/mm/memory.c b/mm/memory.c +index 78e05d3e9e4ac..e44d4d887cf6d 100644 +--- a/mm/memory.c ++++ b/mm/memory.c +@@ -5674,6 +5674,10 @@ int follow_phys(struct vm_area_struct *vma, + goto out; + pte = ptep_get(ptep); + ++ /* Never return PFNs of anon folios in COW mappings. 
*/ ++ if (vm_normal_folio(vma, address, pte)) ++ goto unlock; ++ + if ((flags & FOLL_WRITE) && !pte_write(pte)) + goto unlock; + +diff --git a/net/9p/client.c b/net/9p/client.c +index e265a0ca6bddd..f7e90b4769bba 100644 +--- a/net/9p/client.c ++++ b/net/9p/client.c +@@ -1583,7 +1583,7 @@ p9_client_read_once(struct p9_fid *fid, u64 offset, struct iov_iter *to, + received = rsize; + } + +- p9_debug(P9_DEBUG_9P, "<<< RREAD count %d\n", count); ++ p9_debug(P9_DEBUG_9P, "<<< RREAD count %d\n", received); + + if (non_zc) { + int n = copy_to_iter(dataptr, received, to); +@@ -1609,9 +1609,6 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err) + int total = 0; + *err = 0; + +- p9_debug(P9_DEBUG_9P, ">>> TWRITE fid %d offset %llu count %zd\n", +- fid->fid, offset, iov_iter_count(from)); +- + while (iov_iter_count(from)) { + int count = iov_iter_count(from); + int rsize = fid->iounit; +@@ -1623,6 +1620,9 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err) + if (count < rsize) + rsize = count; + ++ p9_debug(P9_DEBUG_9P, ">>> TWRITE fid %d offset %llu count %d (/%d)\n", ++ fid->fid, offset, rsize, count); ++ + /* Don't bother zerocopy for small IO (< 1024) */ + if (clnt->trans_mod->zc_request && rsize > 1024) { + req = p9_client_zc_rpc(clnt, P9_TWRITE, NULL, from, 0, +@@ -1650,7 +1650,7 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err) + written = rsize; + } + +- p9_debug(P9_DEBUG_9P, "<<< RWRITE count %d\n", count); ++ p9_debug(P9_DEBUG_9P, "<<< RWRITE count %d\n", written); + + p9_req_put(clnt, req); + iov_iter_revert(from, count - written - iov_iter_count(from)); +diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c +index c5462486dbca1..282ec581c0720 100644 +--- a/net/ax25/ax25_dev.c ++++ b/net/ax25/ax25_dev.c +@@ -105,7 +105,7 @@ void ax25_dev_device_down(struct net_device *dev) + spin_lock_bh(&ax25_dev_lock); + + #ifdef CONFIG_AX25_DAMA_SLAVE +- ax25_ds_del_timer(ax25_dev); ++ timer_shutdown_sync(&ax25_dev->dama.slave_timer); + #endif + + /* +diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c +index 233453807b509..ce3ff2fa72e58 100644 +--- a/net/bluetooth/hci_debugfs.c ++++ b/net/bluetooth/hci_debugfs.c +@@ -218,10 +218,12 @@ static int conn_info_min_age_set(void *data, u64 val) + { + struct hci_dev *hdev = data; + +- if (val == 0 || val > hdev->conn_info_max_age) ++ hci_dev_lock(hdev); ++ if (val == 0 || val > hdev->conn_info_max_age) { ++ hci_dev_unlock(hdev); + return -EINVAL; ++ } + +- hci_dev_lock(hdev); + hdev->conn_info_min_age = val; + hci_dev_unlock(hdev); + +@@ -246,10 +248,12 @@ static int conn_info_max_age_set(void *data, u64 val) + { + struct hci_dev *hdev = data; + +- if (val == 0 || val < hdev->conn_info_min_age) ++ hci_dev_lock(hdev); ++ if (val == 0 || val < hdev->conn_info_min_age) { ++ hci_dev_unlock(hdev); + return -EINVAL; ++ } + +- hci_dev_lock(hdev); + hdev->conn_info_max_age = val; + hci_dev_unlock(hdev); + +@@ -567,10 +571,12 @@ static int sniff_min_interval_set(void *data, u64 val) + { + struct hci_dev *hdev = data; + +- if (val == 0 || val % 2 || val > hdev->sniff_max_interval) ++ hci_dev_lock(hdev); ++ if (val == 0 || val % 2 || val > hdev->sniff_max_interval) { ++ hci_dev_unlock(hdev); + return -EINVAL; ++ } + +- hci_dev_lock(hdev); + hdev->sniff_min_interval = val; + hci_dev_unlock(hdev); + +@@ -595,10 +601,12 @@ static int sniff_max_interval_set(void *data, u64 val) + { + struct hci_dev *hdev = data; + +- if (val == 0 || val % 2 || val < 
hdev->sniff_min_interval) ++ hci_dev_lock(hdev); ++ if (val == 0 || val % 2 || val < hdev->sniff_min_interval) { ++ hci_dev_unlock(hdev); + return -EINVAL; ++ } + +- hci_dev_lock(hdev); + hdev->sniff_max_interval = val; + hci_dev_unlock(hdev); + +@@ -850,10 +858,12 @@ static int conn_min_interval_set(void *data, u64 val) + { + struct hci_dev *hdev = data; + +- if (val < 0x0006 || val > 0x0c80 || val > hdev->le_conn_max_interval) ++ hci_dev_lock(hdev); ++ if (val < 0x0006 || val > 0x0c80 || val > hdev->le_conn_max_interval) { ++ hci_dev_unlock(hdev); + return -EINVAL; ++ } + +- hci_dev_lock(hdev); + hdev->le_conn_min_interval = val; + hci_dev_unlock(hdev); + +@@ -878,10 +888,12 @@ static int conn_max_interval_set(void *data, u64 val) + { + struct hci_dev *hdev = data; + +- if (val < 0x0006 || val > 0x0c80 || val < hdev->le_conn_min_interval) ++ hci_dev_lock(hdev); ++ if (val < 0x0006 || val > 0x0c80 || val < hdev->le_conn_min_interval) { ++ hci_dev_unlock(hdev); + return -EINVAL; ++ } + +- hci_dev_lock(hdev); + hdev->le_conn_max_interval = val; + hci_dev_unlock(hdev); + +@@ -990,10 +1002,12 @@ static int adv_min_interval_set(void *data, u64 val) + { + struct hci_dev *hdev = data; + +- if (val < 0x0020 || val > 0x4000 || val > hdev->le_adv_max_interval) ++ hci_dev_lock(hdev); ++ if (val < 0x0020 || val > 0x4000 || val > hdev->le_adv_max_interval) { ++ hci_dev_unlock(hdev); + return -EINVAL; ++ } + +- hci_dev_lock(hdev); + hdev->le_adv_min_interval = val; + hci_dev_unlock(hdev); + +@@ -1018,10 +1032,12 @@ static int adv_max_interval_set(void *data, u64 val) + { + struct hci_dev *hdev = data; + +- if (val < 0x0020 || val > 0x4000 || val < hdev->le_adv_min_interval) ++ hci_dev_lock(hdev); ++ if (val < 0x0020 || val > 0x4000 || val < hdev->le_adv_min_interval) { ++ hci_dev_unlock(hdev); + return -EINVAL; ++ } + +- hci_dev_lock(hdev); + hdev->le_adv_max_interval = val; + hci_dev_unlock(hdev); + +diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c +index 2bb8ab9302a97..bb0e5902a3e60 100644 +--- a/net/bluetooth/hci_event.c ++++ b/net/bluetooth/hci_event.c +@@ -3219,6 +3219,31 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data, + if (test_bit(HCI_ENCRYPT, &hdev->flags)) + set_bit(HCI_CONN_ENCRYPT, &conn->flags); + ++ /* "Link key request" completed ahead of "connect request" completes */ ++ if (ev->encr_mode == 1 && !test_bit(HCI_CONN_ENCRYPT, &conn->flags) && ++ ev->link_type == ACL_LINK) { ++ struct link_key *key; ++ struct hci_cp_read_enc_key_size cp; ++ ++ key = hci_find_link_key(hdev, &ev->bdaddr); ++ if (key) { ++ set_bit(HCI_CONN_ENCRYPT, &conn->flags); ++ ++ if (!(hdev->commands[20] & 0x10)) { ++ conn->enc_key_size = HCI_LINK_KEY_SIZE; ++ } else { ++ cp.handle = cpu_to_le16(conn->handle); ++ if (hci_send_cmd(hdev, HCI_OP_READ_ENC_KEY_SIZE, ++ sizeof(cp), &cp)) { ++ bt_dev_err(hdev, "sending read key size failed"); ++ conn->enc_key_size = HCI_LINK_KEY_SIZE; ++ } ++ } ++ ++ hci_encrypt_cfm(conn, ev->status); ++ } ++ } ++ + /* Get remote features */ + if (conn->type == ACL_LINK) { + struct hci_cp_read_remote_features cp; +diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c +index 9b241eabca3e8..d6c0633bfe5bf 100644 +--- a/net/bluetooth/hci_sync.c ++++ b/net/bluetooth/hci_sync.c +@@ -3292,7 +3292,10 @@ static void hci_dev_get_bd_addr_from_property(struct hci_dev *hdev) + if (ret < 0 || !bacmp(&ba, BDADDR_ANY)) + return; + +- bacpy(&hdev->public_addr, &ba); ++ if (test_bit(HCI_QUIRK_BDADDR_PROPERTY_BROKEN, &hdev->quirks)) ++ 
baswap(&hdev->public_addr, &ba); ++ else ++ bacpy(&hdev->public_addr, &ba); + } + + struct hci_init_stage { +diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c +index aa23479b20b2a..ed62c1026fe93 100644 +--- a/net/bridge/netfilter/ebtables.c ++++ b/net/bridge/netfilter/ebtables.c +@@ -1111,6 +1111,8 @@ static int do_replace(struct net *net, sockptr_t arg, unsigned int len) + struct ebt_table_info *newinfo; + struct ebt_replace tmp; + ++ if (len < sizeof(tmp)) ++ return -EINVAL; + if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0) + return -EFAULT; + +@@ -1423,6 +1425,8 @@ static int update_counters(struct net *net, sockptr_t arg, unsigned int len) + { + struct ebt_replace hlp; + ++ if (len < sizeof(hlp)) ++ return -EINVAL; + if (copy_from_sockptr(&hlp, arg, sizeof(hlp))) + return -EFAULT; + +@@ -2352,6 +2356,8 @@ static int compat_update_counters(struct net *net, sockptr_t arg, + { + struct compat_ebt_replace hlp; + ++ if (len < sizeof(hlp)) ++ return -EINVAL; + if (copy_from_sockptr(&hlp, arg, sizeof(hlp))) + return -EFAULT; + +diff --git a/net/core/gro.c b/net/core/gro.c +index 0759277dc14ee..cefddf65f7db0 100644 +--- a/net/core/gro.c ++++ b/net/core/gro.c +@@ -195,8 +195,9 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb) + } + + merge: +- /* sk owenrship - if any - completely transferred to the aggregated packet */ ++ /* sk ownership - if any - completely transferred to the aggregated packet */ + skb->destructor = NULL; ++ skb->sk = NULL; + delta_truesize = skb->truesize; + if (offset > headlen) { + unsigned int eat = offset - headlen; +diff --git a/net/core/sock_map.c b/net/core/sock_map.c +index 27d733c0f65e1..8598466a38057 100644 +--- a/net/core/sock_map.c ++++ b/net/core/sock_map.c +@@ -411,6 +411,9 @@ static int __sock_map_delete(struct bpf_stab *stab, struct sock *sk_test, + struct sock *sk; + int err = 0; + ++ if (irqs_disabled()) ++ return -EOPNOTSUPP; /* locks here are hardirq-unsafe */ ++ + spin_lock_bh(&stab->lock); + sk = *psk; + if (!sk_test || sk_test == sk) +@@ -933,6 +936,9 @@ static long sock_hash_delete_elem(struct bpf_map *map, void *key) + struct bpf_shtab_elem *elem; + int ret = -ENOENT; + ++ if (irqs_disabled()) ++ return -EOPNOTSUPP; /* locks here are hardirq-unsafe */ ++ + hash = sock_hash_bucket_hash(key, key_size); + bucket = sock_hash_select_bucket(htab, hash); + +diff --git a/net/hsr/hsr_slave.c b/net/hsr/hsr_slave.c +index e5742f2a2d522..1b6457f357bdb 100644 +--- a/net/hsr/hsr_slave.c ++++ b/net/hsr/hsr_slave.c +@@ -220,7 +220,8 @@ void hsr_del_port(struct hsr_port *port) + netdev_update_features(master->dev); + dev_set_mtu(master->dev, hsr_get_max_mtu(hsr)); + netdev_rx_handler_unregister(port->dev); +- dev_set_promiscuity(port->dev, -1); ++ if (!port->hsr->fwd_offloaded) ++ dev_set_promiscuity(port->dev, -1); + netdev_upper_dev_unlink(port->dev, master->dev); + } + +diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c +index 762817d6c8d70..a018981b45142 100644 +--- a/net/ipv4/inet_connection_sock.c ++++ b/net/ipv4/inet_connection_sock.c +@@ -289,6 +289,7 @@ static bool inet_bhash2_addr_any_conflict(const struct sock *sk, int port, int l + struct sock_reuseport *reuseport_cb; + struct inet_bind_hashbucket *head2; + struct inet_bind2_bucket *tb2; ++ bool conflict = false; + bool reuseport_cb_ok; + + rcu_read_lock(); +@@ -301,18 +302,20 @@ static bool inet_bhash2_addr_any_conflict(const struct sock *sk, int port, int l + + spin_lock(&head2->lock); + +- inet_bind_bucket_for_each(tb2, 
&head2->chain) +- if (inet_bind2_bucket_match_addr_any(tb2, net, port, l3mdev, sk)) +- break; ++ inet_bind_bucket_for_each(tb2, &head2->chain) { ++ if (!inet_bind2_bucket_match_addr_any(tb2, net, port, l3mdev, sk)) ++ continue; + +- if (tb2 && inet_bhash2_conflict(sk, tb2, uid, relax, reuseport_cb_ok, +- reuseport_ok)) { +- spin_unlock(&head2->lock); +- return true; ++ if (!inet_bhash2_conflict(sk, tb2, uid, relax, reuseport_cb_ok, reuseport_ok)) ++ continue; ++ ++ conflict = true; ++ break; + } + + spin_unlock(&head2->lock); +- return false; ++ ++ return conflict; + } + + /* +@@ -774,6 +777,20 @@ void inet_csk_clear_xmit_timers(struct sock *sk) + } + EXPORT_SYMBOL(inet_csk_clear_xmit_timers); + ++void inet_csk_clear_xmit_timers_sync(struct sock *sk) ++{ ++ struct inet_connection_sock *icsk = inet_csk(sk); ++ ++ /* ongoing timer handlers need to acquire socket lock. */ ++ sock_not_owned_by_me(sk); ++ ++ icsk->icsk_pending = icsk->icsk_ack.pending = 0; ++ ++ sk_stop_timer_sync(sk, &icsk->icsk_retransmit_timer); ++ sk_stop_timer_sync(sk, &icsk->icsk_delack_timer); ++ sk_stop_timer_sync(sk, &sk->sk_timer); ++} ++ + void inet_csk_delete_keepalive_timer(struct sock *sk) + { + sk_stop_timer(sk, &sk->sk_timer); +diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c +index 7072fc0783ef5..c88c9034d6300 100644 +--- a/net/ipv4/inet_fragment.c ++++ b/net/ipv4/inet_fragment.c +@@ -24,6 +24,8 @@ + #include <net/ip.h> + #include <net/ipv6.h> + ++#include "../core/sock_destructor.h" ++ + /* Use skb->cb to track consecutive/adjacent fragments coming at + * the end of the queue. Nodes in the rb-tree queue will + * contain "runs" of one or more adjacent fragments. +@@ -39,6 +41,7 @@ struct ipfrag_skb_cb { + }; + struct sk_buff *next_frag; + int frag_run_len; ++ int ip_defrag_offset; + }; + + #define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb)) +@@ -396,12 +399,12 @@ int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb, + */ + if (!last) + fragrun_create(q, skb); /* First fragment. */ +- else if (last->ip_defrag_offset + last->len < end) { ++ else if (FRAG_CB(last)->ip_defrag_offset + last->len < end) { + /* This is the common case: skb goes to the end. */ + /* Detect and discard overlaps. 
*/ +- if (offset < last->ip_defrag_offset + last->len) ++ if (offset < FRAG_CB(last)->ip_defrag_offset + last->len) + return IPFRAG_OVERLAP; +- if (offset == last->ip_defrag_offset + last->len) ++ if (offset == FRAG_CB(last)->ip_defrag_offset + last->len) + fragrun_append_to_last(q, skb); + else + fragrun_create(q, skb); +@@ -418,13 +421,13 @@ int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb, + + parent = *rbn; + curr = rb_to_skb(parent); +- curr_run_end = curr->ip_defrag_offset + ++ curr_run_end = FRAG_CB(curr)->ip_defrag_offset + + FRAG_CB(curr)->frag_run_len; +- if (end <= curr->ip_defrag_offset) ++ if (end <= FRAG_CB(curr)->ip_defrag_offset) + rbn = &parent->rb_left; + else if (offset >= curr_run_end) + rbn = &parent->rb_right; +- else if (offset >= curr->ip_defrag_offset && ++ else if (offset >= FRAG_CB(curr)->ip_defrag_offset && + end <= curr_run_end) + return IPFRAG_DUP; + else +@@ -438,7 +441,7 @@ int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb, + rb_insert_color(&skb->rbnode, &q->rb_fragments); + } + +- skb->ip_defrag_offset = offset; ++ FRAG_CB(skb)->ip_defrag_offset = offset; + + return IPFRAG_OK; + } +@@ -448,13 +451,28 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, + struct sk_buff *parent) + { + struct sk_buff *fp, *head = skb_rb_first(&q->rb_fragments); +- struct sk_buff **nextp; ++ void (*destructor)(struct sk_buff *); ++ unsigned int orig_truesize = 0; ++ struct sk_buff **nextp = NULL; ++ struct sock *sk = skb->sk; + int delta; + ++ if (sk && is_skb_wmem(skb)) { ++ /* TX: skb->sk might have been passed as argument to ++ * dst->output and must remain valid until tx completes. ++ * ++ * Move sk to reassembled skb and fix up wmem accounting. ++ */ ++ orig_truesize = skb->truesize; ++ destructor = skb->destructor; ++ } ++ + if (head != skb) { + fp = skb_clone(skb, GFP_ATOMIC); +- if (!fp) +- return NULL; ++ if (!fp) { ++ head = skb; ++ goto out_restore_sk; ++ } + FRAG_CB(fp)->next_frag = FRAG_CB(skb)->next_frag; + if (RB_EMPTY_NODE(&skb->rbnode)) + FRAG_CB(parent)->next_frag = fp; +@@ -463,6 +481,12 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, + &q->rb_fragments); + if (q->fragments_tail == skb) + q->fragments_tail = fp; ++ ++ if (orig_truesize) { ++ /* prevent skb_morph from releasing sk */ ++ skb->sk = NULL; ++ skb->destructor = NULL; ++ } + skb_morph(skb, head); + FRAG_CB(skb)->next_frag = FRAG_CB(head)->next_frag; + rb_replace_node(&head->rbnode, &skb->rbnode, +@@ -470,13 +494,13 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, + consume_skb(head); + head = skb; + } +- WARN_ON(head->ip_defrag_offset != 0); ++ WARN_ON(FRAG_CB(head)->ip_defrag_offset != 0); + + delta = -head->truesize; + + /* Head of list must not be cloned. 
*/ + if (skb_unclone(head, GFP_ATOMIC)) +- return NULL; ++ goto out_restore_sk; + + delta += head->truesize; + if (delta) +@@ -492,7 +516,7 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, + + clone = alloc_skb(0, GFP_ATOMIC); + if (!clone) +- return NULL; ++ goto out_restore_sk; + skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list; + skb_frag_list_init(head); + for (i = 0; i < skb_shinfo(head)->nr_frags; i++) +@@ -509,6 +533,21 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, + nextp = &skb_shinfo(head)->frag_list; + } + ++out_restore_sk: ++ if (orig_truesize) { ++ int ts_delta = head->truesize - orig_truesize; ++ ++ /* if this reassembled skb is fragmented later, ++ * fraglist skbs will get skb->sk assigned from head->sk, ++ * and each frag skb will be released via sock_wfree. ++ * ++ * Update sk_wmem_alloc. ++ */ ++ head->sk = sk; ++ head->destructor = destructor; ++ refcount_add(ts_delta, &sk->sk_wmem_alloc); ++ } ++ + return nextp; + } + EXPORT_SYMBOL(inet_frag_reasm_prepare); +@@ -516,6 +555,8 @@ EXPORT_SYMBOL(inet_frag_reasm_prepare); + void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head, + void *reasm_data, bool try_coalesce) + { ++ struct sock *sk = is_skb_wmem(head) ? head->sk : NULL; ++ const unsigned int head_truesize = head->truesize; + struct sk_buff **nextp = reasm_data; + struct rb_node *rbn; + struct sk_buff *fp; +@@ -579,6 +620,9 @@ void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head, + head->prev = NULL; + head->tstamp = q->stamp; + head->mono_delivery_time = q->mono_delivery_time; ++ ++ if (sk) ++ refcount_add(sum_truesize - head_truesize, &sk->sk_wmem_alloc); + } + EXPORT_SYMBOL(inet_frag_reasm_finish); + +diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c +index a4941f53b5237..fb947d1613fe2 100644 +--- a/net/ipv4/ip_fragment.c ++++ b/net/ipv4/ip_fragment.c +@@ -384,6 +384,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) + } + + skb_dst_drop(skb); ++ skb_orphan(skb); + return -EINPROGRESS; + + insert_error: +@@ -487,7 +488,6 @@ int ip_defrag(struct net *net, struct sk_buff *skb, u32 user) + struct ipq *qp; + + __IP_INC_STATS(net, IPSTATS_MIB_REASMREQDS); +- skb_orphan(skb); + + /* Lookup (or create) queue header */ + qp = ip_find(net, ip_hdr(skb), user, vif); +diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c +index 5169c3c72cffe..f21a1a5403723 100644 +--- a/net/ipv4/ip_gre.c ++++ b/net/ipv4/ip_gre.c +@@ -280,8 +280,13 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi, + tpi->flags | TUNNEL_NO_KEY, + iph->saddr, iph->daddr, 0); + } else { ++ if (unlikely(!pskb_may_pull(skb, ++ gre_hdr_len + sizeof(*ershdr)))) ++ return PACKET_REJECT; ++ + ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len); + ver = ershdr->ver; ++ iph = ip_hdr(skb); + tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, + tpi->flags | TUNNEL_KEY, + iph->saddr, iph->daddr, tpi->key); +diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c +index 2407066b0fec1..b150c9929b12e 100644 +--- a/net/ipv4/netfilter/arp_tables.c ++++ b/net/ipv4/netfilter/arp_tables.c +@@ -956,6 +956,8 @@ static int do_replace(struct net *net, sockptr_t arg, unsigned int len) + void *loc_cpu_entry; + struct arpt_entry *iter; + ++ if (len < sizeof(tmp)) ++ return -EINVAL; + if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0) + return -EFAULT; + +@@ -1254,6 +1256,8 @@ static int compat_do_replace(struct net *net, sockptr_t arg, unsigned 
int len) + void *loc_cpu_entry; + struct arpt_entry *iter; + ++ if (len < sizeof(tmp)) ++ return -EINVAL; + if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0) + return -EFAULT; + +diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c +index 7da1df4997d05..4876707595781 100644 +--- a/net/ipv4/netfilter/ip_tables.c ++++ b/net/ipv4/netfilter/ip_tables.c +@@ -1108,6 +1108,8 @@ do_replace(struct net *net, sockptr_t arg, unsigned int len) + void *loc_cpu_entry; + struct ipt_entry *iter; + ++ if (len < sizeof(tmp)) ++ return -EINVAL; + if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0) + return -EFAULT; + +@@ -1492,6 +1494,8 @@ compat_do_replace(struct net *net, sockptr_t arg, unsigned int len) + void *loc_cpu_entry; + struct ipt_entry *iter; + ++ if (len < sizeof(tmp)) ++ return -EINVAL; + if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0) + return -EFAULT; + +diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c +index 68bb8d6bcc113..f8df35f7352a5 100644 +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -2931,6 +2931,8 @@ void tcp_close(struct sock *sk, long timeout) + lock_sock(sk); + __tcp_close(sk, timeout); + release_sock(sk); ++ if (!sk->sk_net_refcnt) ++ inet_csk_clear_xmit_timers_sync(sk); + sock_put(sk); + } + EXPORT_SYMBOL(tcp_close); +diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c +index 848072793fa98..70a9a4a48216e 100644 +--- a/net/ipv4/udp.c ++++ b/net/ipv4/udp.c +@@ -584,6 +584,13 @@ static inline bool __udp_is_mcast_sock(struct net *net, const struct sock *sk, + } + + DEFINE_STATIC_KEY_FALSE(udp_encap_needed_key); ++EXPORT_SYMBOL(udp_encap_needed_key); ++ ++#if IS_ENABLED(CONFIG_IPV6) ++DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key); ++EXPORT_SYMBOL(udpv6_encap_needed_key); ++#endif ++ + void udp_encap_enable(void) + { + static_branch_inc(&udp_encap_needed_key); +diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c +index 6c95d28d0c4a7..c3d67423ae189 100644 +--- a/net/ipv4/udp_offload.c ++++ b/net/ipv4/udp_offload.c +@@ -449,8 +449,9 @@ static int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb) + NAPI_GRO_CB(p)->count++; + p->data_len += skb->len; + +- /* sk owenrship - if any - completely transferred to the aggregated packet */ ++ /* sk ownership - if any - completely transferred to the aggregated packet */ + skb->destructor = NULL; ++ skb->sk = NULL; + p->truesize += skb->truesize; + p->len += skb->len; + +@@ -551,11 +552,19 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb, + unsigned int off = skb_gro_offset(skb); + int flush = 1; + +- /* we can do L4 aggregation only if the packet can't land in a tunnel +- * otherwise we could corrupt the inner stream ++ /* We can do L4 aggregation only if the packet can't land in a tunnel ++ * otherwise we could corrupt the inner stream. Detecting such packets ++ * cannot be foolproof and the aggregation might still happen in some ++ * cases. Such packets should be caught in udp_unexpected_gso later. + */ + NAPI_GRO_CB(skb)->is_flist = 0; + if (!sk || !udp_sk(sk)->gro_receive) { ++ /* If the packet was locally encapsulated in a UDP tunnel that ++ * wasn't detected above, do not GRO. ++ */ ++ if (skb->encapsulation) ++ goto out; ++ + if (skb->dev->features & NETIF_F_GRO_FRAGLIST) + NAPI_GRO_CB(skb)->is_flist = sk ? 
!udp_test_bit(GRO_ENABLED, sk) : 1; + +@@ -719,13 +728,7 @@ INDIRECT_CALLABLE_SCOPE int udp4_gro_complete(struct sk_buff *skb, int nhoff) + skb_shinfo(skb)->gso_type |= (SKB_GSO_FRAGLIST|SKB_GSO_UDP_L4); + skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count; + +- if (skb->ip_summed == CHECKSUM_UNNECESSARY) { +- if (skb->csum_level < SKB_MAX_CSUM_LEVEL) +- skb->csum_level++; +- } else { +- skb->ip_summed = CHECKSUM_UNNECESSARY; +- skb->csum_level = 0; +- } ++ __skb_incr_checksum_unnecessary(skb); + + return 0; + } +diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c +index 4fc2cae0d116c..54294f6a8ec51 100644 +--- a/net/ipv6/ip6_fib.c ++++ b/net/ipv6/ip6_fib.c +@@ -645,19 +645,19 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) + if (!w) { + /* New dump: + * +- * 1. hook callback destructor. +- */ +- cb->args[3] = (long)cb->done; +- cb->done = fib6_dump_done; +- +- /* +- * 2. allocate and initialize walker. ++ * 1. allocate and initialize walker. + */ + w = kzalloc(sizeof(*w), GFP_ATOMIC); + if (!w) + return -ENOMEM; + w->func = fib6_dump_node; + cb->args[2] = (long)w; ++ ++ /* 2. hook callback destructor. ++ */ ++ cb->args[3] = (long)cb->done; ++ cb->done = fib6_dump_done; ++ + } + + arg.skb = skb; +diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c +index 070d87abf7c02..26c3287beb29c 100644 +--- a/net/ipv6/ip6_gre.c ++++ b/net/ipv6/ip6_gre.c +@@ -528,6 +528,9 @@ static int ip6erspan_rcv(struct sk_buff *skb, + struct ip6_tnl *tunnel; + u8 ver; + ++ if (unlikely(!pskb_may_pull(skb, sizeof(*ershdr)))) ++ return PACKET_REJECT; ++ + ipv6h = ipv6_hdr(skb); + ershdr = (struct erspan_base_hdr *)skb->data; + ver = ershdr->ver; +diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c +index fd9f049d6d41e..636b360311c53 100644 +--- a/net/ipv6/netfilter/ip6_tables.c ++++ b/net/ipv6/netfilter/ip6_tables.c +@@ -1125,6 +1125,8 @@ do_replace(struct net *net, sockptr_t arg, unsigned int len) + void *loc_cpu_entry; + struct ip6t_entry *iter; + ++ if (len < sizeof(tmp)) ++ return -EINVAL; + if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0) + return -EFAULT; + +@@ -1501,6 +1503,8 @@ compat_do_replace(struct net *net, sockptr_t arg, unsigned int len) + void *loc_cpu_entry; + struct ip6t_entry *iter; + ++ if (len < sizeof(tmp)) ++ return -EINVAL; + if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0) + return -EFAULT; + +diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c +index b2dd48911c8d6..efbec7ee27d0a 100644 +--- a/net/ipv6/netfilter/nf_conntrack_reasm.c ++++ b/net/ipv6/netfilter/nf_conntrack_reasm.c +@@ -294,6 +294,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb, + } + + skb_dst_drop(skb); ++ skb_orphan(skb); + return -EINPROGRESS; + + insert_error: +@@ -469,7 +470,6 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) + hdr = ipv6_hdr(skb); + fhdr = (struct frag_hdr *)skb_transport_header(skb); + +- skb_orphan(skb); + fq = fq_find(net, fhdr->identification, user, hdr, + skb->dev ? 
skb->dev->ifindex : 0); + if (fq == NULL) { +diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c +index 438476a31313c..d31beb65db08f 100644 +--- a/net/ipv6/udp.c ++++ b/net/ipv6/udp.c +@@ -450,7 +450,7 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, + goto try_again; + } + +-DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key); ++DECLARE_STATIC_KEY_FALSE(udpv6_encap_needed_key); + void udpv6_encap_enable(void) + { + static_branch_inc(&udpv6_encap_needed_key); +diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c +index 6b95ba241ebe2..626d7b362dc7b 100644 +--- a/net/ipv6/udp_offload.c ++++ b/net/ipv6/udp_offload.c +@@ -174,13 +174,7 @@ INDIRECT_CALLABLE_SCOPE int udp6_gro_complete(struct sk_buff *skb, int nhoff) + skb_shinfo(skb)->gso_type |= (SKB_GSO_FRAGLIST|SKB_GSO_UDP_L4); + skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count; + +- if (skb->ip_summed == CHECKSUM_UNNECESSARY) { +- if (skb->csum_level < SKB_MAX_CSUM_LEVEL) +- skb->csum_level++; +- } else { +- skb->ip_summed = CHECKSUM_UNNECESSARY; +- skb->csum_level = 0; +- } ++ __skb_incr_checksum_unnecessary(skb); + + return 0; + } +diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c +index b54951ae07aa9..01ac690af7799 100644 +--- a/net/mptcp/protocol.c ++++ b/net/mptcp/protocol.c +@@ -55,28 +55,14 @@ static u64 mptcp_wnd_end(const struct mptcp_sock *msk) + return READ_ONCE(msk->wnd_end); + } + +-static bool mptcp_is_tcpsk(struct sock *sk) ++static const struct proto_ops *mptcp_fallback_tcp_ops(const struct sock *sk) + { +- struct socket *sock = sk->sk_socket; +- +- if (unlikely(sk->sk_prot == &tcp_prot)) { +- /* we are being invoked after mptcp_accept() has +- * accepted a non-mp-capable flow: sk is a tcp_sk, +- * not an mptcp one. +- * +- * Hand the socket over to tcp so all further socket ops +- * bypass mptcp. 
+- */ +- WRITE_ONCE(sock->ops, &inet_stream_ops); +- return true; + #if IS_ENABLED(CONFIG_MPTCP_IPV6) +- } else if (unlikely(sk->sk_prot == &tcpv6_prot)) { +- WRITE_ONCE(sock->ops, &inet6_stream_ops); +- return true; ++ if (sk->sk_prot == &tcpv6_prot) ++ return &inet6_stream_ops; + #endif +- } +- +- return false; ++ WARN_ON_ONCE(sk->sk_prot != &tcp_prot); ++ return &inet_stream_ops; + } + + static int __mptcp_socket_create(struct mptcp_sock *msk) +@@ -3328,44 +3314,6 @@ void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk) + msk->rcvq_space.space = TCP_INIT_CWND * TCP_MSS_DEFAULT; + } + +-static struct sock *mptcp_accept(struct sock *ssk, int flags, int *err, +- bool kern) +-{ +- struct sock *newsk; +- +- pr_debug("ssk=%p, listener=%p", ssk, mptcp_subflow_ctx(ssk)); +- newsk = inet_csk_accept(ssk, flags, err, kern); +- if (!newsk) +- return NULL; +- +- pr_debug("newsk=%p, subflow is mptcp=%d", newsk, sk_is_mptcp(newsk)); +- if (sk_is_mptcp(newsk)) { +- struct mptcp_subflow_context *subflow; +- struct sock *new_mptcp_sock; +- +- subflow = mptcp_subflow_ctx(newsk); +- new_mptcp_sock = subflow->conn; +- +- /* is_mptcp should be false if subflow->conn is missing, see +- * subflow_syn_recv_sock() +- */ +- if (WARN_ON_ONCE(!new_mptcp_sock)) { +- tcp_sk(newsk)->is_mptcp = 0; +- goto out; +- } +- +- newsk = new_mptcp_sock; +- MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEPASSIVEACK); +- } else { +- MPTCP_INC_STATS(sock_net(ssk), +- MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK); +- } +- +-out: +- newsk->sk_kern_sock = kern; +- return newsk; +-} +- + void mptcp_destroy_common(struct mptcp_sock *msk, unsigned int flags) + { + struct mptcp_subflow_context *subflow, *tmp; +@@ -3802,7 +3750,6 @@ static struct proto mptcp_prot = { + .connect = mptcp_connect, + .disconnect = mptcp_disconnect, + .close = mptcp_close, +- .accept = mptcp_accept, + .setsockopt = mptcp_setsockopt, + .getsockopt = mptcp_getsockopt, + .shutdown = mptcp_shutdown, +@@ -3912,18 +3859,36 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, + if (!ssk) + return -EINVAL; + +- newsk = mptcp_accept(ssk, flags, &err, kern); ++ pr_debug("ssk=%p, listener=%p", ssk, mptcp_subflow_ctx(ssk)); ++ newsk = inet_csk_accept(ssk, flags, &err, kern); + if (!newsk) + return err; + +- lock_sock(newsk); +- +- __inet_accept(sock, newsock, newsk); +- if (!mptcp_is_tcpsk(newsock->sk)) { +- struct mptcp_sock *msk = mptcp_sk(newsk); ++ pr_debug("newsk=%p, subflow is mptcp=%d", newsk, sk_is_mptcp(newsk)); ++ if (sk_is_mptcp(newsk)) { + struct mptcp_subflow_context *subflow; ++ struct sock *new_mptcp_sock; ++ ++ subflow = mptcp_subflow_ctx(newsk); ++ new_mptcp_sock = subflow->conn; ++ ++ /* is_mptcp should be false if subflow->conn is missing, see ++ * subflow_syn_recv_sock() ++ */ ++ if (WARN_ON_ONCE(!new_mptcp_sock)) { ++ tcp_sk(newsk)->is_mptcp = 0; ++ goto tcpfallback; ++ } ++ ++ newsk = new_mptcp_sock; ++ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEPASSIVEACK); ++ ++ newsk->sk_kern_sock = kern; ++ lock_sock(newsk); ++ __inet_accept(sock, newsock, newsk); + + set_bit(SOCK_CUSTOM_SOCKOPT, &newsock->flags); ++ msk = mptcp_sk(newsk); + msk->in_accept_queue = 0; + + /* set ssk->sk_socket of accept()ed flows to mptcp socket. 
+@@ -3945,6 +3910,19 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, + if (unlikely(list_is_singular(&msk->conn_list))) + mptcp_set_state(newsk, TCP_CLOSE); + } ++ } else { ++tcpfallback: ++ newsk->sk_kern_sock = kern; ++ lock_sock(newsk); ++ __inet_accept(sock, newsock, newsk); ++ /* we are being invoked after accepting a non-mp-capable ++ * flow: sk is a tcp_sk, not an mptcp one. ++ * ++ * Hand the socket over to tcp so all further socket ops ++ * bypass mptcp. ++ */ ++ WRITE_ONCE(newsock->sk->sk_socket->ops, ++ mptcp_fallback_tcp_ops(newsock->sk)); + } + release_sock(newsk); + +diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c +index ab41700bee688..23ee96c6abcbf 100644 +--- a/net/mptcp/subflow.c ++++ b/net/mptcp/subflow.c +@@ -905,6 +905,8 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, + return child; + + fallback: ++ if (fallback) ++ SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK); + mptcp_subflow_drop_ctx(child); + return child; + } +diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c +index f10419ba6e0bd..2a4649df8f086 100644 +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -1200,6 +1200,26 @@ static void nf_tables_table_disable(struct net *net, struct nft_table *table) + #define __NFT_TABLE_F_UPDATE (__NFT_TABLE_F_WAS_DORMANT | \ + __NFT_TABLE_F_WAS_AWAKEN) + ++static bool nft_table_pending_update(const struct nft_ctx *ctx) ++{ ++ struct nftables_pernet *nft_net = nft_pernet(ctx->net); ++ struct nft_trans *trans; ++ ++ if (ctx->table->flags & __NFT_TABLE_F_UPDATE) ++ return true; ++ ++ list_for_each_entry(trans, &nft_net->commit_list, list) { ++ if (trans->ctx.table == ctx->table && ++ ((trans->msg_type == NFT_MSG_NEWCHAIN && ++ nft_trans_chain_update(trans)) || ++ (trans->msg_type == NFT_MSG_DELCHAIN && ++ nft_is_base_chain(trans->ctx.chain)))) ++ return true; ++ } ++ ++ return false; ++} ++ + static int nf_tables_updtable(struct nft_ctx *ctx) + { + struct nft_trans *trans; +@@ -1223,7 +1243,7 @@ static int nf_tables_updtable(struct nft_ctx *ctx) + return -EOPNOTSUPP; + + /* No dormant off/on/off/on games in single transaction */ +- if (ctx->table->flags & __NFT_TABLE_F_UPDATE) ++ if (nft_table_pending_update(ctx)) + return -EINVAL; + + trans = nft_trans_alloc(ctx, NFT_MSG_NEWTABLE, +@@ -2420,6 +2440,9 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, + struct nft_stats __percpu *stats = NULL; + struct nft_chain_hook hook = {}; + ++ if (table->flags & __NFT_TABLE_F_UPDATE) ++ return -EINVAL; ++ + if (flags & NFT_CHAIN_BINDING) + return -EOPNOTSUPP; + +@@ -2621,6 +2644,13 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy, + } + } + ++ if (table->flags & __NFT_TABLE_F_UPDATE && ++ !list_empty(&hook.list)) { ++ NL_SET_BAD_ATTR(extack, attr); ++ err = -EOPNOTSUPP; ++ goto err_hooks; ++ } ++ + if (!(table->flags & NFT_TABLE_F_DORMANT) && + nft_is_base_chain(chain) && + !list_empty(&hook.list)) { +@@ -2850,6 +2880,9 @@ static int nft_delchain_hook(struct nft_ctx *ctx, + struct nft_trans *trans; + int err; + ++ if (ctx->table->flags & __NFT_TABLE_F_UPDATE) ++ return -EOPNOTSUPP; ++ + err = nft_chain_parse_hook(ctx->net, basechain, nla, &chain_hook, + ctx->family, chain->flags, extack); + if (err < 0) +@@ -2934,7 +2967,8 @@ static int nf_tables_delchain(struct sk_buff *skb, const struct nfnl_info *info, + nft_ctx_init(&ctx, net, skb, info->nlh, family, table, chain, nla); + + if (nla[NFTA_CHAIN_HOOK]) { +- if 
(chain->flags & NFT_CHAIN_HW_OFFLOAD) ++ if (NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_DESTROYCHAIN || ++ chain->flags & NFT_CHAIN_HW_OFFLOAD) + return -EOPNOTSUPP; + + if (nft_is_base_chain(chain)) { +@@ -8134,11 +8168,12 @@ static int nft_flowtable_parse_hook(const struct nft_ctx *ctx, + return err; + } + ++/* call under rcu_read_lock */ + static const struct nf_flowtable_type *__nft_flowtable_type_get(u8 family) + { + const struct nf_flowtable_type *type; + +- list_for_each_entry(type, &nf_tables_flowtables, list) { ++ list_for_each_entry_rcu(type, &nf_tables_flowtables, list) { + if (family == type->family) + return type; + } +@@ -8150,9 +8185,13 @@ nft_flowtable_type_get(struct net *net, u8 family) + { + const struct nf_flowtable_type *type; + ++ rcu_read_lock(); + type = __nft_flowtable_type_get(family); +- if (type != NULL && try_module_get(type->owner)) ++ if (type != NULL && try_module_get(type->owner)) { ++ rcu_read_unlock(); + return type; ++ } ++ rcu_read_unlock(); + + lockdep_nfnl_nft_mutex_not_held(); + #ifdef CONFIG_MODULES +@@ -10053,9 +10092,11 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) + if (nft_trans_chain_update(trans)) { + nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN, + &nft_trans_chain_hooks(trans)); +- nft_netdev_unregister_hooks(net, +- &nft_trans_chain_hooks(trans), +- true); ++ if (!(trans->ctx.table->flags & NFT_TABLE_F_DORMANT)) { ++ nft_netdev_unregister_hooks(net, ++ &nft_trans_chain_hooks(trans), ++ true); ++ } + } else { + nft_chain_del(trans->ctx.chain); + nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN, +@@ -10294,10 +10335,11 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) + struct nft_trans *trans, *next; + LIST_HEAD(set_update_list); + struct nft_trans_elem *te; ++ int err = 0; + + if (action == NFNL_ABORT_VALIDATE && + nf_tables_validate(net) < 0) +- return -EAGAIN; ++ err = -EAGAIN; + + list_for_each_entry_safe_reverse(trans, next, &nft_net->commit_list, + list) { +@@ -10327,9 +10369,11 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) + break; + case NFT_MSG_NEWCHAIN: + if (nft_trans_chain_update(trans)) { +- nft_netdev_unregister_hooks(net, +- &nft_trans_chain_hooks(trans), +- true); ++ if (!(trans->ctx.table->flags & NFT_TABLE_F_DORMANT)) { ++ nft_netdev_unregister_hooks(net, ++ &nft_trans_chain_hooks(trans), ++ true); ++ } + free_percpu(nft_trans_chain_stats(trans)); + kfree(nft_trans_chain_name(trans)); + nft_trans_destroy(trans); +@@ -10483,12 +10527,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) + nf_tables_abort_release(trans); + } + +- if (action == NFNL_ABORT_AUTOLOAD) +- nf_tables_module_autoload(net); +- else +- nf_tables_module_autoload_cleanup(net); +- +- return 0; ++ return err; + } + + static int nf_tables_abort(struct net *net, struct sk_buff *skb, +@@ -10501,6 +10540,17 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb, + gc_seq = nft_gc_seq_begin(nft_net); + ret = __nf_tables_abort(net, action); + nft_gc_seq_end(nft_net, gc_seq); ++ ++ WARN_ON_ONCE(!list_empty(&nft_net->commit_list)); ++ ++ /* module autoload needs to happen after GC sequence update because it ++ * temporarily releases and grabs mutex again. 
++ */ ++ if (action == NFNL_ABORT_AUTOLOAD) ++ nf_tables_module_autoload(net); ++ else ++ nf_tables_module_autoload_cleanup(net); ++ + mutex_unlock(&nft_net->commit_mutex); + + return ret; +@@ -11301,9 +11351,10 @@ static void __net_exit nf_tables_exit_net(struct net *net) + + gc_seq = nft_gc_seq_begin(nft_net); + +- if (!list_empty(&nft_net->commit_list) || +- !list_empty(&nft_net->module_list)) +- __nf_tables_abort(net, NFNL_ABORT_NONE); ++ WARN_ON_ONCE(!list_empty(&nft_net->commit_list)); ++ ++ if (!list_empty(&nft_net->module_list)) ++ nf_tables_module_autoload_cleanup(net); + + __nft_release_tables(net); + +@@ -11395,6 +11446,7 @@ static void __exit nf_tables_module_exit(void) + unregister_netdevice_notifier(&nf_tables_flowtable_notifier); + nft_chain_filter_fini(); + nft_chain_route_fini(); ++ nf_tables_trans_destroy_flush_work(); + unregister_pernet_subsys(&nf_tables_net_ops); + cancel_work_sync(&trans_gc_work); + cancel_work_sync(&trans_destroy_work); +diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c +index 12684d835cb53..772ddb5824d9e 100644 +--- a/net/nfc/nci/core.c ++++ b/net/nfc/nci/core.c +@@ -1516,6 +1516,11 @@ static void nci_rx_work(struct work_struct *work) + nfc_send_to_raw_sock(ndev->nfc_dev, skb, + RAW_PAYLOAD_NCI, NFC_DIRECTION_RX); + ++ if (!nci_plen(skb->data)) { ++ kfree_skb(skb); ++ break; ++ } ++ + /* Process frame */ + switch (nci_mt(skb->data)) { + case NCI_MT_RSP_PKT: +diff --git a/net/rds/rdma.c b/net/rds/rdma.c +index a4e3c5de998be..00dbcd4d28e68 100644 +--- a/net/rds/rdma.c ++++ b/net/rds/rdma.c +@@ -302,7 +302,7 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args, + } + ret = PTR_ERR(trans_private); + /* Trigger connection so that its ready for the next retry */ +- if (ret == -ENODEV) ++ if (ret == -ENODEV && cp) + rds_conn_connect_if_down(cp->cp_conn); + goto out; + } +diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c +index dffa990a9629f..e34f1be151645 100644 +--- a/net/sched/act_skbmod.c ++++ b/net/sched/act_skbmod.c +@@ -241,13 +241,13 @@ static int tcf_skbmod_dump(struct sk_buff *skb, struct tc_action *a, + struct tcf_skbmod *d = to_skbmod(a); + unsigned char *b = skb_tail_pointer(skb); + struct tcf_skbmod_params *p; +- struct tc_skbmod opt = { +- .index = d->tcf_index, +- .refcnt = refcount_read(&d->tcf_refcnt) - ref, +- .bindcnt = atomic_read(&d->tcf_bindcnt) - bind, +- }; ++ struct tc_skbmod opt; + struct tcf_t t; + ++ memset(&opt, 0, sizeof(opt)); ++ opt.index = d->tcf_index; ++ opt.refcnt = refcount_read(&d->tcf_refcnt) - ref, ++ opt.bindcnt = atomic_read(&d->tcf_bindcnt) - bind; + spin_lock_bh(&d->tcf_lock); + opt.action = d->tcf_action; + p = rcu_dereference_protected(d->skbmod_p, +diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c +index e9eaf637220e9..5f25a2595add5 100644 +--- a/net/sched/sch_api.c ++++ b/net/sched/sch_api.c +@@ -809,7 +809,7 @@ void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len) + notify = !sch->q.qlen && !WARN_ON_ONCE(!n && + !qdisc_is_offloaded); + /* TODO: perform the search on a per txq basis */ +- sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid)); ++ sch = qdisc_lookup_rcu(qdisc_dev(sch), TC_H_MAJ(parentid)); + if (sch == NULL) { + WARN_ON_ONCE(parentid != TC_H_ROOT); + break; +diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c +index e0ce4276274be..933e12e3a55c7 100644 +--- a/net/sunrpc/svcsock.c ++++ b/net/sunrpc/svcsock.c +@@ -1216,15 +1216,6 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) + * MSG_SPLICE_PAGES is used exclusively to reduce 
the number of + * copy operations in this path. Therefore the caller must ensure + * that the pages backing @xdr are unchanging. +- * +- * Note that the send is non-blocking. The caller has incremented +- * the reference count on each page backing the RPC message, and +- * the network layer will "put" these pages when transmission is +- * complete. +- * +- * This is safe for our RPC services because the memory backing +- * the head and tail components is never kmalloc'd. These always +- * come from pages in the svc_rqst::rq_pages array. + */ + static int svc_tcp_sendmsg(struct svc_sock *svsk, struct svc_rqst *rqstp, + rpc_fraghdr marker, unsigned int *sentp) +@@ -1254,6 +1245,7 @@ static int svc_tcp_sendmsg(struct svc_sock *svsk, struct svc_rqst *rqstp, + iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec, + 1 + count, sizeof(marker) + rqstp->rq_res.len); + ret = sock_sendmsg(svsk->sk_sock, &msg); ++ page_frag_free(buf); + if (ret < 0) + return ret; + *sentp += ret; +diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c +index acf5bb74fd386..df166f6afad82 100644 +--- a/net/tls/tls_sw.c ++++ b/net/tls/tls_sw.c +@@ -1976,10 +1976,10 @@ int tls_sw_recvmsg(struct sock *sk, + if (unlikely(flags & MSG_ERRQUEUE)) + return sock_recv_errqueue(sk, msg, len, SOL_IP, IP_RECVERR); + +- psock = sk_psock_get(sk); + err = tls_rx_reader_lock(sk, ctx, flags & MSG_DONTWAIT); + if (err < 0) + return err; ++ psock = sk_psock_get(sk); + bpf_strp_enabled = sk_psock_strp_enabled(psock); + + /* If crypto failed the connection is broken */ +@@ -2152,12 +2152,15 @@ int tls_sw_recvmsg(struct sock *sk, + } + + /* Drain records from the rx_list & copy if required */ +- if (is_peek || is_kvec) ++ if (is_peek) + err = process_rx_list(ctx, msg, &control, copied + peeked, + decrypted - peeked, is_peek, NULL); + else + err = process_rx_list(ctx, msg, &control, 0, + async_copy_bytes, is_peek, NULL); ++ ++ /* we could have copied less than we wanted, and possibly nothing */ ++ decrypted += max(err, 0) - async_copy_bytes; + } + + copied += decrypted; +diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c +index a64bf601b480d..2925f5d27ad3f 100644 +--- a/net/vmw_vsock/virtio_transport.c ++++ b/net/vmw_vsock/virtio_transport.c +@@ -109,7 +109,6 @@ virtio_transport_send_pkt_work(struct work_struct *work) + if (!skb) + break; + +- virtio_transport_deliver_tap_pkt(skb); + reply = virtio_vsock_skb_reply(skb); + + sg_init_one(&hdr, virtio_vsock_hdr(skb), sizeof(*virtio_vsock_hdr(skb))); +@@ -128,6 +127,8 @@ virtio_transport_send_pkt_work(struct work_struct *work) + break; + } + ++ virtio_transport_deliver_tap_pkt(skb); ++ + if (reply) { + struct virtqueue *rx_vq = vsock->vqs[VSOCK_VQ_RX]; + int val; +diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py +index 0669bac5e900e..3f899cc7e99a9 100755 +--- a/scripts/bpf_doc.py ++++ b/scripts/bpf_doc.py +@@ -414,8 +414,8 @@ class PrinterRST(Printer): + version = version.stdout.decode().rstrip() + except: + try: +- version = subprocess.run(['make', 'kernelversion'], cwd=linuxRoot, +- capture_output=True, check=True) ++ version = subprocess.run(['make', '-s', '--no-print-directory', 'kernelversion'], ++ cwd=linuxRoot, capture_output=True, check=True) + version = version.stdout.decode().rstrip() + except: + return 'Linux' +diff --git a/scripts/mod/Makefile b/scripts/mod/Makefile +index c9e38ad937fd4..3c54125eb3733 100644 +--- a/scripts/mod/Makefile ++++ b/scripts/mod/Makefile +@@ -5,7 +5,7 @@ CFLAGS_REMOVE_empty.o += $(CC_FLAGS_LTO) + hostprogs-always-y += modpost 
mk_elfconfig + always-y += empty.o + +-modpost-objs := modpost.o file2alias.o sumversion.o ++modpost-objs := modpost.o file2alias.o sumversion.o symsearch.o + + devicetable-offsets-file := devicetable-offsets.h + +@@ -16,7 +16,7 @@ targets += $(devicetable-offsets-file) devicetable-offsets.s + + # dependencies on generated files need to be listed explicitly + +-$(obj)/modpost.o $(obj)/file2alias.o $(obj)/sumversion.o: $(obj)/elfconfig.h ++$(obj)/modpost.o $(obj)/file2alias.o $(obj)/sumversion.o $(obj)/symsearch.o: $(obj)/elfconfig.h + $(obj)/file2alias.o: $(obj)/$(devicetable-offsets-file) + + quiet_cmd_elfconfig = MKELF $@ +diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c +index 5191fdbd3fa23..7d53942445d75 100644 +--- a/scripts/mod/modpost.c ++++ b/scripts/mod/modpost.c +@@ -22,7 +22,6 @@ + #include <errno.h> + #include "modpost.h" + #include "../../include/linux/license.h" +-#include "../../include/linux/module_symbol.h" + + static bool module_enabled; + /* Are we using CONFIG_MODVERSIONS? */ +@@ -577,11 +576,14 @@ static int parse_elf(struct elf_info *info, const char *filename) + *p = TO_NATIVE(*p); + } + ++ symsearch_init(info); ++ + return 1; + } + + static void parse_elf_finish(struct elf_info *info) + { ++ symsearch_finish(info); + release_file(info->hdr, info->size); + } + +@@ -1042,75 +1044,16 @@ static int secref_whitelist(const char *fromsec, const char *fromsym, + return 1; + } + +-/* +- * If there's no name there, ignore it; likewise, ignore it if it's +- * one of the magic symbols emitted used by current tools. +- * +- * Otherwise if find_symbols_between() returns those symbols, they'll +- * fail the whitelist tests and cause lots of false alarms ... fixable +- * only by merging __exit and __init sections into __text, bloating +- * the kernel (which is especially evil on embedded platforms). +- */ +-static inline int is_valid_name(struct elf_info *elf, Elf_Sym *sym) +-{ +- const char *name = elf->strtab + sym->st_name; +- +- if (!name || !strlen(name)) +- return 0; +- return !is_mapping_symbol(name); +-} +- +-/* Look up the nearest symbol based on the section and the address */ +-static Elf_Sym *find_nearest_sym(struct elf_info *elf, Elf_Addr addr, +- unsigned int secndx, bool allow_negative, +- Elf_Addr min_distance) +-{ +- Elf_Sym *sym; +- Elf_Sym *near = NULL; +- Elf_Addr sym_addr, distance; +- bool is_arm = (elf->hdr->e_machine == EM_ARM); +- +- for (sym = elf->symtab_start; sym < elf->symtab_stop; sym++) { +- if (get_secindex(elf, sym) != secndx) +- continue; +- if (!is_valid_name(elf, sym)) +- continue; +- +- sym_addr = sym->st_value; +- +- /* +- * For ARM Thumb instruction, the bit 0 of st_value is set +- * if the symbol is STT_FUNC type. Mask it to get the address. 
+- */ +- if (is_arm && ELF_ST_TYPE(sym->st_info) == STT_FUNC) +- sym_addr &= ~1; +- +- if (addr >= sym_addr) +- distance = addr - sym_addr; +- else if (allow_negative) +- distance = sym_addr - addr; +- else +- continue; +- +- if (distance <= min_distance) { +- min_distance = distance; +- near = sym; +- } +- +- if (min_distance == 0) +- break; +- } +- return near; +-} +- + static Elf_Sym *find_fromsym(struct elf_info *elf, Elf_Addr addr, + unsigned int secndx) + { +- return find_nearest_sym(elf, addr, secndx, false, ~0); ++ return symsearch_find_nearest(elf, addr, secndx, false, ~0); + } + + static Elf_Sym *find_tosym(struct elf_info *elf, Elf_Addr addr, Elf_Sym *sym) + { ++ Elf_Sym *new_sym; ++ + /* If the supplied symbol has a valid name, return it */ + if (is_valid_name(elf, sym)) + return sym; +@@ -1119,7 +1062,9 @@ static Elf_Sym *find_tosym(struct elf_info *elf, Elf_Addr addr, Elf_Sym *sym) + * Strive to find a better symbol name, but the resulting name may not + * match the symbol referenced in the original code. + */ +- return find_nearest_sym(elf, addr, get_secindex(elf, sym), true, 20); ++ new_sym = symsearch_find_nearest(elf, addr, get_secindex(elf, sym), ++ true, 20); ++ return new_sym ? new_sym : sym; + } + + static bool is_executable_section(struct elf_info *elf, unsigned int secndx) +diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h +index 5f94c2c9f2d95..6413f26fcb6b4 100644 +--- a/scripts/mod/modpost.h ++++ b/scripts/mod/modpost.h +@@ -10,6 +10,7 @@ + #include <fcntl.h> + #include <unistd.h> + #include <elf.h> ++#include "../../include/linux/module_symbol.h" + + #include "list.h" + #include "elfconfig.h" +@@ -128,6 +129,8 @@ struct elf_info { + * take shndx from symtab_shndx_start[N] instead */ + Elf32_Word *symtab_shndx_start; + Elf32_Word *symtab_shndx_stop; ++ ++ struct symsearch *symsearch; + }; + + /* Accessor for sym->st_shndx, hides ugliness of "64k sections" */ +@@ -154,6 +157,28 @@ static inline unsigned int get_secindex(const struct elf_info *info, + return index; + } + ++/* ++ * If there's no name there, ignore it; likewise, ignore it if it's ++ * one of the magic symbols emitted used by current tools. ++ * ++ * Internal symbols created by tools should be ignored by modpost. ++ */ ++static inline int is_valid_name(struct elf_info *elf, Elf_Sym *sym) ++{ ++ const char *name = elf->strtab + sym->st_name; ++ ++ if (!name || !strlen(name)) ++ return 0; ++ return !is_mapping_symbol(name); ++} ++ ++/* symsearch.c */ ++void symsearch_init(struct elf_info *elf); ++void symsearch_finish(struct elf_info *elf); ++Elf_Sym *symsearch_find_nearest(struct elf_info *elf, Elf_Addr addr, ++ unsigned int secndx, bool allow_negative, ++ Elf_Addr min_distance); ++ + /* file2alias.c */ + void handle_moddevtable(struct module *mod, struct elf_info *info, + Elf_Sym *sym, const char *symname); +diff --git a/scripts/mod/symsearch.c b/scripts/mod/symsearch.c +new file mode 100644 +index 0000000000000..aa4ed51f9960c +--- /dev/null ++++ b/scripts/mod/symsearch.c +@@ -0,0 +1,199 @@ ++// SPDX-License-Identifier: GPL-2.0 ++ ++/* ++ * Helper functions for finding the symbol in an ELF which is "nearest" ++ * to a given address. ++ */ ++ ++#include "modpost.h" ++ ++struct syminfo { ++ unsigned int symbol_index; ++ unsigned int section_index; ++ Elf_Addr addr; ++}; ++ ++/* ++ * Container used to hold an entire binary search table. ++ * Entries in table are ascending, sorted first by section_index, ++ * then by addr, and last by symbol_index. 
The sorting by ++ * symbol_index is used to ensure predictable behavior when ++ * multiple symbols are present with the same address; all ++ * symbols past the first are effectively ignored, by eliding ++ * them in symsearch_fixup(). ++ */ ++struct symsearch { ++ unsigned int table_size; ++ struct syminfo table[]; ++}; ++ ++static int syminfo_compare(const void *s1, const void *s2) ++{ ++ const struct syminfo *sym1 = s1; ++ const struct syminfo *sym2 = s2; ++ ++ if (sym1->section_index > sym2->section_index) ++ return 1; ++ if (sym1->section_index < sym2->section_index) ++ return -1; ++ if (sym1->addr > sym2->addr) ++ return 1; ++ if (sym1->addr < sym2->addr) ++ return -1; ++ if (sym1->symbol_index > sym2->symbol_index) ++ return 1; ++ if (sym1->symbol_index < sym2->symbol_index) ++ return -1; ++ return 0; ++} ++ ++static unsigned int symbol_count(struct elf_info *elf) ++{ ++ unsigned int result = 0; ++ ++ for (Elf_Sym *sym = elf->symtab_start; sym < elf->symtab_stop; sym++) { ++ if (is_valid_name(elf, sym)) ++ result++; ++ } ++ return result; ++} ++ ++/* ++ * Populate the search array that we just allocated. ++ * Be slightly paranoid here. The ELF file is mmap'd and could ++ * conceivably change between symbol_count() and symsearch_populate(). ++ * If we notice any difference, bail out rather than potentially ++ * propagating errors or crashing. ++ */ ++static void symsearch_populate(struct elf_info *elf, ++ struct syminfo *table, ++ unsigned int table_size) ++{ ++ bool is_arm = (elf->hdr->e_machine == EM_ARM); ++ ++ for (Elf_Sym *sym = elf->symtab_start; sym < elf->symtab_stop; sym++) { ++ if (is_valid_name(elf, sym)) { ++ if (table_size-- == 0) ++ fatal("%s: size mismatch\n", __func__); ++ table->symbol_index = sym - elf->symtab_start; ++ table->section_index = get_secindex(elf, sym); ++ table->addr = sym->st_value; ++ ++ /* ++ * For ARM Thumb instruction, the bit 0 of st_value is ++ * set if the symbol is STT_FUNC type. Mask it to get ++ * the address. ++ */ ++ if (is_arm && ELF_ST_TYPE(sym->st_info) == STT_FUNC) ++ table->addr &= ~1; ++ ++ table++; ++ } ++ } ++ ++ if (table_size != 0) ++ fatal("%s: size mismatch\n", __func__); ++} ++ ++/* ++ * Do any fixups on the table after sorting. ++ * For now, this just finds adjacent entries which have ++ * the same section_index and addr, and it propagates ++ * the first symbol_index over the subsequent entries, ++ * so that only one symbol_index is seen for any given ++ * section_index and addr. This ensures that whether ++ * we're looking at an address from "above" or "below" ++ * that we see the same symbol_index. ++ * This does leave some duplicate entries in the table; ++ * in practice, these are a small fraction of the ++ * total number of entries, and they are harmless to ++ * the binary search algorithm other than a few occasional ++ * unnecessary comparisons. ++ */ ++static void symsearch_fixup(struct syminfo *table, unsigned int table_size) ++{ ++ /* Don't look at index 0, it will never change. 
*/ ++ for (unsigned int i = 1; i < table_size; i++) { ++ if (table[i].addr == table[i - 1].addr && ++ table[i].section_index == table[i - 1].section_index) { ++ table[i].symbol_index = table[i - 1].symbol_index; ++ } ++ } ++} ++ ++void symsearch_init(struct elf_info *elf) ++{ ++ unsigned int table_size = symbol_count(elf); ++ ++ elf->symsearch = NOFAIL(malloc(sizeof(struct symsearch) + ++ sizeof(struct syminfo) * table_size)); ++ elf->symsearch->table_size = table_size; ++ ++ symsearch_populate(elf, elf->symsearch->table, table_size); ++ qsort(elf->symsearch->table, table_size, ++ sizeof(struct syminfo), syminfo_compare); ++ ++ symsearch_fixup(elf->symsearch->table, table_size); ++} ++ ++void symsearch_finish(struct elf_info *elf) ++{ ++ free(elf->symsearch); ++ elf->symsearch = NULL; ++} ++ ++/* ++ * Find the syminfo which is in secndx and "nearest" to addr. ++ * allow_negative: allow returning a symbol whose address is > addr. ++ * min_distance: ignore symbols which are further away than this. ++ * ++ * Returns a pointer into the symbol table for success. ++ * Returns NULL if no legal symbol is found within the requested range. ++ */ ++Elf_Sym *symsearch_find_nearest(struct elf_info *elf, Elf_Addr addr, ++ unsigned int secndx, bool allow_negative, ++ Elf_Addr min_distance) ++{ ++ unsigned int hi = elf->symsearch->table_size; ++ unsigned int lo = 0; ++ struct syminfo *table = elf->symsearch->table; ++ struct syminfo target; ++ ++ target.addr = addr; ++ target.section_index = secndx; ++ target.symbol_index = ~0; /* compares greater than any actual index */ ++ while (hi > lo) { ++ unsigned int mid = lo + (hi - lo) / 2; /* Avoids overflow */ ++ ++ if (syminfo_compare(&table[mid], &target) > 0) ++ hi = mid; ++ else ++ lo = mid + 1; ++ } ++ ++ /* ++ * table[hi], if it exists, is the first entry in the array which ++ * lies beyond target. table[hi - 1], if it exists, is the last ++ * entry in the array which comes before target, including the ++ * case where it perfectly matches the section and the address. ++ * ++ * Note -- if the address we're looking up falls perfectly ++ * in the middle of two symbols, this is written to always ++ * prefer the symbol with the lower address. 
++ */ ++ Elf_Sym *result = NULL; ++ ++ if (allow_negative && ++ hi < elf->symsearch->table_size && ++ table[hi].section_index == secndx && ++ table[hi].addr - addr <= min_distance) { ++ min_distance = table[hi].addr - addr; ++ result = &elf->symtab_start[table[hi].symbol_index]; ++ } ++ if (hi > 0 && ++ table[hi - 1].section_index == secndx && ++ addr - table[hi - 1].addr <= min_distance) { ++ result = &elf->symtab_start[table[hi - 1].symbol_index]; ++ } ++ return result; ++} +diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c +index 6fa640263216f..2c23a5a286086 100644 +--- a/security/selinux/selinuxfs.c ++++ b/security/selinux/selinuxfs.c +@@ -2135,7 +2135,6 @@ static struct file_system_type sel_fs_type = { + .kill_sb = sel_kill_sb, + }; + +-static struct vfsmount *selinuxfs_mount __ro_after_init; + struct path selinux_null __ro_after_init; + + static int __init init_sel_fs(void) +@@ -2157,18 +2156,21 @@ static int __init init_sel_fs(void) + return err; + } + +- selinux_null.mnt = selinuxfs_mount = kern_mount(&sel_fs_type); +- if (IS_ERR(selinuxfs_mount)) { ++ selinux_null.mnt = kern_mount(&sel_fs_type); ++ if (IS_ERR(selinux_null.mnt)) { + pr_err("selinuxfs: could not mount!\n"); +- err = PTR_ERR(selinuxfs_mount); +- selinuxfs_mount = NULL; ++ err = PTR_ERR(selinux_null.mnt); ++ selinux_null.mnt = NULL; ++ return err; + } ++ + selinux_null.dentry = d_hash_and_lookup(selinux_null.mnt->mnt_root, + &null_name); + if (IS_ERR(selinux_null.dentry)) { + pr_err("selinuxfs: could not lookup null!\n"); + err = PTR_ERR(selinux_null.dentry); + selinux_null.dentry = NULL; ++ return err; + } + + return err; +diff --git a/sound/pci/emu10k1/emu10k1_callback.c b/sound/pci/emu10k1/emu10k1_callback.c +index d36234b88fb42..941bfbf812ed3 100644 +--- a/sound/pci/emu10k1/emu10k1_callback.c ++++ b/sound/pci/emu10k1/emu10k1_callback.c +@@ -255,7 +255,7 @@ lookup_voices(struct snd_emux *emu, struct snd_emu10k1 *hw, + /* check if sample is finished playing (non-looping only) */ + if (bp != best + V_OFF && bp != best + V_FREE && + (vp->reg.sample_mode & SNDRV_SFNT_SAMPLE_SINGLESHOT)) { +- val = snd_emu10k1_ptr_read(hw, CCCA_CURRADDR, vp->ch) - 64; ++ val = snd_emu10k1_ptr_read(hw, CCCA_CURRADDR, vp->ch); + if (val >= vp->reg.loopstart) + bp = best + V_OFF; + } +@@ -362,7 +362,7 @@ start_voice(struct snd_emux_voice *vp) + + map = (hw->silent_page.addr << hw->address_mode) | (hw->address_mode ? 
MAP_PTI_MASK1 : MAP_PTI_MASK0); + +- addr = vp->reg.start + 64; ++ addr = vp->reg.start; + temp = vp->reg.parm.filterQ; + ccca = (temp << 28) | addr; + if (vp->apitch < 0xe400) +@@ -430,9 +430,6 @@ start_voice(struct snd_emux_voice *vp) + /* Q & current address (Q 4bit value, MSB) */ + CCCA, ccca, + +- /* cache */ +- CCR, REG_VAL_PUT(CCR_CACHEINVALIDSIZE, 64), +- + /* reset volume */ + VTFT, vtarget | vp->ftarget, + CVCF, vtarget | CVCF_CURRENTFILTER_MASK, +diff --git a/sound/pci/hda/cs35l56_hda.c b/sound/pci/hda/cs35l56_hda.c +index 7adc1d373d65c..27848d6469636 100644 +--- a/sound/pci/hda/cs35l56_hda.c ++++ b/sound/pci/hda/cs35l56_hda.c +@@ -978,14 +978,14 @@ int cs35l56_hda_common_probe(struct cs35l56_hda *cs35l56, int id) + pm_runtime_mark_last_busy(cs35l56->base.dev); + pm_runtime_enable(cs35l56->base.dev); + ++ cs35l56->base.init_done = true; ++ + ret = component_add(cs35l56->base.dev, &cs35l56_hda_comp_ops); + if (ret) { + dev_err(cs35l56->base.dev, "Register component failed: %d\n", ret); + goto pm_err; + } + +- cs35l56->base.init_done = true; +- + return 0; + + pm_err: +diff --git a/sound/pci/hda/cs35l56_hda_i2c.c b/sound/pci/hda/cs35l56_hda_i2c.c +index 757a4d193e0fb..c31f60b0421e5 100644 +--- a/sound/pci/hda/cs35l56_hda_i2c.c ++++ b/sound/pci/hda/cs35l56_hda_i2c.c +@@ -49,10 +49,19 @@ static const struct i2c_device_id cs35l56_hda_i2c_id[] = { + {} + }; + ++static const struct acpi_device_id cs35l56_acpi_hda_match[] = { ++ { "CSC3554", 0 }, ++ { "CSC3556", 0 }, ++ { "CSC3557", 0 }, ++ {} ++}; ++MODULE_DEVICE_TABLE(acpi, cs35l56_acpi_hda_match); ++ + static struct i2c_driver cs35l56_hda_i2c_driver = { + .driver = { +- .name = "cs35l56-hda", +- .pm = &cs35l56_hda_pm_ops, ++ .name = "cs35l56-hda", ++ .acpi_match_table = cs35l56_acpi_hda_match, ++ .pm = &cs35l56_hda_pm_ops, + }, + .id_table = cs35l56_hda_i2c_id, + .probe = cs35l56_hda_i2c_probe, +diff --git a/sound/pci/hda/cs35l56_hda_spi.c b/sound/pci/hda/cs35l56_hda_spi.c +index 756aec342eab7..52c9e04b3c55f 100644 +--- a/sound/pci/hda/cs35l56_hda_spi.c ++++ b/sound/pci/hda/cs35l56_hda_spi.c +@@ -49,10 +49,19 @@ static const struct spi_device_id cs35l56_hda_spi_id[] = { + {} + }; + ++static const struct acpi_device_id cs35l56_acpi_hda_match[] = { ++ { "CSC3554", 0 }, ++ { "CSC3556", 0 }, ++ { "CSC3557", 0 }, ++ {} ++}; ++MODULE_DEVICE_TABLE(acpi, cs35l56_acpi_hda_match); ++ + static struct spi_driver cs35l56_hda_spi_driver = { + .driver = { +- .name = "cs35l56-hda", +- .pm = &cs35l56_hda_pm_ops, ++ .name = "cs35l56-hda", ++ .acpi_match_table = cs35l56_acpi_hda_match, ++ .pm = &cs35l56_hda_pm_ops, + }, + .id_table = cs35l56_hda_spi_id, + .probe = cs35l56_hda_spi_probe, +diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c +index 0db9326b6f844..b1c2fb43cab69 100644 +--- a/sound/pci/hda/patch_realtek.c ++++ b/sound/pci/hda/patch_realtek.c +@@ -10072,7 +10072,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { + SND_PCI_QUIRK(0x10ec, 0x1252, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK), + SND_PCI_QUIRK(0x10ec, 0x1254, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK), + SND_PCI_QUIRK(0x10ec, 0x12cc, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK), +- SND_PCI_QUIRK(0x10f7, 0x8338, "Panasonic CF-SZ6", ALC269_FIXUP_HEADSET_MODE), ++ SND_PCI_QUIRK(0x10f7, 0x8338, "Panasonic CF-SZ6", ALC269_FIXUP_ASPIRE_HEADSET_MIC), + SND_PCI_QUIRK(0x144d, 0xc109, "Samsung Ativ book 9 (NP900X3G)", ALC269_FIXUP_INV_DMIC), + SND_PCI_QUIRK(0x144d, 0xc169, "Samsung Notebook 9 Pen (NP930SBE-K01US)", 
ALC298_FIXUP_SAMSUNG_AMP), + SND_PCI_QUIRK(0x144d, 0xc176, "Samsung Notebook 9 Pro (NP930MBE-K04US)", ALC298_FIXUP_SAMSUNG_AMP), +@@ -10302,6 +10302,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { + SND_PCI_QUIRK(0x1d05, 0x1147, "TongFang GMxTGxx", ALC269_FIXUP_NO_SHUTUP), + SND_PCI_QUIRK(0x1d05, 0x115c, "TongFang GMxTGxx", ALC269_FIXUP_NO_SHUTUP), + SND_PCI_QUIRK(0x1d05, 0x121b, "TongFang GMxAGxx", ALC269_FIXUP_NO_SHUTUP), ++ SND_PCI_QUIRK(0x1d05, 0x1387, "TongFang GMxIXxx", ALC2XX_FIXUP_HEADSET_MIC), + SND_PCI_QUIRK(0x1d72, 0x1602, "RedmiBook", ALC255_FIXUP_XIAOMI_HEADSET_MIC), + SND_PCI_QUIRK(0x1d72, 0x1701, "XiaomiNotebook Pro", ALC298_FIXUP_DELL1_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1d72, 0x1901, "RedmiBook 14", ALC256_FIXUP_ASUS_HEADSET_MIC), +diff --git a/sound/soc/amd/acp/acp-pci.c b/sound/soc/amd/acp/acp-pci.c +index a32c14a109b77..223238f662f83 100644 +--- a/sound/soc/amd/acp/acp-pci.c ++++ b/sound/soc/amd/acp/acp-pci.c +@@ -107,7 +107,10 @@ static int acp_pci_probe(struct pci_dev *pci, const struct pci_device_id *pci_id + goto unregister_dmic_dev; + } + +- acp_init(chip); ++ ret = acp_init(chip); ++ if (ret) ++ goto unregister_dmic_dev; ++ + res = devm_kcalloc(&pci->dev, num_res, sizeof(struct resource), GFP_KERNEL); + if (!res) { + ret = -ENOMEM; +diff --git a/sound/soc/codecs/rt5682-sdw.c b/sound/soc/codecs/rt5682-sdw.c +index e67c2e19cb1a7..1fdbef5fd6cba 100644 +--- a/sound/soc/codecs/rt5682-sdw.c ++++ b/sound/soc/codecs/rt5682-sdw.c +@@ -763,12 +763,12 @@ static int __maybe_unused rt5682_dev_resume(struct device *dev) + return 0; + + if (!slave->unattach_request) { ++ mutex_lock(&rt5682->disable_irq_lock); + if (rt5682->disable_irq == true) { +- mutex_lock(&rt5682->disable_irq_lock); + sdw_write_no_pm(slave, SDW_SCP_INTMASK1, SDW_SCP_INT1_IMPL_DEF); + rt5682->disable_irq = false; +- mutex_unlock(&rt5682->disable_irq_lock); + } ++ mutex_unlock(&rt5682->disable_irq_lock); + goto regmap_sync; + } + +diff --git a/sound/soc/codecs/rt711-sdca-sdw.c b/sound/soc/codecs/rt711-sdca-sdw.c +index 935e597022d32..b8471b2d8f4f1 100644 +--- a/sound/soc/codecs/rt711-sdca-sdw.c ++++ b/sound/soc/codecs/rt711-sdca-sdw.c +@@ -438,13 +438,13 @@ static int __maybe_unused rt711_sdca_dev_resume(struct device *dev) + return 0; + + if (!slave->unattach_request) { ++ mutex_lock(&rt711->disable_irq_lock); + if (rt711->disable_irq == true) { +- mutex_lock(&rt711->disable_irq_lock); + sdw_write_no_pm(slave, SDW_SCP_SDCA_INTMASK1, SDW_SCP_SDCA_INTMASK_SDCA_0); + sdw_write_no_pm(slave, SDW_SCP_SDCA_INTMASK2, SDW_SCP_SDCA_INTMASK_SDCA_8); + rt711->disable_irq = false; +- mutex_unlock(&rt711->disable_irq_lock); + } ++ mutex_unlock(&rt711->disable_irq_lock); + goto regmap_sync; + } + +diff --git a/sound/soc/codecs/rt711-sdw.c b/sound/soc/codecs/rt711-sdw.c +index 3f5773310ae8c..988451f24a756 100644 +--- a/sound/soc/codecs/rt711-sdw.c ++++ b/sound/soc/codecs/rt711-sdw.c +@@ -536,12 +536,12 @@ static int __maybe_unused rt711_dev_resume(struct device *dev) + return 0; + + if (!slave->unattach_request) { ++ mutex_lock(&rt711->disable_irq_lock); + if (rt711->disable_irq == true) { +- mutex_lock(&rt711->disable_irq_lock); + sdw_write_no_pm(slave, SDW_SCP_INTMASK1, SDW_SCP_INT1_IMPL_DEF); + rt711->disable_irq = false; +- mutex_unlock(&rt711->disable_irq_lock); + } ++ mutex_unlock(&rt711->disable_irq_lock); + goto regmap_sync; + } + +diff --git a/sound/soc/codecs/rt712-sdca-sdw.c b/sound/soc/codecs/rt712-sdca-sdw.c +index 6b644a89c5890..ba877432cea61 100644 +--- a/sound/soc/codecs/rt712-sdca-sdw.c ++++ 
b/sound/soc/codecs/rt712-sdca-sdw.c +@@ -438,13 +438,14 @@ static int __maybe_unused rt712_sdca_dev_resume(struct device *dev) + return 0; + + if (!slave->unattach_request) { ++ mutex_lock(&rt712->disable_irq_lock); + if (rt712->disable_irq == true) { +- mutex_lock(&rt712->disable_irq_lock); ++ + sdw_write_no_pm(slave, SDW_SCP_SDCA_INTMASK1, SDW_SCP_SDCA_INTMASK_SDCA_0); + sdw_write_no_pm(slave, SDW_SCP_SDCA_INTMASK2, SDW_SCP_SDCA_INTMASK_SDCA_8); + rt712->disable_irq = false; +- mutex_unlock(&rt712->disable_irq_lock); + } ++ mutex_unlock(&rt712->disable_irq_lock); + goto regmap_sync; + } + +diff --git a/sound/soc/codecs/rt722-sdca-sdw.c b/sound/soc/codecs/rt722-sdca-sdw.c +index a38ec58622145..43a4e79e56966 100644 +--- a/sound/soc/codecs/rt722-sdca-sdw.c ++++ b/sound/soc/codecs/rt722-sdca-sdw.c +@@ -464,13 +464,13 @@ static int __maybe_unused rt722_sdca_dev_resume(struct device *dev) + return 0; + + if (!slave->unattach_request) { ++ mutex_lock(&rt722->disable_irq_lock); + if (rt722->disable_irq == true) { +- mutex_lock(&rt722->disable_irq_lock); + sdw_write_no_pm(slave, SDW_SCP_SDCA_INTMASK1, SDW_SCP_SDCA_INTMASK_SDCA_6); + sdw_write_no_pm(slave, SDW_SCP_SDCA_INTMASK2, SDW_SCP_SDCA_INTMASK_SDCA_8); + rt722->disable_irq = false; +- mutex_unlock(&rt722->disable_irq_lock); + } ++ mutex_unlock(&rt722->disable_irq_lock); + goto regmap_sync; + } + +diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c +index 72b90a7ee4b68..b9c20e29fe63e 100644 +--- a/sound/soc/codecs/wm_adsp.c ++++ b/sound/soc/codecs/wm_adsp.c +@@ -683,11 +683,12 @@ static void wm_adsp_control_remove(struct cs_dsp_coeff_ctl *cs_ctl) + int wm_adsp_write_ctl(struct wm_adsp *dsp, const char *name, int type, + unsigned int alg, void *buf, size_t len) + { +- struct cs_dsp_coeff_ctl *cs_ctl = cs_dsp_get_ctl(&dsp->cs_dsp, name, type, alg); ++ struct cs_dsp_coeff_ctl *cs_ctl; + struct wm_coeff_ctl *ctl; + int ret; + + mutex_lock(&dsp->cs_dsp.pwr_lock); ++ cs_ctl = cs_dsp_get_ctl(&dsp->cs_dsp, name, type, alg); + ret = cs_dsp_coeff_write_ctrl(cs_ctl, 0, buf, len); + mutex_unlock(&dsp->cs_dsp.pwr_lock); + +diff --git a/sound/soc/soc-ops.c b/sound/soc/soc-ops.c +index 2d25748ca7066..b27e89ff6a167 100644 +--- a/sound/soc/soc-ops.c ++++ b/sound/soc/soc-ops.c +@@ -263,7 +263,7 @@ int snd_soc_get_volsw(struct snd_kcontrol *kcontrol, + int max = mc->max; + int min = mc->min; + int sign_bit = mc->sign_bit; +- unsigned int mask = (1 << fls(max)) - 1; ++ unsigned int mask = (1ULL << fls(max)) - 1; + unsigned int invert = mc->invert; + int val; + int ret; +diff --git a/sound/soc/sof/amd/acp.c b/sound/soc/sof/amd/acp.c +index 4c54ce212de6a..cc006d7038d97 100644 +--- a/sound/soc/sof/amd/acp.c ++++ b/sound/soc/sof/amd/acp.c +@@ -522,6 +522,10 @@ int amd_sof_acp_probe(struct snd_sof_dev *sdev) + goto unregister_dev; + } + ++ ret = acp_init(sdev); ++ if (ret < 0) ++ goto free_smn_dev; ++ + sdev->ipc_irq = pci->irq; + ret = request_threaded_irq(sdev->ipc_irq, acp_irq_handler, acp_irq_thread, + IRQF_SHARED, "AudioDSP", sdev); +@@ -531,10 +535,6 @@ int amd_sof_acp_probe(struct snd_sof_dev *sdev) + goto free_smn_dev; + } + +- ret = acp_init(sdev); +- if (ret < 0) +- goto free_ipc_irq; +- + sdev->dsp_box.offset = 0; + sdev->dsp_box.size = BOX_SIZE_512; + +diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h +index 798e60b5454b7..845a4023ba44e 100644 +--- a/tools/arch/x86/include/asm/cpufeatures.h ++++ b/tools/arch/x86/include/asm/cpufeatures.h +@@ -219,7 +219,7 @@ + #define X86_FEATURE_IBRS ( 
7*32+25) /* Indirect Branch Restricted Speculation */ + #define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */ + #define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */ +-#define X86_FEATURE_ZEN (7*32+28) /* "" CPU based on Zen microarchitecture */ ++#define X86_FEATURE_ZEN ( 7*32+28) /* "" Generic flag for all Zen and newer */ + #define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* "" L1TF workaround PTE inversion */ + #define X86_FEATURE_IBRS_ENHANCED ( 7*32+30) /* Enhanced IBRS */ + #define X86_FEATURE_MSR_IA32_FEAT_CTL ( 7*32+31) /* "" MSR IA32_FEAT_CTL configured */ +diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py +index 897af958cee85..575b7e248e521 100755 +--- a/tools/net/ynl/ynl-gen-c.py ++++ b/tools/net/ynl/ynl-gen-c.py +@@ -198,8 +198,11 @@ class Type(SpecAttr): + presence = '' + for i in range(0, len(ref)): + presence = f"{var}->{'.'.join(ref[:i] + [''])}_present.{ref[i]}" +- if self.presence_type() == 'bit': +- code.append(presence + ' = 1;') ++ # Every layer below last is a nest, so we know it uses bit presence ++ # last layer is "self" and may be a complex type ++ if i == len(ref) - 1 and self.presence_type() != 'bit': ++ continue ++ code.append(presence + ' = 1;') + code += self._setter_lines(ri, member, presence) + + func_name = f"{op_prefix(ri, direction, deref=deref)}_set_{'_'.join(ref)}" +diff --git a/tools/testing/selftests/mm/vm_util.h b/tools/testing/selftests/mm/vm_util.h +index c7fa61f0dff8d..0c603bec5e209 100644 +--- a/tools/testing/selftests/mm/vm_util.h ++++ b/tools/testing/selftests/mm/vm_util.h +@@ -3,7 +3,7 @@ + #include <stdbool.h> + #include <sys/mman.h> + #include <err.h> +-#include <string.h> /* ffsl() */ ++#include <strings.h> /* ffsl() */ + #include <unistd.h> /* _SC_PAGESIZE */ + + #define BIT_ULL(nr) (1ULL << (nr)) +diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh +index 3b971d1617d81..7647c74adb26c 100755 +--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh ++++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh +@@ -1,6 +1,11 @@ + #!/bin/bash + # SPDX-License-Identifier: GPL-2.0 + ++# Double quotes to prevent globbing and word splitting is recommended in new ++# code but we accept it, especially because there were too many before having ++# address all other issues detected by shellcheck. ++#shellcheck disable=SC2086 ++ + . "$(dirname "${0}")/mptcp_lib.sh" + + time_start=$(date +%s) +@@ -13,7 +18,6 @@ sout="" + cin_disconnect="" + cin="" + cout="" +-ksft_skip=4 + capture=false + timeout_poll=30 + timeout_test=$((timeout_poll * 2 + 1)) +@@ -131,6 +135,8 @@ ns4="ns4-$rndh" + TEST_COUNT=0 + TEST_GROUP="" + ++# This function is used in the cleanup trap ++#shellcheck disable=SC2317 + cleanup() + { + rm -f "$cin_disconnect" "$cout_disconnect" +@@ -225,8 +231,9 @@ set_ethtool_flags() { + local dev="$2" + local flags="$3" + +- ip netns exec $ns ethtool -K $dev $flags 2>/dev/null +- [ $? 
-eq 0 ] && echo "INFO: set $ns dev $dev: ethtool -K $flags" ++ if ip netns exec $ns ethtool -K $dev $flags 2>/dev/null; then ++ echo "INFO: set $ns dev $dev: ethtool -K $flags" ++ fi + } + + set_random_ethtool_flags() { +@@ -363,7 +370,7 @@ do_transfer() + local extra_args="$7" + + local port +- port=$((10000+$TEST_COUNT)) ++ port=$((10000+TEST_COUNT)) + TEST_COUNT=$((TEST_COUNT+1)) + + if [ "$rcvbuf" -gt 0 ]; then +@@ -420,12 +427,20 @@ do_transfer() + nstat -n + fi + +- local stat_synrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX") +- local stat_ackrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX") +- local stat_cookietx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent") +- local stat_cookierx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv") +- local stat_csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr") +- local stat_csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr") ++ local stat_synrx_last_l ++ local stat_ackrx_last_l ++ local stat_cookietx_last ++ local stat_cookierx_last ++ local stat_csum_err_s ++ local stat_csum_err_c ++ local stat_tcpfb_last_l ++ stat_synrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX") ++ stat_ackrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX") ++ stat_cookietx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent") ++ stat_cookierx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv") ++ stat_csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr") ++ stat_csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr") ++ stat_tcpfb_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableFallbackACK") + + timeout ${timeout_test} \ + ip netns exec ${listener_ns} \ +@@ -488,11 +503,18 @@ do_transfer() + check_transfer $cin $sout "file received by server" + rets=$? 
+ +- local stat_synrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX") +- local stat_ackrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX") +- local stat_cookietx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent") +- local stat_cookierx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv") +- local stat_ooo_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtTCPOFOQueue") ++ local stat_synrx_now_l ++ local stat_ackrx_now_l ++ local stat_cookietx_now ++ local stat_cookierx_now ++ local stat_ooo_now ++ local stat_tcpfb_now_l ++ stat_synrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX") ++ stat_ackrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX") ++ stat_cookietx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent") ++ stat_cookierx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv") ++ stat_ooo_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtTCPOFOQueue") ++ stat_tcpfb_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableFallbackACK") + + expect_synrx=$((stat_synrx_last_l)) + expect_ackrx=$((stat_ackrx_last_l)) +@@ -501,8 +523,8 @@ do_transfer() + cookies=${cookies##*=} + + if [ ${cl_proto} = "MPTCP" ] && [ ${srv_proto} = "MPTCP" ]; then +- expect_synrx=$((stat_synrx_last_l+$connect_per_transfer)) +- expect_ackrx=$((stat_ackrx_last_l+$connect_per_transfer)) ++ expect_synrx=$((stat_synrx_last_l+connect_per_transfer)) ++ expect_ackrx=$((stat_ackrx_last_l+connect_per_transfer)) + fi + + if [ ${stat_synrx_now_l} -lt ${expect_synrx} ]; then +@@ -510,7 +532,7 @@ do_transfer() + "${stat_synrx_now_l}" "${expect_synrx}" 1>&2 + retc=1 + fi +- if [ ${stat_ackrx_now_l} -lt ${expect_ackrx} -a ${stat_ooo_now} -eq 0 ]; then ++ if [ ${stat_ackrx_now_l} -lt ${expect_ackrx} ] && [ ${stat_ooo_now} -eq 0 ]; then + if [ ${stat_ooo_now} -eq 0 ]; then + printf "[ FAIL ] lower MPC ACK rx (%d) than expected (%d)\n" \ + "${stat_ackrx_now_l}" "${expect_ackrx}" 1>&2 +@@ -521,18 +543,20 @@ do_transfer() + fi + + if $checksum; then +- local csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr") +- local csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr") ++ local csum_err_s ++ local csum_err_c ++ csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr") ++ csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr") + + local csum_err_s_nr=$((csum_err_s - stat_csum_err_s)) + if [ $csum_err_s_nr -gt 0 ]; then +- printf "[ FAIL ]\nserver got $csum_err_s_nr data checksum error[s]" ++ printf "[ FAIL ]\nserver got %d data checksum error[s]" ${csum_err_s_nr} + rets=1 + fi + + local csum_err_c_nr=$((csum_err_c - stat_csum_err_c)) + if [ $csum_err_c_nr -gt 0 ]; then +- printf "[ FAIL ]\nclient got $csum_err_c_nr data checksum error[s]" ++ printf "[ FAIL ]\nclient got %d data checksum error[s]" ${csum_err_c_nr} + retc=1 + fi + fi +@@ -544,6 +568,11 @@ do_transfer() + mptcp_lib_result_fail "${TEST_GROUP}: ${result_msg}" + fi + ++ if [ ${stat_ooo_now} -eq 0 ] && [ ${stat_tcpfb_last_l} -ne ${stat_tcpfb_now_l} ]; then ++ mptcp_lib_pr_fail "unexpected fallback to TCP" ++ rets=1 ++ fi ++ + if [ $cookies -eq 2 ];then + if [ $stat_cookietx_last -ge $stat_cookietx_now ] ;then + printf " WARN: CookieSent: did not advance" +@@ -701,7 +730,7 @@ run_test_transparent() + return + fi + +-ip netns exec "$listener_ns" nft -f /dev/stdin <<"EOF" ++ if ! 
ip netns exec "$listener_ns" nft -f /dev/stdin <<"EOF" + flush ruleset + table inet mangle { + chain divert { +@@ -712,7 +741,7 @@ table inet mangle { + } + } + EOF +- if [ $? -ne 0 ]; then ++ then + echo "SKIP: $msg, could not load nft ruleset" + mptcp_lib_fail_if_expected_feature "nft rules" + mptcp_lib_result_skip "${TEST_GROUP}" +@@ -727,8 +756,7 @@ EOF + local_addr="0.0.0.0" + fi + +- ip -net "$listener_ns" $r6flag rule add fwmark 1 lookup 100 +- if [ $? -ne 0 ]; then ++ if ! ip -net "$listener_ns" $r6flag rule add fwmark 1 lookup 100; then + ip netns exec "$listener_ns" nft flush ruleset + echo "SKIP: $msg, ip $r6flag rule failed" + mptcp_lib_fail_if_expected_feature "ip rule" +@@ -736,8 +764,7 @@ EOF + return + fi + +- ip -net "$listener_ns" route add local $local_addr/0 dev lo table 100 +- if [ $? -ne 0 ]; then ++ if ! ip -net "$listener_ns" route add local $local_addr/0 dev lo table 100; then + ip netns exec "$listener_ns" nft flush ruleset + ip -net "$listener_ns" $r6flag rule del fwmark 1 lookup 100 + echo "SKIP: $msg, ip route add local $local_addr failed" +@@ -900,7 +927,7 @@ stop_if_error "Could not even run ping tests" + echo -n "INFO: Using loss of $tc_loss " + test "$tc_delay" -gt 0 && echo -n "delay $tc_delay ms " + +-reorder_delay=$(($tc_delay / 4)) ++reorder_delay=$((tc_delay / 4)) + + if [ -z "${tc_reorder}" ]; then + reorder1=$((RANDOM%10)) +diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh +index 34c3423469679..00cf4efac4c21 100755 +--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh ++++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh +@@ -796,7 +796,7 @@ pm_nl_check_endpoint() + [ -n "$_flags" ]; flags="flags $_flags" + shift + elif [ $1 = "dev" ]; then +- [ -n "$2" ]; dev="dev $1" ++ [ -n "$2" ]; dev="dev $2" + shift + elif [ $1 = "id" ]; then + _id=$2 +@@ -3507,6 +3507,8 @@ endpoint_tests() + local tests_pid=$! 
+ + wait_mpj $ns2 ++ pm_nl_check_endpoint "creation" \ ++ $ns2 10.0.2.2 id 2 flags subflow dev ns2eth2 + chk_subflow_nr "before delete" 2 + chk_mptcp_info subflows 1 subflows 1 + +diff --git a/tools/testing/selftests/net/reuseaddr_conflict.c b/tools/testing/selftests/net/reuseaddr_conflict.c +index 7c5b12664b03b..bfb07dc495186 100644 +--- a/tools/testing/selftests/net/reuseaddr_conflict.c ++++ b/tools/testing/selftests/net/reuseaddr_conflict.c +@@ -109,6 +109,6 @@ int main(void) + fd1 = open_port(0, 1); + if (fd1 >= 0) + error(1, 0, "Was allowed to create an ipv4 reuseport on an already bound non-reuseport socket with no ipv6"); +- fprintf(stderr, "Success"); ++ fprintf(stderr, "Success\n"); + return 0; + } +diff --git a/tools/testing/selftests/net/test_vxlan_mdb.sh b/tools/testing/selftests/net/test_vxlan_mdb.sh +index 31e5f0f8859d1..be8e66abc74e1 100755 +--- a/tools/testing/selftests/net/test_vxlan_mdb.sh ++++ b/tools/testing/selftests/net/test_vxlan_mdb.sh +@@ -984,6 +984,7 @@ encap_params_common() + local plen=$1; shift + local enc_ethtype=$1; shift + local grp=$1; shift ++ local grp_dmac=$1; shift + local src=$1; shift + local mz=$1; shift + +@@ -1002,11 +1003,11 @@ encap_params_common() + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep2_ip src_vni 10020" + + run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_dst_ip $vtep1_ip action pass" +- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" ++ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Destination IP - match" + +- run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" ++ run_cmd "ip netns exec $ns1 $mz br0.20 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Destination IP - no match" + +@@ -1019,20 +1020,20 @@ encap_params_common() + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip dst_port 1111 src_vni 10020" + + run_cmd "tc -n $ns2 filter replace dev veth0 ingress pref 1 handle 101 proto $enc_ethtype flower ip_proto udp dst_port 4789 action pass" +- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" ++ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev veth0 ingress" 101 1 + log_test $? 0 "Default destination port - match" + +- run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" ++ run_cmd "ip netns exec $ns1 $mz br0.20 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev veth0 ingress" 101 1 + log_test $? 0 "Default destination port - no match" + + run_cmd "tc -n $ns2 filter replace dev veth0 ingress pref 1 handle 101 proto $enc_ethtype flower ip_proto udp dst_port 1111 action pass" +- run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" ++ run_cmd "ip netns exec $ns1 $mz br0.20 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev veth0 ingress" 101 1 + log_test $? 
0 "Non-default destination port - match" + +- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" ++ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev veth0 ingress" 101 1 + log_test $? 0 "Non-default destination port - no match" + +@@ -1045,11 +1046,11 @@ encap_params_common() + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip src_vni 10020" + + run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_key_id 10010 action pass" +- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" ++ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Default destination VNI - match" + +- run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" ++ run_cmd "ip netns exec $ns1 $mz br0.20 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Default destination VNI - no match" + +@@ -1057,11 +1058,11 @@ encap_params_common() + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip vni 10010 src_vni 10020" + + run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_key_id 10020 action pass" +- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" ++ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Non-default destination VNI - match" + +- run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" ++ run_cmd "ip netns exec $ns1 $mz br0.20 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 
0 "Non-default destination VNI - no match" + +@@ -1079,6 +1080,7 @@ encap_params_ipv4_ipv4() + local plen=32 + local enc_ethtype="ip" + local grp=239.1.1.1 ++ local grp_dmac=01:00:5e:01:01:01 + local src=192.0.2.129 + + echo +@@ -1086,7 +1088,7 @@ encap_params_ipv4_ipv4() + echo "------------------------------------------------------------------" + + encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \ +- $grp $src "mausezahn" ++ $grp $grp_dmac $src "mausezahn" + } + + encap_params_ipv6_ipv4() +@@ -1098,6 +1100,7 @@ encap_params_ipv6_ipv4() + local plen=32 + local enc_ethtype="ip" + local grp=ff0e::1 ++ local grp_dmac=33:33:00:00:00:01 + local src=2001:db8:100::1 + + echo +@@ -1105,7 +1108,7 @@ encap_params_ipv6_ipv4() + echo "------------------------------------------------------------------" + + encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \ +- $grp $src "mausezahn -6" ++ $grp $grp_dmac $src "mausezahn -6" + } + + encap_params_ipv4_ipv6() +@@ -1117,6 +1120,7 @@ encap_params_ipv4_ipv6() + local plen=128 + local enc_ethtype="ipv6" + local grp=239.1.1.1 ++ local grp_dmac=01:00:5e:01:01:01 + local src=192.0.2.129 + + echo +@@ -1124,7 +1128,7 @@ encap_params_ipv4_ipv6() + echo "------------------------------------------------------------------" + + encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \ +- $grp $src "mausezahn" ++ $grp $grp_dmac $src "mausezahn" + } + + encap_params_ipv6_ipv6() +@@ -1136,6 +1140,7 @@ encap_params_ipv6_ipv6() + local plen=128 + local enc_ethtype="ipv6" + local grp=ff0e::1 ++ local grp_dmac=33:33:00:00:00:01 + local src=2001:db8:100::1 + + echo +@@ -1143,7 +1148,7 @@ encap_params_ipv6_ipv6() + echo "------------------------------------------------------------------" + + encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \ +- $grp $src "mausezahn -6" ++ $grp $grp_dmac $src "mausezahn -6" + } + + starg_exclude_ir_common() +@@ -1154,6 +1159,7 @@ starg_exclude_ir_common() + local vtep2_ip=$1; shift + local plen=$1; shift + local grp=$1; shift ++ local grp_dmac=$1; shift + local valid_src=$1; shift + local invalid_src=$1; shift + local mz=$1; shift +@@ -1175,14 +1181,14 @@ starg_exclude_ir_common() + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $invalid_src dst $vtep2_ip src_vni 10010" + + # Check that invalid source is not forwarded to any VTEP. +- run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" ++ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 0 + log_test $? 0 "Block excluded source - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 0 + log_test $? 0 "Block excluded source - second VTEP" + + # Check that valid source is forwarded to both VTEPs. +- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" ++ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Forward valid source - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 1 +@@ -1192,14 +1198,14 @@ starg_exclude_ir_common() + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep2_ip src_vni 10010" + + # Check that invalid source is not forwarded to any VTEP. 
+- run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" ++ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Block excluded source after removal - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 1 + log_test $? 0 "Block excluded source after removal - second VTEP" + + # Check that valid source is forwarded to the remaining VTEP. +- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" ++ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 2 + log_test $? 0 "Forward valid source after removal - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 1 +@@ -1214,6 +1220,7 @@ starg_exclude_ir_ipv4_ipv4() + local vtep2_ip=198.51.100.200 + local plen=32 + local grp=239.1.1.1 ++ local grp_dmac=01:00:5e:01:01:01 + local valid_src=192.0.2.129 + local invalid_src=192.0.2.145 + +@@ -1222,7 +1229,7 @@ starg_exclude_ir_ipv4_ipv4() + echo "-------------------------------------------------------------" + + starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ +- $valid_src $invalid_src "mausezahn" ++ $grp_dmac $valid_src $invalid_src "mausezahn" + } + + starg_exclude_ir_ipv6_ipv4() +@@ -1233,6 +1240,7 @@ starg_exclude_ir_ipv6_ipv4() + local vtep2_ip=198.51.100.200 + local plen=32 + local grp=ff0e::1 ++ local grp_dmac=33:33:00:00:00:01 + local valid_src=2001:db8:100::1 + local invalid_src=2001:db8:200::1 + +@@ -1241,7 +1249,7 @@ starg_exclude_ir_ipv6_ipv4() + echo "-------------------------------------------------------------" + + starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ +- $valid_src $invalid_src "mausezahn -6" ++ $grp_dmac $valid_src $invalid_src "mausezahn -6" + } + + starg_exclude_ir_ipv4_ipv6() +@@ -1252,6 +1260,7 @@ starg_exclude_ir_ipv4_ipv6() + local vtep2_ip=2001:db8:2000::1 + local plen=128 + local grp=239.1.1.1 ++ local grp_dmac=01:00:5e:01:01:01 + local valid_src=192.0.2.129 + local invalid_src=192.0.2.145 + +@@ -1260,7 +1269,7 @@ starg_exclude_ir_ipv4_ipv6() + echo "-------------------------------------------------------------" + + starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ +- $valid_src $invalid_src "mausezahn" ++ $grp_dmac $valid_src $invalid_src "mausezahn" + } + + starg_exclude_ir_ipv6_ipv6() +@@ -1271,6 +1280,7 @@ starg_exclude_ir_ipv6_ipv6() + local vtep2_ip=2001:db8:2000::1 + local plen=128 + local grp=ff0e::1 ++ local grp_dmac=33:33:00:00:00:01 + local valid_src=2001:db8:100::1 + local invalid_src=2001:db8:200::1 + +@@ -1279,7 +1289,7 @@ starg_exclude_ir_ipv6_ipv6() + echo "-------------------------------------------------------------" + + starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ +- $valid_src $invalid_src "mausezahn -6" ++ $grp_dmac $valid_src $invalid_src "mausezahn -6" + } + + starg_include_ir_common() +@@ -1290,6 +1300,7 @@ starg_include_ir_common() + local vtep2_ip=$1; shift + local plen=$1; shift + local grp=$1; shift ++ local grp_dmac=$1; shift + local valid_src=$1; shift + local invalid_src=$1; shift + local mz=$1; shift +@@ -1311,14 +1322,14 @@ starg_include_ir_common() + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode include source_list $valid_src dst $vtep2_ip src_vni 10010" + + # 
Check that invalid source is not forwarded to any VTEP. +- run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" ++ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 0 + log_test $? 0 "Block excluded source - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 0 + log_test $? 0 "Block excluded source - second VTEP" + + # Check that valid source is forwarded to both VTEPs. +- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" ++ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Forward valid source - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 1 +@@ -1328,14 +1339,14 @@ starg_include_ir_common() + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep2_ip src_vni 10010" + + # Check that invalid source is not forwarded to any VTEP. +- run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" ++ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Block excluded source after removal - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 1 + log_test $? 0 "Block excluded source after removal - second VTEP" + + # Check that valid source is forwarded to the remaining VTEP. +- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" ++ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 2 + log_test $? 
0 "Forward valid source after removal - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 1 +@@ -1350,6 +1361,7 @@ starg_include_ir_ipv4_ipv4() + local vtep2_ip=198.51.100.200 + local plen=32 + local grp=239.1.1.1 ++ local grp_dmac=01:00:5e:01:01:01 + local valid_src=192.0.2.129 + local invalid_src=192.0.2.145 + +@@ -1358,7 +1370,7 @@ starg_include_ir_ipv4_ipv4() + echo "-------------------------------------------------------------" + + starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ +- $valid_src $invalid_src "mausezahn" ++ $grp_dmac $valid_src $invalid_src "mausezahn" + } + + starg_include_ir_ipv6_ipv4() +@@ -1369,6 +1381,7 @@ starg_include_ir_ipv6_ipv4() + local vtep2_ip=198.51.100.200 + local plen=32 + local grp=ff0e::1 ++ local grp_dmac=33:33:00:00:00:01 + local valid_src=2001:db8:100::1 + local invalid_src=2001:db8:200::1 + +@@ -1377,7 +1390,7 @@ starg_include_ir_ipv6_ipv4() + echo "-------------------------------------------------------------" + + starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ +- $valid_src $invalid_src "mausezahn -6" ++ $grp_dmac $valid_src $invalid_src "mausezahn -6" + } + + starg_include_ir_ipv4_ipv6() +@@ -1388,6 +1401,7 @@ starg_include_ir_ipv4_ipv6() + local vtep2_ip=2001:db8:2000::1 + local plen=128 + local grp=239.1.1.1 ++ local grp_dmac=01:00:5e:01:01:01 + local valid_src=192.0.2.129 + local invalid_src=192.0.2.145 + +@@ -1396,7 +1410,7 @@ starg_include_ir_ipv4_ipv6() + echo "-------------------------------------------------------------" + + starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ +- $valid_src $invalid_src "mausezahn" ++ $grp_dmac $valid_src $invalid_src "mausezahn" + } + + starg_include_ir_ipv6_ipv6() +@@ -1407,6 +1421,7 @@ starg_include_ir_ipv6_ipv6() + local vtep2_ip=2001:db8:2000::1 + local plen=128 + local grp=ff0e::1 ++ local grp_dmac=33:33:00:00:00:01 + local valid_src=2001:db8:100::1 + local invalid_src=2001:db8:200::1 + +@@ -1415,7 +1430,7 @@ starg_include_ir_ipv6_ipv6() + echo "-------------------------------------------------------------" + + starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ +- $valid_src $invalid_src "mausezahn -6" ++ $grp_dmac $valid_src $invalid_src "mausezahn -6" + } + + starg_exclude_p2mp_common() +@@ -1425,6 +1440,7 @@ starg_exclude_p2mp_common() + local mcast_grp=$1; shift + local plen=$1; shift + local grp=$1; shift ++ local grp_dmac=$1; shift + local valid_src=$1; shift + local invalid_src=$1; shift + local mz=$1; shift +@@ -1442,12 +1458,12 @@ starg_exclude_p2mp_common() + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $invalid_src dst $mcast_grp src_vni 10010 via veth0" + + # Check that invalid source is not forwarded. +- run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" ++ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 0 + log_test $? 0 "Block excluded source" + + # Check that valid source is forwarded. +- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" ++ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 
0 "Forward valid source" + +@@ -1455,7 +1471,7 @@ starg_exclude_p2mp_common() + run_cmd "ip -n $ns2 address del $mcast_grp/$plen dev veth0" + + # Check that valid source is not received anymore. +- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" ++ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Receive of valid source after removal from group" + } +@@ -1467,6 +1483,7 @@ starg_exclude_p2mp_ipv4_ipv4() + local mcast_grp=238.1.1.1 + local plen=32 + local grp=239.1.1.1 ++ local grp_dmac=01:00:5e:01:01:01 + local valid_src=192.0.2.129 + local invalid_src=192.0.2.145 + +@@ -1474,7 +1491,7 @@ starg_exclude_p2mp_ipv4_ipv4() + echo "Data path: (*, G) EXCLUDE - P2MP - IPv4 overlay / IPv4 underlay" + echo "---------------------------------------------------------------" + +- starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ ++ starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \ + $valid_src $invalid_src "mausezahn" + } + +@@ -1485,6 +1502,7 @@ starg_exclude_p2mp_ipv6_ipv4() + local mcast_grp=238.1.1.1 + local plen=32 + local grp=ff0e::1 ++ local grp_dmac=33:33:00:00:00:01 + local valid_src=2001:db8:100::1 + local invalid_src=2001:db8:200::1 + +@@ -1492,7 +1510,7 @@ starg_exclude_p2mp_ipv6_ipv4() + echo "Data path: (*, G) EXCLUDE - P2MP - IPv6 overlay / IPv4 underlay" + echo "---------------------------------------------------------------" + +- starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ ++ starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \ + $valid_src $invalid_src "mausezahn -6" + } + +@@ -1503,6 +1521,7 @@ starg_exclude_p2mp_ipv4_ipv6() + local mcast_grp=ff0e::2 + local plen=128 + local grp=239.1.1.1 ++ local grp_dmac=01:00:5e:01:01:01 + local valid_src=192.0.2.129 + local invalid_src=192.0.2.145 + +@@ -1510,7 +1529,7 @@ starg_exclude_p2mp_ipv4_ipv6() + echo "Data path: (*, G) EXCLUDE - P2MP - IPv4 overlay / IPv6 underlay" + echo "---------------------------------------------------------------" + +- starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ ++ starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \ + $valid_src $invalid_src "mausezahn" + } + +@@ -1521,6 +1540,7 @@ starg_exclude_p2mp_ipv6_ipv6() + local mcast_grp=ff0e::2 + local plen=128 + local grp=ff0e::1 ++ local grp_dmac=33:33:00:00:00:01 + local valid_src=2001:db8:100::1 + local invalid_src=2001:db8:200::1 + +@@ -1528,7 +1548,7 @@ starg_exclude_p2mp_ipv6_ipv6() + echo "Data path: (*, G) EXCLUDE - P2MP - IPv6 overlay / IPv6 underlay" + echo "---------------------------------------------------------------" + +- starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ ++ starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \ + $valid_src $invalid_src "mausezahn -6" + } + +@@ -1539,6 +1559,7 @@ starg_include_p2mp_common() + local mcast_grp=$1; shift + local plen=$1; shift + local grp=$1; shift ++ local grp_dmac=$1; shift + local valid_src=$1; shift + local invalid_src=$1; shift + local mz=$1; shift +@@ -1556,12 +1577,12 @@ starg_include_p2mp_common() + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode include source_list $valid_src dst $mcast_grp src_vni 10010 via veth0" + + # Check that invalid source is not forwarded. 
+- run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" ++ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 0 + log_test $? 0 "Block excluded source" + + # Check that valid source is forwarded. +- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" ++ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Forward valid source" + +@@ -1569,7 +1590,7 @@ starg_include_p2mp_common() + run_cmd "ip -n $ns2 address del $mcast_grp/$plen dev veth0" + + # Check that valid source is not received anymore. +- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" ++ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Receive of valid source after removal from group" + } +@@ -1581,6 +1602,7 @@ starg_include_p2mp_ipv4_ipv4() + local mcast_grp=238.1.1.1 + local plen=32 + local grp=239.1.1.1 ++ local grp_dmac=01:00:5e:01:01:01 + local valid_src=192.0.2.129 + local invalid_src=192.0.2.145 + +@@ -1588,7 +1610,7 @@ starg_include_p2mp_ipv4_ipv4() + echo "Data path: (*, G) INCLUDE - P2MP - IPv4 overlay / IPv4 underlay" + echo "---------------------------------------------------------------" + +- starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ ++ starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \ + $valid_src $invalid_src "mausezahn" + } + +@@ -1599,6 +1621,7 @@ starg_include_p2mp_ipv6_ipv4() + local mcast_grp=238.1.1.1 + local plen=32 + local grp=ff0e::1 ++ local grp_dmac=33:33:00:00:00:01 + local valid_src=2001:db8:100::1 + local invalid_src=2001:db8:200::1 + +@@ -1606,7 +1629,7 @@ starg_include_p2mp_ipv6_ipv4() + echo "Data path: (*, G) INCLUDE - P2MP - IPv6 overlay / IPv4 underlay" + echo "---------------------------------------------------------------" + +- starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ ++ starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \ + $valid_src $invalid_src "mausezahn -6" + } + +@@ -1617,6 +1640,7 @@ starg_include_p2mp_ipv4_ipv6() + local mcast_grp=ff0e::2 + local plen=128 + local grp=239.1.1.1 ++ local grp_dmac=01:00:5e:01:01:01 + local valid_src=192.0.2.129 + local invalid_src=192.0.2.145 + +@@ -1624,7 +1648,7 @@ starg_include_p2mp_ipv4_ipv6() + echo "Data path: (*, G) INCLUDE - P2MP - IPv4 overlay / IPv6 underlay" + echo "---------------------------------------------------------------" + +- starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ ++ starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \ + $valid_src $invalid_src "mausezahn" + } + +@@ -1635,6 +1659,7 @@ starg_include_p2mp_ipv6_ipv6() + local mcast_grp=ff0e::2 + local plen=128 + local grp=ff0e::1 ++ local grp_dmac=33:33:00:00:00:01 + local valid_src=2001:db8:100::1 + local invalid_src=2001:db8:200::1 + +@@ -1642,7 +1667,7 @@ starg_include_p2mp_ipv6_ipv6() + echo "Data path: (*, G) INCLUDE - P2MP - IPv6 overlay / IPv6 underlay" + echo "---------------------------------------------------------------" + +- starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ ++ starg_include_p2mp_common $ns1 $ns2 
$mcast_grp $plen $grp $grp_dmac \ + $valid_src $invalid_src "mausezahn -6" + } + +@@ -1654,6 +1679,7 @@ egress_vni_translation_common() + local plen=$1; shift + local proto=$1; shift + local grp=$1; shift ++ local grp_dmac=$1; shift + local src=$1; shift + local mz=$1; shift + +@@ -1689,20 +1715,20 @@ egress_vni_translation_common() + # Make sure that packets sent from the first VTEP over VLAN 10 are + # received by the SVI corresponding to the L3VNI (14000 / VLAN 4000) on + # the second VTEP, since it is configured as PVID. +- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" ++ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev br0.4000 ingress" 101 1 + log_test $? 0 "Egress VNI translation - PVID configured" + + # Remove PVID flag from VLAN 4000 on the second VTEP and make sure + # packets are no longer received by the SVI interface. + run_cmd "bridge -n $ns2 vlan add vid 4000 dev vx0" +- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" ++ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev br0.4000 ingress" 101 1 + log_test $? 0 "Egress VNI translation - no PVID configured" + + # Reconfigure the PVID and make sure packets are received again. + run_cmd "bridge -n $ns2 vlan add vid 4000 dev vx0 pvid" +- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" ++ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev br0.4000 ingress" 101 2 + log_test $? 
0 "Egress VNI translation - PVID reconfigured" + } +@@ -1715,6 +1741,7 @@ egress_vni_translation_ipv4_ipv4() + local plen=32 + local proto="ipv4" + local grp=239.1.1.1 ++ local grp_dmac=01:00:5e:01:01:01 + local src=192.0.2.129 + + echo +@@ -1722,7 +1749,7 @@ egress_vni_translation_ipv4_ipv4() + echo "----------------------------------------------------------------" + + egress_vni_translation_common $ns1 $ns2 $mcast_grp $plen $proto $grp \ +- $src "mausezahn" ++ $grp_dmac $src "mausezahn" + } + + egress_vni_translation_ipv6_ipv4() +@@ -1733,6 +1760,7 @@ egress_vni_translation_ipv6_ipv4() + local plen=32 + local proto="ipv6" + local grp=ff0e::1 ++ local grp_dmac=33:33:00:00:00:01 + local src=2001:db8:100::1 + + echo +@@ -1740,7 +1768,7 @@ egress_vni_translation_ipv6_ipv4() + echo "----------------------------------------------------------------" + + egress_vni_translation_common $ns1 $ns2 $mcast_grp $plen $proto $grp \ +- $src "mausezahn -6" ++ $grp_dmac $src "mausezahn -6" + } + + egress_vni_translation_ipv4_ipv6() +@@ -1751,6 +1779,7 @@ egress_vni_translation_ipv4_ipv6() + local plen=128 + local proto="ipv4" + local grp=239.1.1.1 ++ local grp_dmac=01:00:5e:01:01:01 + local src=192.0.2.129 + + echo +@@ -1758,7 +1787,7 @@ egress_vni_translation_ipv4_ipv6() + echo "----------------------------------------------------------------" + + egress_vni_translation_common $ns1 $ns2 $mcast_grp $plen $proto $grp \ +- $src "mausezahn" ++ $grp_dmac $src "mausezahn" + } + + egress_vni_translation_ipv6_ipv6() +@@ -1769,6 +1798,7 @@ egress_vni_translation_ipv6_ipv6() + local plen=128 + local proto="ipv6" + local grp=ff0e::1 ++ local grp_dmac=33:33:00:00:00:01 + local src=2001:db8:100::1 + + echo +@@ -1776,7 +1806,7 @@ egress_vni_translation_ipv6_ipv6() + echo "----------------------------------------------------------------" + + egress_vni_translation_common $ns1 $ns2 $mcast_grp $plen $proto $grp \ +- $src "mausezahn -6" ++ $grp_dmac $src "mausezahn -6" + } + + all_zeros_mdb_common() +@@ -1789,12 +1819,18 @@ all_zeros_mdb_common() + local vtep4_ip=$1; shift + local plen=$1; shift + local ipv4_grp=239.1.1.1 ++ local ipv4_grp_dmac=01:00:5e:01:01:01 + local ipv4_unreg_grp=239.2.2.2 ++ local ipv4_unreg_grp_dmac=01:00:5e:02:02:02 + local ipv4_ll_grp=224.0.0.100 ++ local ipv4_ll_grp_dmac=01:00:5e:00:00:64 + local ipv4_src=192.0.2.129 + local ipv6_grp=ff0e::1 ++ local ipv6_grp_dmac=33:33:00:00:00:01 + local ipv6_unreg_grp=ff0e::2 ++ local ipv6_unreg_grp_dmac=33:33:00:00:00:02 + local ipv6_ll_grp=ff02::1 ++ local ipv6_ll_grp_dmac=33:33:00:00:00:01 + local ipv6_src=2001:db8:100::1 + + # Install all-zeros (catchall) MDB entries for IPv4 and IPv6 traffic +@@ -1830,7 +1866,7 @@ all_zeros_mdb_common() + + # Send registered IPv4 multicast and make sure it only arrives to the + # first VTEP. +- run_cmd "ip netns exec $ns1 mausezahn br0.10 -A $ipv4_src -B $ipv4_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" ++ run_cmd "ip netns exec $ns1 mausezahn br0.10 -a own -b $ipv4_grp_dmac -A $ipv4_src -B $ipv4_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Registered IPv4 multicast - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 0 +@@ -1838,7 +1874,7 @@ all_zeros_mdb_common() + + # Send unregistered IPv4 multicast that is not link-local and make sure + # it arrives to the first and second VTEPs. 
+- run_cmd "ip netns exec $ns1 mausezahn br0.10 -A $ipv4_src -B $ipv4_unreg_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" ++ run_cmd "ip netns exec $ns1 mausezahn br0.10 -a own -b $ipv4_unreg_grp_dmac -A $ipv4_src -B $ipv4_unreg_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 2 + log_test $? 0 "Unregistered IPv4 multicast - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 1 +@@ -1846,7 +1882,7 @@ all_zeros_mdb_common() + + # Send IPv4 link-local multicast traffic and make sure it does not + # arrive to any VTEP. +- run_cmd "ip netns exec $ns1 mausezahn br0.10 -A $ipv4_src -B $ipv4_ll_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" ++ run_cmd "ip netns exec $ns1 mausezahn br0.10 -a own -b $ipv4_ll_grp_dmac -A $ipv4_src -B $ipv4_ll_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 2 + log_test $? 0 "Link-local IPv4 multicast - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 1 +@@ -1881,7 +1917,7 @@ all_zeros_mdb_common() + + # Send registered IPv6 multicast and make sure it only arrives to the + # third VTEP. +- run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -A $ipv6_src -B $ipv6_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" ++ run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -a own -b $ipv6_grp_dmac -A $ipv6_src -B $ipv6_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 103 1 + log_test $? 0 "Registered IPv6 multicast - third VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 104 0 +@@ -1889,7 +1925,7 @@ all_zeros_mdb_common() + + # Send unregistered IPv6 multicast that is not link-local and make sure + # it arrives to the third and fourth VTEPs. +- run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -A $ipv6_src -B $ipv6_unreg_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" ++ run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -a own -b $ipv6_unreg_grp_dmac -A $ipv6_src -B $ipv6_unreg_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 103 2 + log_test $? 0 "Unregistered IPv6 multicast - third VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 104 1 +@@ -1897,7 +1933,7 @@ all_zeros_mdb_common() + + # Send IPv6 link-local multicast traffic and make sure it does not + # arrive to any VTEP. +- run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -A $ipv6_src -B $ipv6_ll_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" ++ run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -a own -b $ipv6_ll_grp_dmac -A $ipv6_src -B $ipv6_ll_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 103 2 + log_test $? 0 "Link-local IPv6 multicast - third VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 104 1 +@@ -1972,6 +2008,7 @@ mdb_fdb_common() + local plen=$1; shift + local proto=$1; shift + local grp=$1; shift ++ local grp_dmac=$1; shift + local src=$1; shift + local mz=$1; shift + +@@ -1995,7 +2032,7 @@ mdb_fdb_common() + + # Send IP multicast traffic and make sure it is forwarded by the MDB + # and only arrives to the first VTEP. +- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" ++ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 
0 "IP multicast - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 0 +@@ -2012,7 +2049,7 @@ mdb_fdb_common() + # Remove the MDB entry and make sure that IP multicast is now forwarded + # by the FDB to the second VTEP. + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep1_ip src_vni 10010" +- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" ++ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "IP multicast after removal - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 2 +@@ -2028,14 +2065,15 @@ mdb_fdb_ipv4_ipv4() + local plen=32 + local proto="ipv4" + local grp=239.1.1.1 ++ local grp_dmac=01:00:5e:01:01:01 + local src=192.0.2.129 + + echo + echo "Data path: MDB with FDB - IPv4 overlay / IPv4 underlay" + echo "------------------------------------------------------" + +- mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \ +- "mausezahn" ++ mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp \ ++ $grp_dmac $src "mausezahn" + } + + mdb_fdb_ipv6_ipv4() +@@ -2047,14 +2085,15 @@ mdb_fdb_ipv6_ipv4() + local plen=32 + local proto="ipv6" + local grp=ff0e::1 ++ local grp_dmac=33:33:00:00:00:01 + local src=2001:db8:100::1 + + echo + echo "Data path: MDB with FDB - IPv6 overlay / IPv4 underlay" + echo "------------------------------------------------------" + +- mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \ +- "mausezahn -6" ++ mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp \ ++ $grp_dmac $src "mausezahn -6" + } + + mdb_fdb_ipv4_ipv6() +@@ -2066,14 +2105,15 @@ mdb_fdb_ipv4_ipv6() + local plen=128 + local proto="ipv4" + local grp=239.1.1.1 ++ local grp_dmac=01:00:5e:01:01:01 + local src=192.0.2.129 + + echo + echo "Data path: MDB with FDB - IPv4 overlay / IPv6 underlay" + echo "------------------------------------------------------" + +- mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \ +- "mausezahn" ++ mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp \ ++ $grp_dmac $src "mausezahn" + } + + mdb_fdb_ipv6_ipv6() +@@ -2085,14 +2125,15 @@ mdb_fdb_ipv6_ipv6() + local plen=128 + local proto="ipv6" + local grp=ff0e::1 ++ local grp_dmac=33:33:00:00:00:01 + local src=2001:db8:100::1 + + echo + echo "Data path: MDB with FDB - IPv6 overlay / IPv6 underlay" + echo "------------------------------------------------------" + +- mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \ +- "mausezahn -6" ++ mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp \ ++ $grp_dmac $src "mausezahn -6" + } + + mdb_grp1_loop() +@@ -2127,7 +2168,9 @@ mdb_torture_common() + local vtep1_ip=$1; shift + local vtep2_ip=$1; shift + local grp1=$1; shift ++ local grp1_dmac=$1; shift + local grp2=$1; shift ++ local grp2_dmac=$1; shift + local src=$1; shift + local mz=$1; shift + local pid1 +@@ -2152,9 +2195,9 @@ mdb_torture_common() + pid1=$! + mdb_grp2_loop $ns1 $vtep1_ip $vtep2_ip $grp2 & + pid2=$! +- ip netns exec $ns1 $mz br0.10 -A $src -B $grp1 -t udp sp=12345,dp=54321 -p 100 -c 0 -q & ++ ip netns exec $ns1 $mz br0.10 -a own -b $grp1_dmac -A $src -B $grp1 -t udp sp=12345,dp=54321 -p 100 -c 0 -q & + pid3=$! 
+- ip netns exec $ns1 $mz br0.10 -A $src -B $grp2 -t udp sp=12345,dp=54321 -p 100 -c 0 -q & ++ ip netns exec $ns1 $mz br0.10 -a own -b $grp2_dmac -A $src -B $grp2 -t udp sp=12345,dp=54321 -p 100 -c 0 -q & + pid4=$! + + sleep 30 +@@ -2170,15 +2213,17 @@ mdb_torture_ipv4_ipv4() + local vtep1_ip=198.51.100.100 + local vtep2_ip=198.51.100.200 + local grp1=239.1.1.1 ++ local grp1_dmac=01:00:5e:01:01:01 + local grp2=239.2.2.2 ++ local grp2_dmac=01:00:5e:02:02:02 + local src=192.0.2.129 + + echo + echo "Data path: MDB torture test - IPv4 overlay / IPv4 underlay" + echo "----------------------------------------------------------" + +- mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \ +- "mausezahn" ++ mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp1_dmac $grp2 \ ++ $grp2_dmac $src "mausezahn" + } + + mdb_torture_ipv6_ipv4() +@@ -2187,15 +2232,17 @@ mdb_torture_ipv6_ipv4() + local vtep1_ip=198.51.100.100 + local vtep2_ip=198.51.100.200 + local grp1=ff0e::1 ++ local grp1_dmac=33:33:00:00:00:01 + local grp2=ff0e::2 ++ local grp2_dmac=33:33:00:00:00:02 + local src=2001:db8:100::1 + + echo + echo "Data path: MDB torture test - IPv6 overlay / IPv4 underlay" + echo "----------------------------------------------------------" + +- mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \ +- "mausezahn -6" ++ mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp1_dmac $grp2 \ ++ $grp2_dmac $src "mausezahn -6" + } + + mdb_torture_ipv4_ipv6() +@@ -2204,15 +2251,17 @@ mdb_torture_ipv4_ipv6() + local vtep1_ip=2001:db8:1000::1 + local vtep2_ip=2001:db8:2000::1 + local grp1=239.1.1.1 ++ local grp1_dmac=01:00:5e:01:01:01 + local grp2=239.2.2.2 ++ local grp2_dmac=01:00:5e:02:02:02 + local src=192.0.2.129 + + echo + echo "Data path: MDB torture test - IPv4 overlay / IPv6 underlay" + echo "----------------------------------------------------------" + +- mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \ +- "mausezahn" ++ mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp1_dmac $grp2 \ ++ $grp2_dmac $src "mausezahn" + } + + mdb_torture_ipv6_ipv6() +@@ -2221,15 +2270,17 @@ mdb_torture_ipv6_ipv6() + local vtep1_ip=2001:db8:1000::1 + local vtep2_ip=2001:db8:2000::1 + local grp1=ff0e::1 ++ local grp1_dmac=33:33:00:00:00:01 + local grp2=ff0e::2 ++ local grp2_dmac=33:33:00:00:00:02 + local src=2001:db8:100::1 + + echo + echo "Data path: MDB torture test - IPv6 overlay / IPv6 underlay" + echo "----------------------------------------------------------" + +- mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \ +- "mausezahn -6" ++ mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp1_dmac $grp2 \ ++ $grp2_dmac $src "mausezahn -6" + } + + ################################################################################ +diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh +index 9cd5e885e91f7..f4549e6894dd9 100755 +--- a/tools/testing/selftests/net/udpgro_fwd.sh ++++ b/tools/testing/selftests/net/udpgro_fwd.sh +@@ -241,7 +241,7 @@ for family in 4 6; do + + create_vxlan_pair + ip netns exec $NS_DST ethtool -K veth$DST rx-gro-list on +- run_test "GRO frag list over UDP tunnel" $OL_NET$DST 1 1 ++ run_test "GRO frag list over UDP tunnel" $OL_NET$DST 10 10 + cleanup + + # use NAT to circumvent GRO FWD check +@@ -254,13 +254,7 @@ for family in 4 6; do + # load arp cache before running the test to reduce the amount of + # stray traffic on top of the UDP tunnel + ip netns exec $NS_SRC $PING -q -c 1 $OL_NET$DST_NAT >/dev/null +- run_test 
"GRO fwd over UDP tunnel" $OL_NET$DST_NAT 1 1 $OL_NET$DST +- cleanup +- +- create_vxlan_pair +- run_bench "UDP tunnel fwd perf" $OL_NET$DST +- ip netns exec $NS_DST ethtool -K veth$DST rx-udp-gro-forwarding on +- run_bench "UDP tunnel GRO fwd perf" $OL_NET$DST ++ run_test "GRO fwd over UDP tunnel" $OL_NET$DST_NAT 10 10 $OL_NET$DST + cleanup + done + |