| author | Mike Pagano <mpagano@gentoo.org> | 2024-08-19 06:23:57 -0400 |
|---|---|---|
| committer | Mike Pagano <mpagano@gentoo.org> | 2024-08-19 06:23:57 -0400 |
| commit | 93a63db9b68552114ee10dfe2295847e72609574 (patch) | |
| tree | a6f3cbbefa3d588926ee33ebe68e5d3b2a3aa261 | |
| parent | Remove redundant patch (diff) | |
| download | linux-patches-6.6.tar.gz, linux-patches-6.6.tar.bz2, linux-patches-6.6.zip | |
Signed-off-by: Mike Pagano <mpagano@gentoo.org>
| -rw-r--r-- | 0000_README | 4 |
| -rw-r--r-- | 1046_linux-6.6.47.patch | 4211 |

2 files changed, 4215 insertions, 0 deletions
diff --git a/0000_README b/0000_README
index 5e05e9e5..ca76ad48 100644
--- a/0000_README
+++ b/0000_README
@@ -227,6 +227,10 @@ Patch: 1045_linux-6.6.46.patch
 From: https://www.kernel.org
 Desc: Linux 6.6.46
 
+Patch: 1046_linux-6.6.47.patch
+From: https://www.kernel.org
+Desc: Linux 6.6.47
+
 Patch: 1510_fs-enable-link-security-restrictions-by-default.patch
 From: http://sources.debian.net/src/linux/3.16.7-ckt4-3/debian/patches/debian/fs-enable-link-security-restrictions-by-default.patch
 Desc: Enable link security restrictions by default.
diff --git a/1046_linux-6.6.47.patch b/1046_linux-6.6.47.patch
new file mode 100644
index 00000000..830bd4b9
--- /dev/null
+++ b/1046_linux-6.6.47.patch
@@ -0,0 +1,4211 @@
+diff --git a/Documentation/bpf/map_lpm_trie.rst b/Documentation/bpf/map_lpm_trie.rst
+index 74d64a30f50073..f9cd579496c9ce 100644
+--- a/Documentation/bpf/map_lpm_trie.rst
++++ b/Documentation/bpf/map_lpm_trie.rst
+@@ -17,7 +17,7 @@ significant byte.
+ 
+ LPM tries may be created with a maximum prefix length that is a multiple
+ of 8, in the range from 8 to 2048. The key used for lookup and update
+-operations is a ``struct bpf_lpm_trie_key``, extended by
++operations is a ``struct bpf_lpm_trie_key_u8``, extended by
+ ``max_prefixlen/8`` bytes.
+ 
+ - For IPv4 addresses the data length is 4 bytes
+diff --git a/Documentation/mm/page_table_check.rst b/Documentation/mm/page_table_check.rst
+index c12838ce6b8de2..c59f22eb6a0f9a 100644
+--- a/Documentation/mm/page_table_check.rst
++++ b/Documentation/mm/page_table_check.rst
+@@ -14,7 +14,7 @@ Page table check performs extra verifications at the time when new pages become
+ accessible from the userspace by getting their page table entries (PTEs PMDs
+ etc.) added into the table.
+ 
+-In case of detected corruption, the kernel is crashed. There is a small
++In case of most detected corruption, the kernel is crashed. There is a small
+ performance and memory overhead associated with the page table check. Therefore,
+ it is disabled by default, but can be optionally enabled on systems where the
+ extra hardening outweighs the performance costs. Also, because page table check
+@@ -22,6 +22,13 @@ is synchronous, it can help with debugging double map memory corruption issues,
+ by crashing kernel at the time wrong mapping occurs instead of later which is
+ often the case with memory corruptions bugs.
+ 
++It can also be used to do page table entry checks over various flags, dump
++warnings when illegal combinations of entry flags are detected. Currently,
++userfaultfd is the only user of such to sanity check wr-protect bit against
++any writable flags. Illegal flag combinations will not directly cause data
++corruption in this case immediately, but that will cause read-only data to
++be writable, leading to corrupt when the page content is later modified.
++
+ Double mapping detection logic
+ ==============================
+ 
+diff --git a/Makefile b/Makefile
+index 77de99984c2f18..6b967e135c80f0 100644
+--- a/Makefile
++++ b/Makefile
+@@ -1,7 +1,7 @@
+ # SPDX-License-Identifier: GPL-2.0
+ VERSION = 6
+ PATCHLEVEL = 6
+-SUBLEVEL = 46
++SUBLEVEL = 47
+ EXTRAVERSION =
+ NAME = Hurr durr I'ma ninja sloth
+ 
+diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
+index 15aa9bad1c280b..ca0bf0b92ca09e 100644
+--- a/arch/arm64/kvm/hyp/pgtable.c
++++ b/arch/arm64/kvm/hyp/pgtable.c
+@@ -523,7 +523,7 @@ static int hyp_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx,
+ 
+ kvm_clear_pte(ctx->ptep);
+ dsb(ishst);
+- __tlbi_level(vae2is, __TLBI_VADDR(ctx->addr, 0), ctx->level);
++ __tlbi_level(vae2is, __TLBI_VADDR(ctx->addr, 0), 0);
+ } else {
+ if (ctx->end - ctx->addr < granule)
+ return -EINVAL;
+@@ -861,9 +861,13 @@ static void stage2_unmap_put_pte(const struct kvm_pgtable_visit_ctx *ctx,
+ if (kvm_pte_valid(ctx->old)) {
+ kvm_clear_pte(ctx->ptep);
+ 
+- if (!stage2_unmap_defer_tlb_flush(pgt))
+- kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu,
+- ctx->addr, ctx->level);
++ if (kvm_pte_table(ctx->old, ctx->level)) {
++ kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ctx->addr,
++ 0);
++ } else if (!stage2_unmap_defer_tlb_flush(pgt)) {
++ kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ctx->addr,
++ ctx->level);
++ }
+ }
+ 
+ mm_ops->put_page(ctx->ptep);
+diff --git a/arch/loongarch/include/uapi/asm/unistd.h b/arch/loongarch/include/uapi/asm/unistd.h
+index fcb668984f0336..b344b1f917153b 100644
+--- a/arch/loongarch/include/uapi/asm/unistd.h
++++ b/arch/loongarch/include/uapi/asm/unistd.h
+@@ -1,4 +1,5 @@
+ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
++#define __ARCH_WANT_NEW_STAT
+ #define __ARCH_WANT_SYS_CLONE
+ #define __ARCH_WANT_SYS_CLONE3
+ 
+diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
+index e02b179ec65989..d03fe4fb41f43c 100644
+--- a/arch/x86/include/asm/pgtable.h
++++ b/arch/x86/include/asm/pgtable.h
+@@ -387,23 +387,7 @@ static inline pte_t pte_wrprotect(pte_t pte)
+ #ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
+ static inline int pte_uffd_wp(pte_t pte)
+ {
+- bool wp = pte_flags(pte) & _PAGE_UFFD_WP;
+-
+-#ifdef CONFIG_DEBUG_VM
+- /*
+- * Having write bit for wr-protect-marked present ptes is fatal,
+- * because it means the uffd-wp bit will be ignored and write will
+- * just go through.
+- *
+- * Use any chance of pgtable walking to verify this (e.g., when
+- * page swapped out or being migrated for all purposes). It means
+- * something is already wrong. Tell the admin even before the
+- * process crashes. We also nail it with wrong pgtable setup.
+- */
+- WARN_ON_ONCE(wp && pte_write(pte));
+-#endif
+-
+- return wp;
++ return pte_flags(pte) & _PAGE_UFFD_WP;
+ }
+ 
+ static inline pte_t pte_mkuffd_wp(pte_t pte)
+diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
+index 77dbd516a05463..277bf0e8ed0918 100644
+--- a/drivers/ata/libata-scsi.c
++++ b/drivers/ata/libata-scsi.c
+@@ -941,8 +941,19 @@ static void ata_gen_passthru_sense(struct ata_queued_cmd *qc)
+ &sense_key, &asc, &ascq);
+ ata_scsi_set_sense(qc->dev, cmd, sense_key, asc, ascq);
+ } else {
+- /* ATA PASS-THROUGH INFORMATION AVAILABLE */
+- ata_scsi_set_sense(qc->dev, cmd, RECOVERED_ERROR, 0, 0x1D);
++ /*
++ * ATA PASS-THROUGH INFORMATION AVAILABLE
++ *
++ * Note: we are supposed to call ata_scsi_set_sense(), which
++ * respects the D_SENSE bit, instead of unconditionally
++ * generating the sense data in descriptor format. However,
++ * because hdparm, hddtemp, and udisks incorrectly assume sense
++ * data in descriptor format, without even looking at the
++ * RESPONSE CODE field in the returned sense data (to see which
++ * format the returned sense data is in), we are stuck with
++ * being bug compatible with older kernels.
++ */
++ scsi_build_sense(cmd, 1, RECOVERED_ERROR, 0, 0x1D);
+ }
+ }
+ 
+diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c
+index 2776ca5fc33f39..b215b28cad7b76 100644
+--- a/drivers/isdn/mISDN/socket.c
++++ b/drivers/isdn/mISDN/socket.c
+@@ -401,23 +401,23 @@ data_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+ }
+ 
+ static int data_sock_setsockopt(struct socket *sock, int level, int optname,
+- sockptr_t optval, unsigned int len)
++ sockptr_t optval, unsigned int optlen)
+ {
+ struct sock *sk = sock->sk;
+ int err = 0, opt = 0;
+ 
+ if (*debug & DEBUG_SOCKET)
+ printk(KERN_DEBUG "%s(%p, %d, %x, optval, %d)\n", __func__, sock,
+- level, optname, len);
++ level, optname, optlen);
+ 
+ lock_sock(sk);
+ 
+ switch (optname) {
+ case MISDN_TIME_STAMP:
+- if (copy_from_sockptr(&opt, optval, sizeof(int))) {
+- err = -EFAULT;
++ err = copy_safe_from_sockptr(&opt, sizeof(opt),
++ optval, optlen);
++ if (err)
+ break;
+- }
+ 
+ if (opt)
+ _pms(sk)->cmask |= MISDN_TIME_STAMP;
+diff --git a/drivers/media/usb/dvb-usb/dvb-usb-init.c b/drivers/media/usb/dvb-usb/dvb-usb-init.c
+index 22d83ac18eb735..fbf58012becdf2 100644
+--- a/drivers/media/usb/dvb-usb/dvb-usb-init.c
++++ b/drivers/media/usb/dvb-usb/dvb-usb-init.c
+@@ -23,40 +23,11 @@ static int dvb_usb_force_pid_filter_usage;
+ module_param_named(force_pid_filter_usage, dvb_usb_force_pid_filter_usage, int, 0444);
+ MODULE_PARM_DESC(force_pid_filter_usage, "force all dvb-usb-devices to use a PID filter, if any (default: 0).");
+ 
+-static int dvb_usb_check_bulk_endpoint(struct dvb_usb_device *d, u8 endpoint)
+-{
+- if (endpoint) {
+- int ret;
+-
+- ret = usb_pipe_type_check(d->udev, usb_sndbulkpipe(d->udev, endpoint));
+- if (ret)
+- return ret;
+- ret = usb_pipe_type_check(d->udev, usb_rcvbulkpipe(d->udev, endpoint));
+- if (ret)
+- return ret;
+- }
+- return 0;
+-}
+-
+-static void dvb_usb_clear_halt(struct dvb_usb_device *d, u8 endpoint)
+-{
+- if (endpoint) {
+- usb_clear_halt(d->udev, usb_sndbulkpipe(d->udev, endpoint));
+- usb_clear_halt(d->udev, usb_rcvbulkpipe(d->udev, endpoint));
+- }
+-}
+-
+ static int dvb_usb_adapter_init(struct dvb_usb_device *d, short *adapter_nrs)
+ {
+ struct dvb_usb_adapter *adap;
+ int ret, n, o;
+ 
+- ret = dvb_usb_check_bulk_endpoint(d, d->props.generic_bulk_ctrl_endpoint);
+- if (ret)
+- return ret;
+- ret = dvb_usb_check_bulk_endpoint(d, d->props.generic_bulk_ctrl_endpoint_response);
+- if (ret)
+- return ret;
+ for (n = 0; n < d->props.num_adapters; n++) {
+ adap = &d->adapter[n];
+ adap->dev = d;
+@@ -132,8 +103,10 @@ static int dvb_usb_adapter_init(struct dvb_usb_device *d, short *adapter_nrs)
+ * when reloading the driver w/o replugging the device
+ * sometimes a timeout occurs, this helps
+ */
+- dvb_usb_clear_halt(d, d->props.generic_bulk_ctrl_endpoint);
+- dvb_usb_clear_halt(d, d->props.generic_bulk_ctrl_endpoint_response);
++ if (d->props.generic_bulk_ctrl_endpoint != 0) {
++ usb_clear_halt(d->udev, usb_sndbulkpipe(d->udev, d->props.generic_bulk_ctrl_endpoint));
++ usb_clear_halt(d->udev, usb_rcvbulkpipe(d->udev, d->props.generic_bulk_ctrl_endpoint));
++ }
+ 
+ return 0;
+ 
+diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c
+index ba8b6bd8233cad..96cca4ee470a4b 100644
+--- a/drivers/net/ppp/pppoe.c
++++ b/drivers/net/ppp/pppoe.c
+@@ -1007,26 +1007,21 @@ static int pppoe_recvmsg(struct socket *sock, struct msghdr *m,
+ struct sk_buff *skb;
+ int error = 0;
+ 
+- if (sk->sk_state & PPPOX_BOUND) {
+- error = -EIO;
+- goto end;
+- }
++ if (sk->sk_state & PPPOX_BOUND)
++ return -EIO;
+ 
+ skb = skb_recv_datagram(sk, flags, &error);
+- if (error < 0)
+- goto end;
++ if (!skb)
++ return error;
+ 
+- if (skb) {
+- total_len = min_t(size_t, total_len, skb->len);
+- error = skb_copy_datagram_msg(skb, 0, m, total_len);
+- if (error == 0) {
+- consume_skb(skb);
+- return total_len;
+- }
++ total_len = min_t(size_t, total_len, skb->len);
++ error = skb_copy_datagram_msg(skb, 0, m, total_len);
++ if (error == 0) {
++ consume_skb(skb);
++ return total_len;
+ }
+ 
+ kfree_skb(skb);
+-end:
+ return error;
+ }
+ 
+diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
+index 0fc7aa78b2e5b9..2c3f55877a1134 100644
+--- a/drivers/nvme/host/pci.c
++++ b/drivers/nvme/host/pci.c
+@@ -2931,6 +2931,13 @@ static unsigned long check_vendor_combination_bug(struct pci_dev *pdev)
+ return NVME_QUIRK_FORCE_NO_SIMPLE_SUSPEND;
+ }
+ 
++ /*
++ * NVMe SSD drops off the PCIe bus after system idle
++ * for 10 hours on a Lenovo N60z board.
++ */
++ if (dmi_match(DMI_BOARD_NAME, "LXKT-ZXEG-N6"))
++ return NVME_QUIRK_NO_APST;
++
+ return 0;
+ }
+ 
+diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
+index c26545d71d39a3..cd6d5bbb4b9df5 100644
+--- a/fs/binfmt_flat.c
++++ b/fs/binfmt_flat.c
+@@ -72,8 +72,10 @@
+ 
+ #ifdef CONFIG_BINFMT_FLAT_NO_DATA_START_OFFSET
+ #define DATA_START_OFFSET_WORDS (0)
++#define MAX_SHARED_LIBS_UPDATE (0)
+ #else
+ #define DATA_START_OFFSET_WORDS (MAX_SHARED_LIBS)
++#define MAX_SHARED_LIBS_UPDATE (MAX_SHARED_LIBS)
+ #endif
+ 
+ struct lib_info {
+@@ -880,7 +882,7 @@ static int load_flat_binary(struct linux_binprm *bprm)
+ return res;
+ 
+ /* Update data segment pointers for all libraries */
+- for (i = 0; i < MAX_SHARED_LIBS; i++) {
++ for (i = 0; i < MAX_SHARED_LIBS_UPDATE; i++) {
+ if (!libinfo.lib_list[i].loaded)
+ continue;
+ for (j = 0; j < MAX_SHARED_LIBS; j++) {
+diff --git a/fs/buffer.c b/fs/buffer.c
+index 12e9a71c693d74..ecd8b47507ff80 100644
+--- a/fs/buffer.c
++++ b/fs/buffer.c
+@@ -2179,6 +2179,8 @@ static void __block_commit_write(struct folio *folio, size_t from, size_t to)
+ struct buffer_head *bh, *head;
+ 
+ bh = head = folio_buffers(folio);
++ if (!bh)
++ return;
+ blocksize = bh->b_size;
+ 
+ block_start = 0;
+diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
+index 5ee7d7bbb361ce..2fbf97077ce910 100644
+--- a/fs/cramfs/inode.c
++++ b/fs/cramfs/inode.c
+@@ -495,7 +495,7 @@ static void cramfs_kill_sb(struct super_block *sb)
+ sb->s_mtd = NULL;
+ } else if (IS_ENABLED(CONFIG_CRAMFS_BLOCKDEV) && sb->s_bdev) {
+ sync_blockdev(sb->s_bdev);
+- blkdev_put(sb->s_bdev, sb);
++ bdev_release(sb->s_bdev_handle);
+ }
+ kfree(sbi);
+ }
+diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c
+index d36b3963c0bf3c..aa59788a61e6e4 100644
+--- a/fs/erofs/decompressor.c
++++ b/fs/erofs/decompressor.c
+@@ -248,15 +248,9 @@ static int z_erofs_lz4_decompress_mem(struct z_erofs_lz4_decompress_ctx *ctx,
+ if (ret != rq->outputsize) {
+ erofs_err(rq->sb, "failed to decompress %d in[%u, %u] out[%u]",
+ ret, rq->inputsize, inputmargin, rq->outputsize);
+-
+- print_hex_dump(KERN_DEBUG, "[ in]: ", DUMP_PREFIX_OFFSET,
+- 16, 1, src + inputmargin, rq->inputsize, true);
+- print_hex_dump(KERN_DEBUG, "[out]: ", DUMP_PREFIX_OFFSET,
+- 16, 1, out, rq->outputsize, true);
+-
+ if (ret >= 0)
+ memset(out + ret, 0, rq->outputsize - ret);
+- ret = -EIO;
++ ret = -EFSCORRUPTED;
+ } else {
+ ret = 0;
+ }
+diff --git a/fs/exec.c b/fs/exec.c
+index 89a9017af7e86f..1cbbef281f8cfe 100644
+--- a/fs/exec.c
++++ b/fs/exec.c
+@@ -1609,6 +1609,7 @@ static void bprm_fill_uid(struct linux_binprm *bprm, struct file *file)
+ unsigned int mode;
+ vfsuid_t vfsuid;
+ vfsgid_t vfsgid;
++ int err;
+ 
+ if (!mnt_may_suid(file->f_path.mnt))
+ return;
+@@ -1625,12 +1626,17 @@ static void bprm_fill_uid(struct linux_binprm *bprm, struct file *file)
+ /* Be careful if suid/sgid is set */
+ inode_lock(inode);
+ 
+- /* reload atomically mode/uid/gid now that lock held */
++ /* Atomically reload and check mode/uid/gid now that lock held. */
+ mode = inode->i_mode;
+ vfsuid = i_uid_into_vfsuid(idmap, inode);
+ vfsgid = i_gid_into_vfsgid(idmap, inode);
++ err = inode_permission(idmap, inode, MAY_EXEC);
+ inode_unlock(inode);
+ 
++ /* Did the exec bit vanish out from under us? Give up. */
++ if (err)
++ return;
++
+ /* We ignore suid/sgid if there are no mappings for them in the ns */
+ if (!vfsuid_has_mapping(bprm->cred->user_ns, vfsuid) ||
+ !vfsgid_has_mapping(bprm->cred->user_ns, vfsgid))
+diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
+index cef119a2476bb4..a4ffd1acac6514 100644
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -2966,23 +2966,29 @@ static int ext4_da_should_update_i_disksize(struct folio *folio,
+ 
+ static int ext4_da_do_write_end(struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned copied,
+- struct page *page)
++ struct folio *folio)
+ {
+ struct inode *inode = mapping->host;
+ loff_t old_size = inode->i_size;
+ bool disksize_changed = false;
+ loff_t new_i_size;
+ 
++ if (unlikely(!folio_buffers(folio))) {
++ folio_unlock(folio);
++ folio_put(folio);
++ return -EIO;
++ }
+ /*
+ * block_write_end() will mark the inode as dirty with I_DIRTY_PAGES
+ * flag, which all that's needed to trigger page writeback.
+ */
+- copied = block_write_end(NULL, mapping, pos, len, copied, page, NULL);
++ copied = block_write_end(NULL, mapping, pos, len, copied,
++ &folio->page, NULL);
+ new_i_size = pos + copied;
+ 
+ /*
+- * It's important to update i_size while still holding page lock,
+- * because page writeout could otherwise come in and zero beyond
++ * It's important to update i_size while still holding folio lock,
++ * because folio writeout could otherwise come in and zero beyond
+ * i_size.
+ *
+ * Since we are holding inode lock, we are sure i_disksize <=
+@@ -3000,14 +3006,14 @@ static int ext4_da_do_write_end(struct address_space *mapping,
+ 
+ i_size_write(inode, new_i_size);
+ end = (new_i_size - 1) & (PAGE_SIZE - 1);
+- if (copied && ext4_da_should_update_i_disksize(page_folio(page), end)) {
++ if (copied && ext4_da_should_update_i_disksize(folio, end)) {
+ ext4_update_i_disksize(inode, new_i_size);
+ disksize_changed = true;
+ }
+ }
+ 
+- unlock_page(page);
+- put_page(page);
++ folio_unlock(folio);
++ folio_put(folio);
+ 
+ if (old_size < pos)
+ pagecache_isize_extended(inode, old_size, pos);
+@@ -3046,10 +3052,10 @@ static int ext4_da_write_end(struct file *file,
+ return ext4_write_inline_data_end(inode, pos, len, copied,
+ folio);
+ 
+- if (unlikely(copied < len) && !PageUptodate(page))
++ if (unlikely(copied < len) && !folio_test_uptodate(folio))
+ copied = 0;
+ 
+- return ext4_da_do_write_end(mapping, pos, len, copied, &folio->page);
++ return ext4_da_do_write_end(mapping, pos, len, copied, folio);
+ }
+ 
+ /*
+diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
+index c58cbe9f7809c1..c368ff671d7739 100644
+--- a/fs/ext4/xattr.c
++++ b/fs/ext4/xattr.c
+@@ -1571,46 +1571,49 @@ ext4_xattr_inode_cache_find(struct inode *inode, const void *value,
+ /*
+ * Add value of the EA in an inode.
+ */
+-static int ext4_xattr_inode_lookup_create(handle_t *handle, struct inode *inode,
+- const void *value, size_t value_len,
+- struct inode **ret_inode)
++static struct inode *ext4_xattr_inode_lookup_create(handle_t *handle,
++ struct inode *inode, const void *value, size_t value_len)
+ {
+ struct inode *ea_inode;
+ u32 hash;
+ int err;
+ 
++ /* Account inode & space to quota even if sharing... */
++ err = ext4_xattr_inode_alloc_quota(inode, value_len);
++ if (err)
++ return ERR_PTR(err);
++
+ hash = ext4_xattr_inode_hash(EXT4_SB(inode->i_sb), value, value_len);
+ ea_inode = ext4_xattr_inode_cache_find(inode, value, value_len, hash);
+ if (ea_inode) {
+ err = ext4_xattr_inode_inc_ref(handle, ea_inode);
+- if (err) {
+- iput(ea_inode);
+- return err;
+- }
+-
+- *ret_inode = ea_inode;
+- return 0;
++ if (err)
++ goto out_err;
++ return ea_inode;
+ }
+ 
+ /* Create an inode for the EA value */
+ ea_inode = ext4_xattr_inode_create(handle, inode, hash);
+- if (IS_ERR(ea_inode))
+- return PTR_ERR(ea_inode);
++ if (IS_ERR(ea_inode)) {
++ ext4_xattr_inode_free_quota(inode, NULL, value_len);
++ return ea_inode;
++ }
+ 
+ err = ext4_xattr_inode_write(handle, ea_inode, value, value_len);
+ if (err) {
+ if (ext4_xattr_inode_dec_ref(handle, ea_inode))
+ ext4_warning_inode(ea_inode, "cleanup dec ref error %d", err);
+- iput(ea_inode);
+- return err;
++ goto out_err;
+ }
+ 
+ if (EA_INODE_CACHE(inode))
+ mb_cache_entry_create(EA_INODE_CACHE(inode), GFP_NOFS, hash,
+ ea_inode->i_ino, true /* reusable */);
+-
+- *ret_inode = ea_inode;
+- return 0;
++ return ea_inode;
++out_err:
++ iput(ea_inode);
++ ext4_xattr_inode_free_quota(inode, NULL, value_len);
++ return ERR_PTR(err);
+ }
+ 
+ /*
+@@ -1622,6 +1625,7 @@ static int ext4_xattr_inode_lookup_create(handle_t *handle, struct inode *inode,
+ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
+ struct ext4_xattr_search *s,
+ handle_t *handle, struct inode *inode,
++ struct inode *new_ea_inode,
+ bool is_block)
+ {
+ struct ext4_xattr_entry *last, *next;
+@@ -1629,7 +1633,6 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
+ size_t min_offs = s->end - s->base, name_len = strlen(i->name);
+ int in_inode = i->in_inode;
+ struct inode *old_ea_inode = NULL;
+- struct inode *new_ea_inode = NULL;
+ size_t old_size, new_size;
+ int ret;
+ 
+@@ -1714,43 +1717,11 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
+ old_ea_inode = NULL;
+ goto out;
+ }
+- }
+- if (i->value && in_inode) {
+- WARN_ON_ONCE(!i->value_len);
+-
+- ret = ext4_xattr_inode_alloc_quota(inode, i->value_len);
+- if (ret)
+- goto out;
+-
+- ret = ext4_xattr_inode_lookup_create(handle, inode, i->value,
+- i->value_len,
+- &new_ea_inode);
+- if (ret) {
+- new_ea_inode = NULL;
+- ext4_xattr_inode_free_quota(inode, NULL, i->value_len);
+- goto out;
+- }
+- }
+ 
+- if (old_ea_inode) {
+ /* We are ready to release ref count on the old_ea_inode. */
+ ret = ext4_xattr_inode_dec_ref(handle, old_ea_inode);
+- if (ret) {
+- /* Release newly required ref count on new_ea_inode. */
+- if (new_ea_inode) {
+- int err;
+-
+- err = ext4_xattr_inode_dec_ref(handle,
+- new_ea_inode);
+- if (err)
+- ext4_warning_inode(new_ea_inode,
+- "dec ref new_ea_inode err=%d",
+- err);
+- ext4_xattr_inode_free_quota(inode, new_ea_inode,
+- i->value_len);
+- }
++ if (ret)
+ goto out;
+- }
+ 
+ ext4_xattr_inode_free_quota(inode, old_ea_inode,
+ le32_to_cpu(here->e_value_size));
+@@ -1874,7 +1845,6 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
+ ret = 0;
+ out:
+ iput(old_ea_inode);
+- iput(new_ea_inode);
+ return ret;
+ }
+ 
+@@ -1937,9 +1907,21 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
+ size_t old_ea_inode_quota = 0;
+ unsigned int ea_ino;
+ 
+-
+ #define header(x) ((struct ext4_xattr_header *)(x))
+ 
++ /* If we need EA inode, prepare it before locking the buffer */
++ if (i->value && i->in_inode) {
++ WARN_ON_ONCE(!i->value_len);
++
++ ea_inode = ext4_xattr_inode_lookup_create(handle, inode,
++ i->value, i->value_len);
++ if (IS_ERR(ea_inode)) {
++ error = PTR_ERR(ea_inode);
++ ea_inode = NULL;
++ goto cleanup;
++ }
++ }
++
+ if (s->base) {
+ int offset = (char *)s->here - bs->bh->b_data;
+ 
+@@ -1948,6 +1930,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
+ EXT4_JTR_NONE);
+ if (error)
+ goto cleanup;
++
+ lock_buffer(bs->bh);
+ 
+ if (header(s->base)->h_refcount == cpu_to_le32(1)) {
+@@ -1974,7 +1957,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
+ }
+ ea_bdebug(bs->bh, "modifying in-place");
+ error = ext4_xattr_set_entry(i, s, handle, inode,
+- true /* is_block */);
++ ea_inode, true /* is_block */);
+ ext4_xattr_block_csum_set(inode, bs->bh);
+ unlock_buffer(bs->bh);
+ if (error == -EFSCORRUPTED)
+@@ -2042,29 +2025,13 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
+ s->end = s->base + sb->s_blocksize;
+ }
+ 
+- error = ext4_xattr_set_entry(i, s, handle, inode, true /* is_block */);
++ error = ext4_xattr_set_entry(i, s, handle, inode, ea_inode,
++ true /* is_block */);
+ if (error == -EFSCORRUPTED)
+ goto bad_block;
+ if (error)
+ goto cleanup;
+ 
+- if (i->value && s->here->e_value_inum) {
+- /*
+- * A ref count on ea_inode has been taken as part of the call to
+- * ext4_xattr_set_entry() above. We would like to drop this
+- * extra ref but we have to wait until the xattr block is
+- * initialized and has its own ref count on the ea_inode.
+- */
+- ea_ino = le32_to_cpu(s->here->e_value_inum);
+- error = ext4_xattr_inode_iget(inode, ea_ino,
+- le32_to_cpu(s->here->e_hash),
+- &ea_inode);
+- if (error) {
+- ea_inode = NULL;
+- goto cleanup;
+- }
+- }
+-
+ inserted:
+ if (!IS_LAST_ENTRY(s->first)) {
+ new_bh = ext4_xattr_block_cache_find(inode, header(s->base),
+@@ -2217,17 +2184,16 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
+ 
+ cleanup:
+ if (ea_inode) {
+- int error2;
+-
+- error2 = ext4_xattr_inode_dec_ref(handle, ea_inode);
+- if (error2)
+- ext4_warning_inode(ea_inode, "dec ref error=%d",
+- error2);
++ if (error) {
++ int error2;
+ 
+- /* If there was an error, revert the quota charge. */
+- if (error)
++ error2 = ext4_xattr_inode_dec_ref(handle, ea_inode);
++ if (error2)
++ ext4_warning_inode(ea_inode, "dec ref error=%d",
++ error2);
+ ext4_xattr_inode_free_quota(inode, ea_inode,
+ i_size_read(ea_inode));
++ }
+ iput(ea_inode);
+ }
+ if (ce)
+@@ -2285,14 +2251,38 @@ int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode,
+ {
+ struct ext4_xattr_ibody_header *header;
+ struct ext4_xattr_search *s = &is->s;
++ struct inode *ea_inode = NULL;
+ int error;
+ 
+ if (!EXT4_INODE_HAS_XATTR_SPACE(inode))
+ return -ENOSPC;
+ 
+- error = ext4_xattr_set_entry(i, s, handle, inode, false /* is_block */);
+- if (error)
++ /* If we need EA inode, prepare it before locking the buffer */
++ if (i->value && i->in_inode) {
++ WARN_ON_ONCE(!i->value_len);
++
++ ea_inode = ext4_xattr_inode_lookup_create(handle, inode,
++ i->value, i->value_len);
++ if (IS_ERR(ea_inode))
++ return PTR_ERR(ea_inode);
++ }
++ error = ext4_xattr_set_entry(i, s, handle, inode, ea_inode,
++ false /* is_block */);
++ if (error) {
++ if (ea_inode) {
++ int error2;
++
++ error2 = ext4_xattr_inode_dec_ref(handle, ea_inode);
++ if (error2)
++ ext4_warning_inode(ea_inode, "dec ref error=%d",
++ error2);
++
++ ext4_xattr_inode_free_quota(inode, ea_inode,
++ i_size_read(ea_inode));
++ iput(ea_inode);
++ }
+ return error;
++ }
+ header = IHDR(inode, ext4_raw_inode(&is->iloc));
+ if (!IS_LAST_ENTRY(s->first)) {
+ header->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC);
+@@ -2301,6 +2291,7 @@ int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode,
+ header->h_magic = cpu_to_le32(0);
+ ext4_clear_inode_state(inode, EXT4_STATE_XATTR);
+ }
++ iput(ea_inode);
+ return 0;
+ }
+ 
+diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
+index ad8dfac73bd446..6a9a470345bfc7 100644
+--- a/fs/f2fs/extent_cache.c
++++ b/fs/f2fs/extent_cache.c
+@@ -19,34 +19,24 @@
+ #include "node.h"
+ #include <trace/events/f2fs.h>
+ 
+-bool sanity_check_extent_cache(struct inode *inode)
++bool sanity_check_extent_cache(struct inode *inode, struct page *ipage)
+ {
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+- struct f2fs_inode_info *fi = F2FS_I(inode);
+- struct extent_tree *et = fi->extent_tree[EX_READ];
+- struct extent_info *ei;
+-
+- if (!et)
+- return true;
++ struct f2fs_extent *i_ext = &F2FS_INODE(ipage)->i_ext;
++ struct extent_info ei;
+ 
+- ei = &et->largest;
+- if (!ei->len)
+- return true;
++ get_read_extent_info(&ei, i_ext);
+ 
+- /* Let's drop, if checkpoint got corrupted. */
+- if (is_set_ckpt_flags(sbi, CP_ERROR_FLAG)) {
+- ei->len = 0;
+- et->largest_updated = true;
++ if (!ei.len)
+ return true;
+- }
+ 
+- if (!f2fs_is_valid_blkaddr(sbi, ei->blk, DATA_GENERIC_ENHANCE) ||
+- !f2fs_is_valid_blkaddr(sbi, ei->blk + ei->len - 1,
++ if (!f2fs_is_valid_blkaddr(sbi, ei.blk, DATA_GENERIC_ENHANCE) ||
++ !f2fs_is_valid_blkaddr(sbi, ei.blk + ei.len - 1,
+ DATA_GENERIC_ENHANCE)) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_warn(sbi, "%s: inode (ino=%lx) extent info [%u, %u, %u] is incorrect, run fsck to fix",
+ __func__, inode->i_ino,
+- ei->blk, ei->fofs, ei->len);
++ ei.blk, ei.fofs, ei.len);
+ return false;
+ }
+ return true;
+@@ -395,24 +385,22 @@ void f2fs_init_read_extent_tree(struct inode *inode, struct page *ipage)
+ 
+ if (!__may_extent_tree(inode, EX_READ)) {
+ /* drop largest read extent */
+- if (i_ext && i_ext->len) {
++ if (i_ext->len) {
+ f2fs_wait_on_page_writeback(ipage, NODE, true, true);
+ i_ext->len = 0;
+ set_page_dirty(ipage);
+ }
+- goto out;
++ set_inode_flag(inode, FI_NO_EXTENT);
++ return;
+ }
+ 
+ et = __grab_extent_tree(inode, EX_READ);
+ 
+- if (!i_ext || !i_ext->len)
+- goto out;
+-
+ get_read_extent_info(&ei, i_ext);
+ 
+ write_lock(&et->lock);
+- if (atomic_read(&et->node_cnt))
+- goto unlock_out;
++ if (atomic_read(&et->node_cnt) || !ei.len)
++ goto skip;
+ 
+ en = __attach_extent_node(sbi, et, &ei, NULL,
+ &et->root.rb_root.rb_node, true);
+@@ -424,11 +412,13 @@ void f2fs_init_read_extent_tree(struct inode *inode, struct page *ipage)
+ list_add_tail(&en->list, &eti->extent_list);
+ spin_unlock(&eti->extent_lock);
+ }
+-unlock_out:
++skip:
++ /* Let's drop, if checkpoint got corrupted. */
++ if (f2fs_cp_error(sbi)) {
++ et->largest.len = 0;
++ et->largest_updated = true;
++ }
+ write_unlock(&et->lock);
+-out:
+- if (!F2FS_I(inode)->extent_tree[EX_READ])
+- set_inode_flag(inode, FI_NO_EXTENT);
+ }
+ 
+ void f2fs_init_age_extent_tree(struct inode *inode)
+diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
+index 19490dd8321943..00eff023cd9d63 100644
+--- a/fs/f2fs/f2fs.h
++++ b/fs/f2fs/f2fs.h
+@@ -4189,7 +4189,7 @@ void f2fs_leave_shrinker(struct f2fs_sb_info *sbi);
+ /*
+ * extent_cache.c
+ */
+-bool sanity_check_extent_cache(struct inode *inode);
++bool sanity_check_extent_cache(struct inode *inode, struct page *ipage);
+ void f2fs_init_extent_tree(struct inode *inode);
+ void f2fs_drop_extent_tree(struct inode *inode);
+ void f2fs_destroy_extent_node(struct inode *inode);
+diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
+index afb7c88ba06b2c..888c301ffe8f4c 100644
+--- a/fs/f2fs/gc.c
++++ b/fs/f2fs/gc.c
+@@ -1563,6 +1563,16 @@ static int gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
+ continue;
+ }
+ 
++ if (f2fs_has_inline_data(inode)) {
++ iput(inode);
++ set_sbi_flag(sbi, SBI_NEED_FSCK);
++ f2fs_err_ratelimited(sbi,
++ "inode %lx has both inline_data flag and "
++ "data block, nid=%u, ofs_in_node=%u",
++ inode->i_ino, dni.nid, ofs_in_node);
++ continue;
++ }
++
+ err = f2fs_gc_pinned_control(inode, gc_type, segno);
+ if (err == -EAGAIN) {
+ iput(inode);
+diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
+index 0172f4e503061d..26e857fee631d9 100644
+--- a/fs/f2fs/inode.c
++++ b/fs/f2fs/inode.c
+@@ -511,16 +511,16 @@ static int do_read_inode(struct inode *inode)
+ 
+ init_idisk_time(inode);
+ 
+- /* Need all the flag bits */
+- f2fs_init_read_extent_tree(inode, node_page);
+- f2fs_init_age_extent_tree(inode);
+-
+- if (!sanity_check_extent_cache(inode)) {
++ if (!sanity_check_extent_cache(inode, node_page)) {
+ f2fs_put_page(node_page, 1);
+ f2fs_handle_error(sbi, ERROR_CORRUPTED_INODE);
+ return -EFSCORRUPTED;
+ }
+ 
++ /* Need all the flag bits */
++ f2fs_init_read_extent_tree(inode, node_page);
++ f2fs_init_age_extent_tree(inode);
++
+ f2fs_put_page(node_page, 1);
+ 
+ stat_inc_inline_xattr(inode);
+diff --git a/fs/fhandle.c b/fs/fhandle.c
+index 99dcf07cfecfe1..c361d7ff1b88dd 100644
+--- a/fs/fhandle.c
++++ b/fs/fhandle.c
+@@ -40,7 +40,7 @@ static long do_sys_name_to_handle(const struct path *path,
+ if (f_handle.handle_bytes > MAX_HANDLE_SZ)
+ return -EINVAL;
+ 
+- handle = kzalloc(sizeof(struct file_handle) + f_handle.handle_bytes,
++ handle = kzalloc(struct_size(handle, f_handle, f_handle.handle_bytes),
+ GFP_KERNEL);
+ if (!handle)
+ return -ENOMEM;
+@@ -75,7 +75,7 @@ static long do_sys_name_to_handle(const struct path *path,
+ /* copy the mount id */
+ if (put_user(real_mount(path->mnt)->mnt_id, mnt_id) ||
+ copy_to_user(ufh, handle,
+- sizeof(struct file_handle) + handle_bytes))
++ struct_size(handle, f_handle, handle_bytes)))
+ retval = -EFAULT;
+ kfree(handle);
+ return retval;
+@@ -196,7 +196,7 @@ static int handle_to_path(int mountdirfd, struct file_handle __user *ufh,
+ retval = -EINVAL;
+ goto out_err;
+ }
+- handle = kmalloc(sizeof(struct file_handle) + f_handle.handle_bytes,
++ handle = kmalloc(struct_size(handle, f_handle, f_handle.handle_bytes),
+ GFP_KERNEL);
+ if (!handle) {
+ retval = -ENOMEM;
+diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c
+index cb3cda1390adb1..5713994328cbcb 100644
+--- a/fs/jfs/jfs_dmap.c
++++ b/fs/jfs/jfs_dmap.c
+@@ -1626,6 +1626,8 @@ s64 dbDiscardAG(struct inode *ip, int agno, s64 minlen)
+ } else if (rc == -ENOSPC) {
+ /* search for next smaller log2 block */
+ l2nb = BLKSTOL2(nblocks) - 1;
++ if (unlikely(l2nb < 0))
++ break;
+ nblocks = 1LL << l2nb;
+ } else {
+ /* Trim any already allocated blocks */
+diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c
+index 031d8f570f581f..5d3127ca68a42d 100644
+--- a/fs/jfs/jfs_dtree.c
++++ b/fs/jfs/jfs_dtree.c
+@@ -834,6 +834,8 @@ int dtInsert(tid_t tid, struct inode *ip,
+ * the full page.
+ */
+ DT_GETSEARCH(ip, btstack->top, bn, mp, p, index);
++ if (p->header.freelist == 0)
++ return -EINVAL;
+ 
+ /*
+ * insert entry for new key
+diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
+index e855b8fde76ce1..cb6d1fda66a702 100644
+--- a/fs/jfs/jfs_logmgr.c
++++ b/fs/jfs/jfs_logmgr.c
+@@ -1058,7 +1058,7 @@ void jfs_syncpt(struct jfs_log *log, int hard_sync)
+ int lmLogOpen(struct super_block *sb)
+ {
+ int rc;
+- struct block_device *bdev;
++ struct bdev_handle *bdev_handle;
+ struct jfs_log *log;
+ struct jfs_sb_info *sbi = JFS_SBI(sb);
+ 
+@@ -1070,7 +1070,7 @@ int lmLogOpen(struct super_block *sb)
+ 
+ mutex_lock(&jfs_log_mutex);
+ list_for_each_entry(log, &jfs_external_logs, journal_list) {
+- if (log->bdev->bd_dev == sbi->logdev) {
++ if (log->bdev_handle->bdev->bd_dev == sbi->logdev) {
+ if (!uuid_equal(&log->uuid, &sbi->loguuid)) {
+ jfs_warn("wrong uuid on JFS journal");
+ mutex_unlock(&jfs_log_mutex);
+@@ -1100,14 +1100,14 @@ int lmLogOpen(struct super_block *sb)
+ * file systems to log may have n-to-1 relationship;
+ */
+ 
+- bdev = blkdev_get_by_dev(sbi->logdev, BLK_OPEN_READ | BLK_OPEN_WRITE,
+- log, NULL);
+- if (IS_ERR(bdev)) {
+- rc = PTR_ERR(bdev);
++ bdev_handle = bdev_open_by_dev(sbi->logdev,
++ BLK_OPEN_READ | BLK_OPEN_WRITE, log, NULL);
++ if (IS_ERR(bdev_handle)) {
++ rc = PTR_ERR(bdev_handle);
+ goto free;
+ }
+ 
+- log->bdev = bdev;
++ log->bdev_handle = bdev_handle;
+ uuid_copy(&log->uuid, &sbi->loguuid);
+ 
+ /*
+@@ -1141,7 +1141,7 @@ int lmLogOpen(struct super_block *sb)
+ lbmLogShutdown(log);
+ 
+ close: /* close external log device */
+- blkdev_put(bdev, log);
++ bdev_release(bdev_handle);
+ 
+ free: /* free log descriptor */
+ mutex_unlock(&jfs_log_mutex);
+@@ -1162,7 +1162,7 @@ static int open_inline_log(struct super_block *sb)
+ init_waitqueue_head(&log->syncwait);
+ 
+ set_bit(log_INLINELOG, &log->flag);
+- log->bdev = sb->s_bdev;
++ log->bdev_handle = sb->s_bdev_handle;
+ log->base = addressPXD(&JFS_SBI(sb)->logpxd);
+ log->size = lengthPXD(&JFS_SBI(sb)->logpxd) >>
+ (L2LOGPSIZE - sb->s_blocksize_bits);
+@@ -1436,7 +1436,7 @@ int lmLogClose(struct super_block *sb)
+ {
+ struct jfs_sb_info *sbi = JFS_SBI(sb);
+ struct jfs_log *log = sbi->log;
+- struct block_device *bdev;
++ struct bdev_handle *bdev_handle;
+ int rc = 0;
+ 
+ jfs_info("lmLogClose: log:0x%p", log);
+@@ -1482,10 +1482,10 @@ int lmLogClose(struct super_block *sb)
+ * external log as separate logical volume
+ */
+ list_del(&log->journal_list);
+- bdev = log->bdev;
++ bdev_handle = log->bdev_handle;
+ rc = lmLogShutdown(log);
+ 
+- blkdev_put(bdev, log);
++ bdev_release(bdev_handle);
+ 
+ kfree(log);
+ 
+@@ -1972,7 +1972,7 @@ static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp)
+ 
+ bp->l_flag |= lbmREAD;
+ 
+- bio = bio_alloc(log->bdev, 1, REQ_OP_READ, GFP_NOFS);
++ bio = bio_alloc(log->bdev_handle->bdev, 1, REQ_OP_READ, GFP_NOFS);
+ bio->bi_iter.bi_sector = bp->l_blkno << (log->l2bsize - 9);
+ __bio_add_page(bio, bp->l_page, LOGPSIZE, bp->l_offset);
+ BUG_ON(bio->bi_iter.bi_size != LOGPSIZE);
+@@ -2110,10 +2110,15 @@ static void lbmStartIO(struct lbuf * bp)
+ {
+ struct bio *bio;
+ struct jfs_log *log = bp->l_log;
++ struct block_device *bdev = NULL;
+ 
+ jfs_info("lbmStartIO");
+ 
+- bio = bio_alloc(log->bdev, 1, REQ_OP_WRITE | REQ_SYNC, GFP_NOFS);
++ if (!log->no_integrity)
++ bdev = log->bdev_handle->bdev;
++
++ bio = bio_alloc(bdev, 1, REQ_OP_WRITE | REQ_SYNC,
++ GFP_NOFS);
+ bio->bi_iter.bi_sector = bp->l_blkno << (log->l2bsize - 9);
+ __bio_add_page(bio, bp->l_page, LOGPSIZE, bp->l_offset);
+ BUG_ON(bio->bi_iter.bi_size != LOGPSIZE);
+diff --git a/fs/jfs/jfs_logmgr.h b/fs/jfs/jfs_logmgr.h
+index 805877ce502044..84aa2d25390743 100644
+--- a/fs/jfs/jfs_logmgr.h
++++ b/fs/jfs/jfs_logmgr.h
+@@ -356,7 +356,7 @@ struct jfs_log {
+ * before writing syncpt.
+ */
+ struct list_head journal_list; /* Global list */
+- struct block_device *bdev; /* 4: log lv pointer */
++ struct bdev_handle *bdev_handle; /* 4: log lv pointer */
+ int serial; /* 4: log mount serial number */
+ 
+ s64 base; /* @8: log extent address (inline log ) */
+diff --git a/fs/jfs/jfs_mount.c b/fs/jfs/jfs_mount.c
+index 631b8bd3e43849..9b5c6a20b30c83 100644
+--- a/fs/jfs/jfs_mount.c
++++ b/fs/jfs/jfs_mount.c
+@@ -430,7 +430,8 @@ int updateSuper(struct super_block *sb, uint state)
+ 
+ if (state == FM_MOUNT) {
+ /* record log's dev_t and mount serial number */
+- j_sb->s_logdev = cpu_to_le32(new_encode_dev(sbi->log->bdev->bd_dev));
++ j_sb->s_logdev = cpu_to_le32(
++ new_encode_dev(sbi->log->bdev_handle->bdev->bd_dev));
+ j_sb->s_logserial = cpu_to_le32(sbi->log->serial);
+ } else if (state == FM_CLEAN) {
+ /*
+diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
+index 6579948070a482..a62331487ebf16 100644
+--- a/fs/lockd/svc.c
++++ b/fs/lockd/svc.c
+@@ -712,8 +712,6 @@ static const struct svc_version *nlmsvc_version[] = {
+ #endif
+ };
+ 
+-static struct svc_stat nlmsvc_stats;
+-
+ #define NLM_NRVERS ARRAY_SIZE(nlmsvc_version)
+ static struct svc_program nlmsvc_program = {
+ .pg_prog = NLM_PROGRAM, /* program number */
+@@ -721,7 +719,6 @@ static struct svc_program nlmsvc_program = {
+ .pg_vers = nlmsvc_version, /* version table */
+ .pg_name = "lockd", /* service name */
+ .pg_class = "nfsd", /* share authentication with nfsd */
+- .pg_stats = &nlmsvc_stats, /* stats table */
+ .pg_authenticate = &lockd_authenticate, /* export authentication */
+ .pg_init_request = svc_generic_init_request,
+ .pg_rpcbind_set = svc_generic_rpcbind_set,
+diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
+index 466ebf1d41b2b7..869c88978899c0 100644
+--- a/fs/nfs/callback.c
++++ b/fs/nfs/callback.c
+@@ -399,15 +399,12 @@ static const struct svc_version *nfs4_callback_version[] = {
+ [4] = &nfs4_callback_version4,
+ };
+ 
+-static struct svc_stat nfs4_callback_stats;
+-
+ static struct svc_program nfs4_callback_program = {
+ .pg_prog = NFS4_CALLBACK, /* RPC service number */
+ .pg_nvers = ARRAY_SIZE(nfs4_callback_version), /* Number of entries */
+ .pg_vers = nfs4_callback_version, /* version table */
+ .pg_name = "NFSv4 callback", /* service name */
+ .pg_class = "nfs", /* authentication class */
+- .pg_stats = &nfs4_callback_stats,
+ .pg_authenticate = nfs_callback_authenticate,
+ .pg_init_request = svc_generic_init_request,
+ .pg_rpcbind_set = svc_generic_rpcbind_set,
+diff --git a/fs/nfsd/cache.h b/fs/nfsd/cache.h
+index 4cbe0434cbb8ce..66a05fefae98ea 100644
+--- a/fs/nfsd/cache.h
++++ b/fs/nfsd/cache.h
+@@ -80,8 +80,6 @@ enum {
+ 
+ int nfsd_drc_slab_create(void);
+ void nfsd_drc_slab_free(void);
+-int nfsd_net_reply_cache_init(struct nfsd_net *nn);
+-void nfsd_net_reply_cache_destroy(struct nfsd_net *nn);
+ int nfsd_reply_cache_init(struct nfsd_net *);
+ void nfsd_reply_cache_shutdown(struct nfsd_net *);
+ int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start,
+diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
+index 11a0eaa2f91407..b7da17e530077e 100644
+--- a/fs/nfsd/export.c
++++ b/fs/nfsd/export.c
+@@ -339,12 +339,16 @@ static int export_stats_init(struct export_stats *stats)
+ 
+ static void export_stats_reset(struct export_stats *stats)
+ {
+- nfsd_percpu_counters_reset(stats->counter, EXP_STATS_COUNTERS_NUM);
++ if (stats)
++ nfsd_percpu_counters_reset(stats->counter,
++ EXP_STATS_COUNTERS_NUM);
+ }
+ 
+ static void export_stats_destroy(struct export_stats *stats)
+ {
+- nfsd_percpu_counters_destroy(stats->counter, EXP_STATS_COUNTERS_NUM);
++ if (stats)
++ nfsd_percpu_counters_destroy(stats->counter,
++ EXP_STATS_COUNTERS_NUM);
+ }
+ 
+ static void svc_export_put(struct kref *ref)
+@@ -353,7 +357,8 @@ static void svc_export_put(struct kref *ref)
+ path_put(&exp->ex_path);
+ auth_domain_put(exp->ex_client);
+ nfsd4_fslocs_free(&exp->ex_fslocs);
+- export_stats_destroy(&exp->ex_stats);
++ export_stats_destroy(exp->ex_stats);
++ kfree(exp->ex_stats);
+ kfree(exp->ex_uuid);
+ kfree_rcu(exp, ex_rcu);
+ }
+@@ -767,13 +772,15 @@ static int svc_export_show(struct seq_file *m,
+ seq_putc(m, '\t');
+ seq_escape(m, exp->ex_client->name, " \t\n\\");
+ if (export_stats) {
+- seq_printf(m, "\t%lld\n", exp->ex_stats.start_time);
++ struct percpu_counter *counter = exp->ex_stats->counter;
++
++ seq_printf(m, "\t%lld\n", exp->ex_stats->start_time);
+ seq_printf(m, "\tfh_stale: %lld\n",
+- percpu_counter_sum_positive(&exp->ex_stats.counter[EXP_STATS_FH_STALE]));
++ percpu_counter_sum_positive(&counter[EXP_STATS_FH_STALE]));
+ seq_printf(m, "\tio_read: %lld\n",
+- percpu_counter_sum_positive(&exp->ex_stats.counter[EXP_STATS_IO_READ]));
++ percpu_counter_sum_positive(&counter[EXP_STATS_IO_READ]));
+ seq_printf(m, "\tio_write: %lld\n",
+- percpu_counter_sum_positive(&exp->ex_stats.counter[EXP_STATS_IO_WRITE]));
++ percpu_counter_sum_positive(&counter[EXP_STATS_IO_WRITE]));
+ seq_putc(m, '\n');
+ return 0;
+ }
+@@ -819,7 +826,7 @@ static void svc_export_init(struct cache_head *cnew, struct cache_head *citem)
+ new->ex_layout_types = 0;
+ new->ex_uuid = NULL;
+ new->cd = item->cd;
+- export_stats_reset(&new->ex_stats);
++ export_stats_reset(new->ex_stats);
+ }
+ 
+ static void export_update(struct cache_head *cnew, struct cache_head *citem)
+@@ -856,7 +863,14 @@ static struct cache_head *svc_export_alloc(void)
+ if (!i)
+ return NULL;
+ 
+- if (export_stats_init(&i->ex_stats)) {
++ i->ex_stats = kmalloc(sizeof(*(i->ex_stats)), GFP_KERNEL);
++ if (!i->ex_stats) {
++ kfree(i);
++ return NULL;
++ }
++
++ if (export_stats_init(i->ex_stats)) {
++ kfree(i->ex_stats);
+ kfree(i);
+ return NULL;
+ }
+diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h
+index 2df8ae25aad302..ca9dc230ae3d0b 100644
+--- a/fs/nfsd/export.h
++++ b/fs/nfsd/export.h
+@@ -64,10 +64,10 @@ struct svc_export {
+ struct cache_head h;
+ struct auth_domain * ex_client;
+ int ex_flags;
++ int ex_fsid;
+ struct path ex_path;
+ kuid_t ex_anon_uid;
+ kgid_t ex_anon_gid;
+- int ex_fsid;
+ unsigned char * ex_uuid; /* 16 byte fsid */
+ struct nfsd4_fs_locations ex_fslocs;
+ uint32_t ex_nflavors;
+@@ -76,8 +76,8 @@ struct svc_export {
+ struct nfsd4_deviceid_map *ex_devid_map;
+ struct cache_detail *cd;
+ struct rcu_head ex_rcu;
+- struct export_stats ex_stats;
+ unsigned long ex_xprtsec_modes;
++ struct export_stats *ex_stats;
+ };
+ 
+ /* an "export key" (expkey) maps a filehandlefragement to an
+diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
+index ec49b200b79762..9bfca3dda63d33 100644
+--- a/fs/nfsd/netns.h
++++ b/fs/nfsd/netns.h
+@@ -11,8 +11,10 @@
+ #include <net/net_namespace.h>
+ #include <net/netns/generic.h>
+ #include <linux/filelock.h>
++#include <linux/nfs4.h>
+ #include <linux/percpu_counter.h>
+ #include <linux/siphash.h>
++#include <linux/sunrpc/stats.h>
+ 
+ /* Hash tables for nfs4_clientid state */
+ #define CLIENT_HASH_BITS 4
+@@ -26,10 +28,22 @@ struct nfsd4_client_tracking_ops;
+ 
+ enum {
+ /* cache misses due only to checksum comparison failures */
+- NFSD_NET_PAYLOAD_MISSES,
++ NFSD_STATS_PAYLOAD_MISSES,
+ /* amount of memory (in bytes) currently consumed by the DRC */
+- NFSD_NET_DRC_MEM_USAGE,
+- NFSD_NET_COUNTERS_NUM
++ NFSD_STATS_DRC_MEM_USAGE,
++ NFSD_STATS_RC_HITS, /* repcache hits */
++ NFSD_STATS_RC_MISSES, /* repcache misses */
++ NFSD_STATS_RC_NOCACHE, /* uncached reqs */
++ NFSD_STATS_FH_STALE, /* FH stale error */
++ NFSD_STATS_IO_READ, /* bytes returned to read requests */
++ NFSD_STATS_IO_WRITE, /* bytes passed in write requests */
++#ifdef CONFIG_NFSD_V4
++ NFSD_STATS_FIRST_NFS4_OP, /* count of individual nfsv4 operations */
++ NFSD_STATS_LAST_NFS4_OP = NFSD_STATS_FIRST_NFS4_OP + LAST_NFS4_OP,
++#define NFSD_STATS_NFS4_OP(op) (NFSD_STATS_FIRST_NFS4_OP + (op))
++ NFSD_STATS_WDELEG_GETATTR, /* count of getattr conflict with wdeleg */
++#endif
++ NFSD_STATS_COUNTERS_NUM
+ };
+ 
+ /*
+@@ -169,7 +183,10 @@ struct nfsd_net {
+ atomic_t num_drc_entries;
+ 
+ /* Per-netns stats counters */
+- struct percpu_counter counter[NFSD_NET_COUNTERS_NUM];
++ struct percpu_counter counter[NFSD_STATS_COUNTERS_NUM];
++
++ /* sunrpc svc stats */
++ struct svc_stat nfsd_svcstats;
+ 
+ /* longest hash chain seen */
+ unsigned int longest_chain;
+diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
+index 451026f9986b61..ae0057c54ef4ed 100644
+--- a/fs/nfsd/nfs4proc.c
++++ b/fs/nfsd/nfs4proc.c
+@@ -2478,10 +2478,10 @@ nfsd4_proc_null(struct svc_rqst *rqstp)
+ return rpc_success;
+ }
+ 
+-static inline void nfsd4_increment_op_stats(u32 opnum)
++static inline void nfsd4_increment_op_stats(struct nfsd_net *nn, u32 opnum)
+ {
+ if (opnum >= FIRST_NFS4_OP && opnum <= LAST_NFS4_OP)
+- percpu_counter_inc(&nfsdstats.counter[NFSD_STATS_NFS4_OP(opnum)]);
++ percpu_counter_inc(&nn->counter[NFSD_STATS_NFS4_OP(opnum)]);
+ }
+ 
+ static const struct nfsd4_operation nfsd4_ops[];
+@@ -2756,7 +2756,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
+ status, nfsd4_op_name(op->opnum));
+ 
+ nfsd4_cstate_clear_replay(cstate);
+- nfsd4_increment_op_stats(op->opnum);
++ nfsd4_increment_op_stats(nn, op->opnum);
+ }
+ 
+ fh_put(current_fh);
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index c7e52d980cd75f..cdad1eaa4a3180 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -8422,6 +8422,7 @@ __be32
+ nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct inode *inode)
+ {
+ __be32 status;
++ struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+ struct file_lock_context *ctx;
+ struct file_lock *fl;
+ struct nfs4_delegation *dp;
+@@ -8451,7 +8452,7 @@ nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct inode *inode)
+ }
+ break_lease:
+ spin_unlock(&ctx->flc_lock);
+- nfsd_stats_wdeleg_getattr_inc();
++ nfsd_stats_wdeleg_getattr_inc(nn);
+ status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ));
+ if (status != nfserr_jukebox ||
+ !nfsd_wait_for_delegreturn(rqstp, inode))
+diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
+index 6cd36af2f97e10..c52132ecb339d5 100644
+--- a/fs/nfsd/nfscache.c
++++ b/fs/nfsd/nfscache.c
+@@ -176,27 +176,6 @@ void nfsd_drc_slab_free(void)
+ kmem_cache_destroy(drc_slab);
+ }
+ 
+-/**
+- * nfsd_net_reply_cache_init - per net namespace reply cache set-up
+- * @nn: nfsd_net being initialized
+- *
+- * Returns zero on succes; otherwise a negative errno is returned.
+- */
+-int nfsd_net_reply_cache_init(struct nfsd_net *nn)
+-{
+- return nfsd_percpu_counters_init(nn->counter, NFSD_NET_COUNTERS_NUM);
+-}
+-
+-/**
+- * nfsd_net_reply_cache_destroy - per net namespace reply cache tear-down
+- * @nn: nfsd_net being freed
+- *
+- */
+-void nfsd_net_reply_cache_destroy(struct nfsd_net *nn)
+-{
+- nfsd_percpu_counters_destroy(nn->counter, NFSD_NET_COUNTERS_NUM);
+-}
+-
+ int nfsd_reply_cache_init(struct nfsd_net *nn)
+ {
+ unsigned int hashsize;
+@@ -502,7 +481,7 @@ nfsd_cache_insert(struct nfsd_drc_bucket *b, struct nfsd_cacherep *key,
+ int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start,
+ unsigned int len, struct nfsd_cacherep **cacherep)
+ {
+- struct nfsd_net *nn;
++ struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+ struct nfsd_cacherep *rp, *found;
+ __wsum csum;
+ struct nfsd_drc_bucket *b;
+@@ -512,7 +491,7 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start,
+ int rtn = RC_DOIT;
+ 
+ if (type == RC_NOCACHE) {
+- nfsd_stats_rc_nocache_inc();
++ nfsd_stats_rc_nocache_inc(nn);
+ goto out;
+ }
+ 
+@@ -522,7 +501,6 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start,
+ * Since the common case is a cache miss followed by an insert,
+ * preallocate an entry.
+ */
+- nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+ rp = nfsd_cacherep_alloc(rqstp, csum, nn);
+ if (!rp)
+ goto out;
+@@ -540,7 +518,7 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start,
+ freed = nfsd_cacherep_dispose(&dispose);
+ trace_nfsd_drc_gc(nn, freed);
+ 
+- nfsd_stats_rc_misses_inc();
++ nfsd_stats_rc_misses_inc(nn);
+ atomic_inc(&nn->num_drc_entries);
+ nfsd_stats_drc_mem_usage_add(nn, sizeof(*rp));
+ goto out;
+@@ -548,7 +526,7 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start,
+ found_entry:
+ /* We found a matching entry which is either in progress or done. */
+ nfsd_reply_cache_free_locked(NULL, rp, nn);
+- nfsd_stats_rc_hits_inc();
++ nfsd_stats_rc_hits_inc(nn);
+ rtn = RC_DROPIT;
+ rp = found;
+ 
+@@ -690,15 +668,15 @@ int nfsd_reply_cache_stats_show(struct seq_file *m, void *v)
+ atomic_read(&nn->num_drc_entries));
+ seq_printf(m, "hash buckets: %u\n", 1 << nn->maskbits);
+ seq_printf(m, "mem usage: %lld\n",
+- percpu_counter_sum_positive(&nn->counter[NFSD_NET_DRC_MEM_USAGE]));
++ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_DRC_MEM_USAGE]));
+ seq_printf(m, "cache hits: %lld\n",
+- percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_HITS]));
++ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_RC_HITS]));
+ seq_printf(m, "cache misses: %lld\n",
+- percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_MISSES]));
++ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_RC_MISSES]));
+ seq_printf(m, "not cached: %lld\n",
+- percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_NOCACHE]));
++ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_RC_NOCACHE]));
+ seq_printf(m, "payload misses: %lld\n",
+- percpu_counter_sum_positive(&nn->counter[NFSD_NET_PAYLOAD_MISSES]));
++ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_PAYLOAD_MISSES]));
+ seq_printf(m, "longest chain len: %u\n", nn->longest_chain);
+ seq_printf(m, "cachesize at longest: %u\n", nn->longest_chain_cachesize);
+ return 0;
+diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
+index a13e81e450718a..887035b7446763 100644
+--- a/fs/nfsd/nfsctl.c
++++ b/fs/nfsd/nfsctl.c
+@@ -1524,14 +1524,17 @@ static __net_init int nfsd_net_init(struct net *net)
+ retval = nfsd_idmap_init(net);
+ if (retval)
+ goto out_idmap_error;
+- retval = nfsd_net_reply_cache_init(nn);
++ retval = nfsd_stat_counters_init(nn);
+ if (retval)
+ goto out_repcache_error;
++ memset(&nn->nfsd_svcstats, 0, sizeof(nn->nfsd_svcstats));
++ nn->nfsd_svcstats.program = &nfsd_program;
+ nn->nfsd_versions = NULL;
+ nn->nfsd4_minorversions = NULL;
+ nfsd4_init_leases_net(nn);
+ get_random_bytes(&nn->siphash_key, sizeof(nn->siphash_key));
+ seqlock_init(&nn->writeverf_lock);
++ nfsd_proc_stat_init(net);
+ 
+ return 0;
+ 
+@@ -1552,7 +1555,8 @@ static __net_exit void nfsd_net_exit(struct net *net)
+ {
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ 
+- nfsd_net_reply_cache_destroy(nn);
++ nfsd_proc_stat_shutdown(net);
++ nfsd_stat_counters_destroy(nn);
+ nfsd_idmap_shutdown(net);
+ nfsd_export_shutdown(net);
+ nfsd_netns_free_versions(nn);
+@@ -1575,12 +1579,9 @@ static int __init init_nfsd(void)
+ retval = nfsd4_init_pnfs();
+ if (retval)
+ goto out_free_slabs;
+- retval = nfsd_stat_init(); /* Statistics */
+- if (retval)
+- goto out_free_pnfs;
+ retval = nfsd_drc_slab_create();
+ if (retval)
+- goto out_free_stat;
++ goto out_free_pnfs;
+ nfsd_lockd_init(); /* lockd->nfsd callbacks */
+ retval = create_proc_exports_entry();
+ if (retval)
+@@ -1610,8 +1611,6 @@ static int __init init_nfsd(void)
+ out_free_lockd:
+ nfsd_lockd_shutdown();
+ nfsd_drc_slab_free();
+-out_free_stat:
+- nfsd_stat_shutdown();
+ out_free_pnfs:
+ nfsd4_exit_pnfs();
+ out_free_slabs:
+@@ -1628,7 +1627,6 @@ static void __exit exit_nfsd(void)
+ nfsd_drc_slab_free();
+ remove_proc_entry("fs/nfs/exports", NULL);
+ remove_proc_entry("fs/nfs", NULL);
+- nfsd_stat_shutdown();
+ nfsd_lockd_shutdown();
+ nfsd4_free_slabs();
+ nfsd4_exit_pnfs();
+diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
+index fe846a360ae18d..d05bd2b811f377 100644
+--- a/fs/nfsd/nfsd.h
++++ b/fs/nfsd/nfsd.h
+@@ -69,6 +69,7 @@ extern struct mutex nfsd_mutex;
+ extern spinlock_t nfsd_drc_lock;
+ extern unsigned long nfsd_drc_max_mem;
+ extern unsigned long nfsd_drc_mem_used;
++extern atomic_t nfsd_th_cnt; /* number of available threads */
+ 
+ extern const struct seq_operations nfs_exports_op;
+ 
+diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
+index 937be276bb6b48..c2495d98c18928 100644
+--- a/fs/nfsd/nfsfh.c
++++ b/fs/nfsd/nfsfh.c
+@@ -327,6 +327,7 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
+ __be32
+ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int access)
+ {
++ struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+ struct svc_export *exp = NULL;
+ struct dentry *dentry;
+ __be32 error;
+@@ -395,7 +396,7 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int access)
+ out:
+ trace_nfsd_fh_verify_err(rqstp, fhp, type, access, error);
+ if (error == nfserr_stale)
+- nfsd_stats_fh_stale_inc(exp);
++ nfsd_stats_fh_stale_inc(nn, exp);
+ return error;
+ }
+ 
+diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
+index 7ef6af908faacb..7911c4b3b5d355 100644
+--- a/fs/nfsd/nfssvc.c
++++ b/fs/nfsd/nfssvc.c
+@@ -34,6 +34,7 @@
+ 
+ #define NFSDDBG_FACILITY NFSDDBG_SVC
+ 
++atomic_t nfsd_th_cnt = ATOMIC_INIT(0);
+ extern struct svc_program nfsd_program;
+ static int nfsd(void *vrqstp);
+ #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
+@@ -89,7 +90,6 @@ unsigned long nfsd_drc_max_mem;
+ unsigned long nfsd_drc_mem_used;
+ 
+ #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
+-static struct svc_stat nfsd_acl_svcstats;
+ static const struct svc_version *nfsd_acl_version[] = {
+ # if defined(CONFIG_NFSD_V2_ACL)
+ [2] = &nfsd_acl_version2,
+@@ -108,15 +108,11 @@ static struct svc_program nfsd_acl_program = {
+ .pg_vers = nfsd_acl_version,
+ .pg_name = "nfsacl",
+ .pg_class = "nfsd",
+- .pg_stats = &nfsd_acl_svcstats,
+ .pg_authenticate = &svc_set_client,
+ .pg_init_request = nfsd_acl_init_request,
+ .pg_rpcbind_set = nfsd_acl_rpcbind_set,
+ };
+ 
+-static struct svc_stat nfsd_acl_svcstats = {
+- .program = &nfsd_acl_program,
+-};
+ #endif /* defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) */
+ 
+ static const struct svc_version *nfsd_version[] = {
+@@ -141,7 +137,6 @@ struct svc_program nfsd_program = {
+ .pg_vers = nfsd_version, /* version table */
+ .pg_name = "nfsd", /* program name */
+ .pg_class = "nfsd", /* authentication class */
+- .pg_stats = &nfsd_svcstats, /* version table */
+ .pg_authenticate = &svc_set_client, /* export authentication */
+ .pg_init_request = nfsd_init_request,
+ .pg_rpcbind_set = nfsd_rpcbind_set,
+@@ -675,7 +670,8 @@ int nfsd_create_serv(struct net *net)
+ if (nfsd_max_blksize == 0)
+ nfsd_max_blksize = nfsd_get_default_max_blksize();
+ nfsd_reset_versions(nn);
+- serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize, nfsd);
++ serv = svc_create_pooled(&nfsd_program, &nn->nfsd_svcstats,
++ nfsd_max_blksize, nfsd);
+ if (serv == NULL)
+ return -ENOMEM;
+ 
+@@ -950,7 +946,7 @@ nfsd(void *vrqstp)
+ 
+ current->fs->umask = 0;
+ 
+- atomic_inc(&nfsdstats.th_cnt);
++ atomic_inc(&nfsd_th_cnt);
+ 
+ set_freezable();
+ 
+@@ -964,7 +960,7 @@ nfsd(void *vrqstp)
+ svc_recv(rqstp);
+ }
+ 
+- atomic_dec(&nfsdstats.th_cnt);
++ atomic_dec(&nfsd_th_cnt);
+ 
+ out:
+ /* Release the thread */
+diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c
+index 63797635e1c328..9f606fa08bd4b8 100644
+--- a/fs/nfsd/stats.c
++++ b/fs/nfsd/stats.c
+@@ -27,25 +27,22 @@
+ 
+ #include "nfsd.h"
+ 
+-struct nfsd_stats nfsdstats;
+-struct svc_stat nfsd_svcstats = {
+- .program = &nfsd_program,
+-};
+-
+ static int nfsd_show(struct seq_file *seq, void *v)
+ {
++ struct net *net = pde_data(file_inode(seq->file));
++ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ int i;
+ 
+ seq_printf(seq, "rc %lld %lld %lld\nfh %lld 0 0 0 0\nio %lld %lld\n",
+- percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_HITS]),
+- percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_MISSES]),
+- percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_NOCACHE]),
+- percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_FH_STALE]),
+- percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_IO_READ]),
+- percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_IO_WRITE]));
++ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_RC_HITS]),
++ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_RC_MISSES]),
++ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_RC_NOCACHE]),
++ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_FH_STALE]),
++ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_IO_READ]),
++ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_IO_WRITE]));
+ 
+ /* thread usage: */
+- seq_printf(seq, "th %u 0", atomic_read(&nfsdstats.th_cnt));
++ seq_printf(seq, "th %u 0", atomic_read(&nfsd_th_cnt));
+ 
+ /* deprecated thread usage histogram stats */
+ for (i = 0; i < 10; i++)
+@@ -55,7 +52,7 @@ static int nfsd_show(struct seq_file *seq, void *v)
+ seq_puts(seq, "\nra 0 0 0 0 0 0 0 0 0 0 0 0\n");
+ 
+ /* show my rpc info */
+- svc_seq_show(seq, &nfsd_svcstats);
++ svc_seq_show(seq, &nn->nfsd_svcstats);
+ 
+ #ifdef CONFIG_NFSD_V4
+ /* Show count for individual nfsv4 operations */
+@@ -63,10 +60,10 @@ static int nfsd_show(struct seq_file *seq, void *v)
+ seq_printf(seq,"proc4ops %u", LAST_NFS4_OP + 1);
+ for (i = 0; i <= LAST_NFS4_OP; i++) {
+ seq_printf(seq, " %lld",
+- percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_NFS4_OP(i)]));
++ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_NFS4_OP(i)]));
+ }
+ seq_printf(seq, "\nwdeleg_getattr %lld",
+- percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_WDELEG_GETATTR]));
++ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_WDELEG_GETATTR]));
+ 
+ seq_putc(seq, '\n');
+ #endif
+@@ -76,7 +73,7 @@ static int nfsd_show(struct seq_file *seq, void *v)
+ 
+ DEFINE_PROC_SHOW_ATTRIBUTE(nfsd);
+ 
+-int nfsd_percpu_counters_init(struct percpu_counter counters[], int num)
++int nfsd_percpu_counters_init(struct percpu_counter *counters, int num)
+ {
+ int i, err = 0;
+ 
+@@ -108,31 +105,24 @@ void nfsd_percpu_counters_destroy(struct percpu_counter counters[], int num)
+ percpu_counter_destroy(&counters[i]);
+ }
+ 
+-static int nfsd_stat_counters_init(void)
++int nfsd_stat_counters_init(struct nfsd_net *nn)
+ {
+- return nfsd_percpu_counters_init(nfsdstats.counter, NFSD_STATS_COUNTERS_NUM);
++ return nfsd_percpu_counters_init(nn->counter, NFSD_STATS_COUNTERS_NUM);
+ }
+ 
+-static void nfsd_stat_counters_destroy(void)
++void nfsd_stat_counters_destroy(struct nfsd_net *nn)
+ {
+- nfsd_percpu_counters_destroy(nfsdstats.counter, NFSD_STATS_COUNTERS_NUM);
++ nfsd_percpu_counters_destroy(nn->counter, NFSD_STATS_COUNTERS_NUM);
+ }
+ 
+-int nfsd_stat_init(void)
++void nfsd_proc_stat_init(struct net *net)
+ {
+- int err;
+-
+- err = nfsd_stat_counters_init();
+- if (err)
+- return err;
++ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ 
+- svc_proc_register(&init_net, &nfsd_svcstats, &nfsd_proc_ops);
+-
+- return 0;
++ svc_proc_register(net, &nn->nfsd_svcstats, &nfsd_proc_ops);
+ }
+ 
+-void nfsd_stat_shutdown(void)
++void nfsd_proc_stat_shutdown(struct net *net)
+ {
+- nfsd_stat_counters_destroy();
+- svc_proc_unregister(&init_net, "nfsd");
++ svc_proc_unregister(net, "nfsd");
+ }
+diff --git a/fs/nfsd/stats.h b/fs/nfsd/stats.h
+index cf5524e7ca0623..d2753e975dfd34 100644
+--- a/fs/nfsd/stats.h
++++ b/fs/nfsd/stats.h
+@@ -10,94 +10,72 @@
+ #include <uapi/linux/nfsd/stats.h>
+ #include <linux/percpu_counter.h>
+ 
+-
+-enum {
+- NFSD_STATS_RC_HITS, /* repcache hits */
+- NFSD_STATS_RC_MISSES, /* repcache misses */
+- NFSD_STATS_RC_NOCACHE, /* uncached reqs */
+- NFSD_STATS_FH_STALE, /* FH stale error */
+- NFSD_STATS_IO_READ, /* bytes returned to read requests */
+- NFSD_STATS_IO_WRITE, /* bytes passed in write requests */
+-#ifdef CONFIG_NFSD_V4
+- NFSD_STATS_FIRST_NFS4_OP, /* count of individual nfsv4 operations */
+- NFSD_STATS_LAST_NFS4_OP = NFSD_STATS_FIRST_NFS4_OP + LAST_NFS4_OP,
+-#define NFSD_STATS_NFS4_OP(op) (NFSD_STATS_FIRST_NFS4_OP + (op))
+- NFSD_STATS_WDELEG_GETATTR, /* count of getattr conflict with wdeleg */
+-#endif
+- NFSD_STATS_COUNTERS_NUM
+-};
+-
+-struct nfsd_stats {
+- struct percpu_counter counter[NFSD_STATS_COUNTERS_NUM];
+-
+- atomic_t th_cnt; /* number of available threads */
+-};
+-
+-extern struct nfsd_stats nfsdstats;
+-
+-extern struct svc_stat nfsd_svcstats;
+-
+-int nfsd_percpu_counters_init(struct percpu_counter counters[], int num);
+-void nfsd_percpu_counters_reset(struct percpu_counter counters[], int num);
+-void nfsd_percpu_counters_destroy(struct percpu_counter counters[], int num);
+-int nfsd_stat_init(void);
+-void nfsd_stat_shutdown(void);
+-
+-static inline void nfsd_stats_rc_hits_inc(void)
++int nfsd_percpu_counters_init(struct percpu_counter *counters, int num);
++void nfsd_percpu_counters_reset(struct percpu_counter *counters, int num);
++void nfsd_percpu_counters_destroy(struct percpu_counter *counters, int num);
++int nfsd_stat_counters_init(struct nfsd_net *nn);
++void nfsd_stat_counters_destroy(struct nfsd_net *nn);
++void nfsd_proc_stat_init(struct net *net);
++void nfsd_proc_stat_shutdown(struct net *net);
++
++static inline void nfsd_stats_rc_hits_inc(struct nfsd_net *nn)
+ {
+- percpu_counter_inc(&nfsdstats.counter[NFSD_STATS_RC_HITS]);
++ percpu_counter_inc(&nn->counter[NFSD_STATS_RC_HITS]);
+ }
+ 
+-static inline void nfsd_stats_rc_misses_inc(void)
++static inline void nfsd_stats_rc_misses_inc(struct nfsd_net *nn)
+ {
+- percpu_counter_inc(&nfsdstats.counter[NFSD_STATS_RC_MISSES]);
++ percpu_counter_inc(&nn->counter[NFSD_STATS_RC_MISSES]);
+ }
+ 
+-static inline void nfsd_stats_rc_nocache_inc(void)
++static inline void nfsd_stats_rc_nocache_inc(struct nfsd_net *nn)
+ {
+- percpu_counter_inc(&nfsdstats.counter[NFSD_STATS_RC_NOCACHE]);
++ percpu_counter_inc(&nn->counter[NFSD_STATS_RC_NOCACHE]);
+ }
+ 
+-static inline void nfsd_stats_fh_stale_inc(struct svc_export *exp)
++static inline void nfsd_stats_fh_stale_inc(struct nfsd_net *nn,
++ struct svc_export *exp)
+ {
+- percpu_counter_inc(&nfsdstats.counter[NFSD_STATS_FH_STALE]);
+- if (exp)
+- percpu_counter_inc(&exp->ex_stats.counter[EXP_STATS_FH_STALE]);
++ percpu_counter_inc(&nn->counter[NFSD_STATS_FH_STALE]);
++ if (exp && exp->ex_stats)
++ percpu_counter_inc(&exp->ex_stats->counter[EXP_STATS_FH_STALE]);
+ }
+ 
+-static inline void nfsd_stats_io_read_add(struct svc_export *exp, s64 amount)
++static inline void nfsd_stats_io_read_add(struct nfsd_net *nn,
++ struct svc_export *exp, s64 amount)
+ {
+- percpu_counter_add(&nfsdstats.counter[NFSD_STATS_IO_READ], amount);
+- if (exp)
+- percpu_counter_add(&exp->ex_stats.counter[EXP_STATS_IO_READ], amount);
++ percpu_counter_add(&nn->counter[NFSD_STATS_IO_READ], amount);
++ if (exp && exp->ex_stats)
++ percpu_counter_add(&exp->ex_stats->counter[EXP_STATS_IO_READ], amount);
+ }
+ 
+-static inline void nfsd_stats_io_write_add(struct svc_export *exp, s64 amount)
++static inline void nfsd_stats_io_write_add(struct nfsd_net *nn,
++ struct svc_export *exp, s64 amount)
+ {
+- percpu_counter_add(&nfsdstats.counter[NFSD_STATS_IO_WRITE], amount);
+- if (exp)
+- percpu_counter_add(&exp->ex_stats.counter[EXP_STATS_IO_WRITE], amount);
++ percpu_counter_add(&nn->counter[NFSD_STATS_IO_WRITE], amount);
++ if (exp && exp->ex_stats)
++ percpu_counter_add(&exp->ex_stats->counter[EXP_STATS_IO_WRITE], amount);
+ }
+ 
+ static inline void nfsd_stats_payload_misses_inc(struct nfsd_net *nn)
+ {
+- percpu_counter_inc(&nn->counter[NFSD_NET_PAYLOAD_MISSES]);
++ percpu_counter_inc(&nn->counter[NFSD_STATS_PAYLOAD_MISSES]);
+ }
+ 
+ static inline void nfsd_stats_drc_mem_usage_add(struct nfsd_net *nn, s64 amount)
+ {
+- percpu_counter_add(&nn->counter[NFSD_NET_DRC_MEM_USAGE], amount);
++ percpu_counter_add(&nn->counter[NFSD_STATS_DRC_MEM_USAGE], amount);
+ }
+ 
+ static inline void nfsd_stats_drc_mem_usage_sub(struct nfsd_net *nn, s64 amount)
+ {
+- percpu_counter_sub(&nn->counter[NFSD_NET_DRC_MEM_USAGE], amount);
++ percpu_counter_sub(&nn->counter[NFSD_STATS_DRC_MEM_USAGE], amount);
+ }
+ 
+ #ifdef CONFIG_NFSD_V4
+-static inline void nfsd_stats_wdeleg_getattr_inc(void)
++static inline void nfsd_stats_wdeleg_getattr_inc(struct nfsd_net *nn)
+ {
+- percpu_counter_inc(&nfsdstats.counter[NFSD_STATS_WDELEG_GETATTR]);
++ percpu_counter_inc(&nn->counter[NFSD_STATS_WDELEG_GETATTR]);
+ }
+ #endif
+ #endif /* _NFSD_STATS_H */
+diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
+index d0fdf70ab20d36..1f2a5b22b6498e 100644
+--- a/fs/nfsd/vfs.c
++++ b/fs/nfsd/vfs.c
+@@ -985,7 +985,9 @@ static __be32 nfsd_finish_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ unsigned long *count, u32 *eof, ssize_t host_err)
+ {
+ if (host_err >= 0) {
+- nfsd_stats_io_read_add(fhp->fh_export, host_err);
++ struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
++
++ nfsd_stats_io_read_add(nn, fhp->fh_export, host_err);
+ *eof = nfsd_eof_on_read(file, offset, host_err, *count);
+ *count = host_err;
+ fsnotify_access(file);
+@@ -1168,7 +1170,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
+ goto out_nfserr;
+ }
+ *cnt = host_err;
+- nfsd_stats_io_write_add(exp, *cnt);
++ nfsd_stats_io_write_add(nn, exp, *cnt);
+ fsnotify_modify(file);
+ host_err = filemap_check_wb_err(file->f_mapping, since);
+ if (host_err < 0)
+diff --git a/fs/ntfs3/frecord.c b/fs/ntfs3/frecord.c
+index 424865dfca74ba..45b687aff700be 100644
+--- a/fs/ntfs3/frecord.c
++++ b/fs/ntfs3/frecord.c
+@@ -1896,6 +1896,47 @@ enum REPARSE_SIGN ni_parse_reparse(struct ntfs_inode *ni, struct ATTRIB *attr,
+ return REPARSE_LINK;
+ }
+ 
++/*
++ * fiemap_fill_next_extent_k - a copy of fiemap_fill_next_extent
++ * but it accepts kernel address for fi_extents_start
++ */
++static int fiemap_fill_next_extent_k(struct fiemap_extent_info *fieinfo,
++ u64 logical, u64 phys, u64 len, u32 flags)
++{
++ struct fiemap_extent extent;
++ struct fiemap_extent __user *dest = fieinfo->fi_extents_start;
++
++ /* only count the extents */
++ if (fieinfo->fi_extents_max == 0) {
++ fieinfo->fi_extents_mapped++;
++ return (flags & FIEMAP_EXTENT_LAST) ?
1 : 0; ++ } ++ ++ if (fieinfo->fi_extents_mapped >= fieinfo->fi_extents_max) ++ return 1; ++ ++ if (flags & FIEMAP_EXTENT_DELALLOC) ++ flags |= FIEMAP_EXTENT_UNKNOWN; ++ if (flags & FIEMAP_EXTENT_DATA_ENCRYPTED) ++ flags |= FIEMAP_EXTENT_ENCODED; ++ if (flags & (FIEMAP_EXTENT_DATA_TAIL | FIEMAP_EXTENT_DATA_INLINE)) ++ flags |= FIEMAP_EXTENT_NOT_ALIGNED; ++ ++ memset(&extent, 0, sizeof(extent)); ++ extent.fe_logical = logical; ++ extent.fe_physical = phys; ++ extent.fe_length = len; ++ extent.fe_flags = flags; ++ ++ dest += fieinfo->fi_extents_mapped; ++ memcpy(dest, &extent, sizeof(extent)); ++ ++ fieinfo->fi_extents_mapped++; ++ if (fieinfo->fi_extents_mapped == fieinfo->fi_extents_max) ++ return 1; ++ return (flags & FIEMAP_EXTENT_LAST) ? 1 : 0; ++} ++ + /* + * ni_fiemap - Helper for file_fiemap(). + * +@@ -1906,6 +1947,8 @@ int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo, + __u64 vbo, __u64 len) + { + int err = 0; ++ struct fiemap_extent __user *fe_u = fieinfo->fi_extents_start; ++ struct fiemap_extent *fe_k = NULL; + struct ntfs_sb_info *sbi = ni->mi.sbi; + u8 cluster_bits = sbi->cluster_bits; + struct runs_tree *run; +@@ -1953,6 +1996,18 @@ int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo, + goto out; + } + ++ /* ++ * To avoid lock problems replace pointer to user memory by pointer to kernel memory. ++ */ ++ fe_k = kmalloc_array(fieinfo->fi_extents_max, ++ sizeof(struct fiemap_extent), ++ GFP_NOFS | __GFP_ZERO); ++ if (!fe_k) { ++ err = -ENOMEM; ++ goto out; ++ } ++ fieinfo->fi_extents_start = fe_k; ++ + end = vbo + len; + alloc_size = le64_to_cpu(attr->nres.alloc_size); + if (end > alloc_size) +@@ -2041,8 +2096,9 @@ int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo, + if (vbo + dlen >= end) + flags |= FIEMAP_EXTENT_LAST; + +- err = fiemap_fill_next_extent(fieinfo, vbo, lbo, dlen, +- flags); ++ err = fiemap_fill_next_extent_k(fieinfo, vbo, lbo, dlen, ++ flags); ++ + if (err < 0) + break; + if (err == 1) { +@@ -2062,7 +2118,8 @@ int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo, + if (vbo + bytes >= end) + flags |= FIEMAP_EXTENT_LAST; + +- err = fiemap_fill_next_extent(fieinfo, vbo, lbo, bytes, flags); ++ err = fiemap_fill_next_extent_k(fieinfo, vbo, lbo, bytes, ++ flags); + if (err < 0) + break; + if (err == 1) { +@@ -2075,7 +2132,19 @@ int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo, + + up_read(run_lock); + ++ /* ++ * Copy to user memory out of lock ++ */ ++ if (copy_to_user(fe_u, fe_k, ++ fieinfo->fi_extents_max * ++ sizeof(struct fiemap_extent))) { ++ err = -EFAULT; ++ } ++ + out: ++ /* Restore original pointer. */ ++ fieinfo->fi_extents_start = fe_u; ++ kfree(fe_k); + return err; + } + +diff --git a/fs/quota/quota_tree.c b/fs/quota/quota_tree.c +index 0f1493e0f6d059..254f6359b287fa 100644 +--- a/fs/quota/quota_tree.c ++++ b/fs/quota/quota_tree.c +@@ -21,6 +21,12 @@ MODULE_AUTHOR("Jan Kara"); + MODULE_DESCRIPTION("Quota trie support"); + MODULE_LICENSE("GPL"); + ++/* ++ * Maximum quota tree depth we support. Only to limit recursion when working ++ * with the tree. 
++ */ ++#define MAX_QTREE_DEPTH 6 ++ + #define __QUOTA_QT_PARANOIA + + static int __get_index(struct qtree_mem_dqinfo *info, qid_t id, int depth) +@@ -327,27 +333,36 @@ static uint find_free_dqentry(struct qtree_mem_dqinfo *info, + + /* Insert reference to structure into the trie */ + static int do_insert_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot, +- uint *treeblk, int depth) ++ uint *blks, int depth) + { + char *buf = kmalloc(info->dqi_usable_bs, GFP_NOFS); + int ret = 0, newson = 0, newact = 0; + __le32 *ref; + uint newblk; ++ int i; + + if (!buf) + return -ENOMEM; +- if (!*treeblk) { ++ if (!blks[depth]) { + ret = get_free_dqblk(info); + if (ret < 0) + goto out_buf; +- *treeblk = ret; ++ for (i = 0; i < depth; i++) ++ if (ret == blks[i]) { ++ quota_error(dquot->dq_sb, ++ "Free block already used in tree: block %u", ++ ret); ++ ret = -EIO; ++ goto out_buf; ++ } ++ blks[depth] = ret; + memset(buf, 0, info->dqi_usable_bs); + newact = 1; + } else { +- ret = read_blk(info, *treeblk, buf); ++ ret = read_blk(info, blks[depth], buf); + if (ret < 0) { + quota_error(dquot->dq_sb, "Can't read tree quota " +- "block %u", *treeblk); ++ "block %u", blks[depth]); + goto out_buf; + } + } +@@ -357,8 +372,20 @@ static int do_insert_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot, + info->dqi_blocks - 1); + if (ret) + goto out_buf; +- if (!newblk) ++ if (!newblk) { + newson = 1; ++ } else { ++ for (i = 0; i <= depth; i++) ++ if (newblk == blks[i]) { ++ quota_error(dquot->dq_sb, ++ "Cycle in quota tree detected: block %u index %u", ++ blks[depth], ++ get_index(info, dquot->dq_id, depth)); ++ ret = -EIO; ++ goto out_buf; ++ } ++ } ++ blks[depth + 1] = newblk; + if (depth == info->dqi_qtree_depth - 1) { + #ifdef __QUOTA_QT_PARANOIA + if (newblk) { +@@ -370,16 +397,16 @@ static int do_insert_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot, + goto out_buf; + } + #endif +- newblk = find_free_dqentry(info, dquot, &ret); ++ blks[depth + 1] = find_free_dqentry(info, dquot, &ret); + } else { +- ret = do_insert_tree(info, dquot, &newblk, depth+1); ++ ret = do_insert_tree(info, dquot, blks, depth + 1); + } + if (newson && ret >= 0) { + ref[get_index(info, dquot->dq_id, depth)] = +- cpu_to_le32(newblk); +- ret = write_blk(info, *treeblk, buf); ++ cpu_to_le32(blks[depth + 1]); ++ ret = write_blk(info, blks[depth], buf); + } else if (newact && ret < 0) { +- put_free_dqblk(info, buf, *treeblk); ++ put_free_dqblk(info, buf, blks[depth]); + } + out_buf: + kfree(buf); +@@ -390,7 +417,7 @@ static int do_insert_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot, + static inline int dq_insert_tree(struct qtree_mem_dqinfo *info, + struct dquot *dquot) + { +- int tmp = QT_TREEOFF; ++ uint blks[MAX_QTREE_DEPTH] = { QT_TREEOFF }; + + #ifdef __QUOTA_QT_PARANOIA + if (info->dqi_blocks <= QT_TREEOFF) { +@@ -398,7 +425,11 @@ static inline int dq_insert_tree(struct qtree_mem_dqinfo *info, + return -EIO; + } + #endif +- return do_insert_tree(info, dquot, &tmp, 0); ++ if (info->dqi_qtree_depth >= MAX_QTREE_DEPTH) { ++ quota_error(dquot->dq_sb, "Quota tree depth too big!"); ++ return -EIO; ++ } ++ return do_insert_tree(info, dquot, blks, 0); + } + + /* +@@ -511,19 +542,20 @@ static int free_dqentry(struct qtree_mem_dqinfo *info, struct dquot *dquot, + + /* Remove reference to dquot from tree */ + static int remove_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot, +- uint *blk, int depth) ++ uint *blks, int depth) + { + char *buf = kmalloc(info->dqi_usable_bs, GFP_NOFS); + int ret = 0; + uint 
newblk; + __le32 *ref = (__le32 *)buf; ++ int i; + + if (!buf) + return -ENOMEM; +- ret = read_blk(info, *blk, buf); ++ ret = read_blk(info, blks[depth], buf); + if (ret < 0) { + quota_error(dquot->dq_sb, "Can't read quota data block %u", +- *blk); ++ blks[depth]); + goto out_buf; + } + newblk = le32_to_cpu(ref[get_index(info, dquot->dq_id, depth)]); +@@ -532,29 +564,38 @@ static int remove_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot, + if (ret) + goto out_buf; + ++ for (i = 0; i <= depth; i++) ++ if (newblk == blks[i]) { ++ quota_error(dquot->dq_sb, ++ "Cycle in quota tree detected: block %u index %u", ++ blks[depth], ++ get_index(info, dquot->dq_id, depth)); ++ ret = -EIO; ++ goto out_buf; ++ } + if (depth == info->dqi_qtree_depth - 1) { + ret = free_dqentry(info, dquot, newblk); +- newblk = 0; ++ blks[depth + 1] = 0; + } else { +- ret = remove_tree(info, dquot, &newblk, depth+1); ++ blks[depth + 1] = newblk; ++ ret = remove_tree(info, dquot, blks, depth + 1); + } +- if (ret >= 0 && !newblk) { +- int i; ++ if (ret >= 0 && !blks[depth + 1]) { + ref[get_index(info, dquot->dq_id, depth)] = cpu_to_le32(0); + /* Block got empty? */ + for (i = 0; i < (info->dqi_usable_bs >> 2) && !ref[i]; i++) + ; + /* Don't put the root block into the free block list */ + if (i == (info->dqi_usable_bs >> 2) +- && *blk != QT_TREEOFF) { +- put_free_dqblk(info, buf, *blk); +- *blk = 0; ++ && blks[depth] != QT_TREEOFF) { ++ put_free_dqblk(info, buf, blks[depth]); ++ blks[depth] = 0; + } else { +- ret = write_blk(info, *blk, buf); ++ ret = write_blk(info, blks[depth], buf); + if (ret < 0) + quota_error(dquot->dq_sb, + "Can't write quota tree block %u", +- *blk); ++ blks[depth]); + } + } + out_buf: +@@ -565,11 +606,15 @@ static int remove_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot, + /* Delete dquot from tree */ + int qtree_delete_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot) + { +- uint tmp = QT_TREEOFF; ++ uint blks[MAX_QTREE_DEPTH] = { QT_TREEOFF }; + + if (!dquot->dq_off) /* Even not allocated? 
*/ + return 0; +- return remove_tree(info, dquot, &tmp, 0); ++ if (info->dqi_qtree_depth >= MAX_QTREE_DEPTH) { ++ quota_error(dquot->dq_sb, "Quota tree depth too big!"); ++ return -EIO; ++ } ++ return remove_tree(info, dquot, blks, 0); + } + EXPORT_SYMBOL(qtree_delete_dquot); + +@@ -613,18 +658,20 @@ static loff_t find_block_dqentry(struct qtree_mem_dqinfo *info, + + /* Find entry for given id in the tree */ + static loff_t find_tree_dqentry(struct qtree_mem_dqinfo *info, +- struct dquot *dquot, uint blk, int depth) ++ struct dquot *dquot, uint *blks, int depth) + { + char *buf = kmalloc(info->dqi_usable_bs, GFP_NOFS); + loff_t ret = 0; + __le32 *ref = (__le32 *)buf; ++ uint blk; ++ int i; + + if (!buf) + return -ENOMEM; +- ret = read_blk(info, blk, buf); ++ ret = read_blk(info, blks[depth], buf); + if (ret < 0) { + quota_error(dquot->dq_sb, "Can't read quota tree block %u", +- blk); ++ blks[depth]); + goto out_buf; + } + ret = 0; +@@ -636,8 +683,19 @@ static loff_t find_tree_dqentry(struct qtree_mem_dqinfo *info, + if (ret) + goto out_buf; + ++ /* Check for cycles in the tree */ ++ for (i = 0; i <= depth; i++) ++ if (blk == blks[i]) { ++ quota_error(dquot->dq_sb, ++ "Cycle in quota tree detected: block %u index %u", ++ blks[depth], ++ get_index(info, dquot->dq_id, depth)); ++ ret = -EIO; ++ goto out_buf; ++ } ++ blks[depth + 1] = blk; + if (depth < info->dqi_qtree_depth - 1) +- ret = find_tree_dqentry(info, dquot, blk, depth+1); ++ ret = find_tree_dqentry(info, dquot, blks, depth + 1); + else + ret = find_block_dqentry(info, dquot, blk); + out_buf: +@@ -649,7 +707,13 @@ static loff_t find_tree_dqentry(struct qtree_mem_dqinfo *info, + static inline loff_t find_dqentry(struct qtree_mem_dqinfo *info, + struct dquot *dquot) + { +- return find_tree_dqentry(info, dquot, QT_TREEOFF, 0); ++ uint blks[MAX_QTREE_DEPTH] = { QT_TREEOFF }; ++ ++ if (info->dqi_qtree_depth >= MAX_QTREE_DEPTH) { ++ quota_error(dquot->dq_sb, "Quota tree depth too big!"); ++ return -EIO; ++ } ++ return find_tree_dqentry(info, dquot, blks, 0); + } + + int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot) +diff --git a/fs/quota/quota_v2.c b/fs/quota/quota_v2.c +index ae99e7b88205b2..7978ab671e0c6a 100644 +--- a/fs/quota/quota_v2.c ++++ b/fs/quota/quota_v2.c +@@ -166,14 +166,17 @@ static int v2_read_file_info(struct super_block *sb, int type) + i_size_read(sb_dqopt(sb)->files[type])); + goto out_free; + } +- if (qinfo->dqi_free_blk >= qinfo->dqi_blocks) { +- quota_error(sb, "Free block number too big (%u >= %u).", +- qinfo->dqi_free_blk, qinfo->dqi_blocks); ++ if (qinfo->dqi_free_blk && (qinfo->dqi_free_blk <= QT_TREEOFF || ++ qinfo->dqi_free_blk >= qinfo->dqi_blocks)) { ++ quota_error(sb, "Free block number %u out of range (%u, %u).", ++ qinfo->dqi_free_blk, QT_TREEOFF, qinfo->dqi_blocks); + goto out_free; + } +- if (qinfo->dqi_free_entry >= qinfo->dqi_blocks) { +- quota_error(sb, "Block with free entry too big (%u >= %u).", +- qinfo->dqi_free_entry, qinfo->dqi_blocks); ++ if (qinfo->dqi_free_entry && (qinfo->dqi_free_entry <= QT_TREEOFF || ++ qinfo->dqi_free_entry >= qinfo->dqi_blocks)) { ++ quota_error(sb, "Block with free entry %u out of range (%u, %u).", ++ qinfo->dqi_free_entry, QT_TREEOFF, ++ qinfo->dqi_blocks); + goto out_free; + } + ret = 0; +diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c +index 3676e02a0232a4..4ab8cab6ea6147 100644 +--- a/fs/reiserfs/stree.c ++++ b/fs/reiserfs/stree.c +@@ -1407,7 +1407,7 @@ void reiserfs_delete_solid_item(struct reiserfs_transaction_handle *th, + 
INITIALIZE_PATH(path); + int item_len = 0; + int tb_init = 0; +- struct cpu_key cpu_key; ++ struct cpu_key cpu_key = {}; + int retval; + int quota_cut_bytes = 0; + +diff --git a/fs/romfs/super.c b/fs/romfs/super.c +index 5c35f6c760377e..b1bdfbc211c3c0 100644 +--- a/fs/romfs/super.c ++++ b/fs/romfs/super.c +@@ -593,7 +593,7 @@ static void romfs_kill_sb(struct super_block *sb) + #ifdef CONFIG_ROMFS_ON_BLOCK + if (sb->s_bdev) { + sync_blockdev(sb->s_bdev); +- blkdev_put(sb->s_bdev, sb); ++ bdev_release(sb->s_bdev_handle); + } + #endif + } +diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c +index 581ce951933901..2dc730800f448d 100644 +--- a/fs/squashfs/block.c ++++ b/fs/squashfs/block.c +@@ -321,7 +321,7 @@ int squashfs_read_data(struct super_block *sb, u64 index, int length, + TRACE("Block @ 0x%llx, %scompressed size %d\n", index - 2, + compressed ? "" : "un", length); + } +- if (length < 0 || length > output->length || ++ if (length <= 0 || length > output->length || + (index + length) > msblk->bytes_used) { + res = -EIO; + goto out; +diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c +index 8ba8c4c5077078..e8df6430444b01 100644 +--- a/fs/squashfs/file.c ++++ b/fs/squashfs/file.c +@@ -544,7 +544,8 @@ static void squashfs_readahead(struct readahead_control *ractl) + struct squashfs_page_actor *actor; + unsigned int nr_pages = 0; + struct page **pages; +- int i, file_end = i_size_read(inode) >> msblk->block_log; ++ int i; ++ loff_t file_end = i_size_read(inode) >> msblk->block_log; + unsigned int max_pages = 1UL << shift; + + readahead_expand(ractl, start, (len | mask) + 1); +diff --git a/fs/squashfs/file_direct.c b/fs/squashfs/file_direct.c +index f1ccad519e28cc..763a3f7a75f6dd 100644 +--- a/fs/squashfs/file_direct.c ++++ b/fs/squashfs/file_direct.c +@@ -26,10 +26,10 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize, + struct inode *inode = target_page->mapping->host; + struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; + +- int file_end = (i_size_read(inode) - 1) >> PAGE_SHIFT; ++ loff_t file_end = (i_size_read(inode) - 1) >> PAGE_SHIFT; + int mask = (1 << (msblk->block_log - PAGE_SHIFT)) - 1; +- int start_index = target_page->index & ~mask; +- int end_index = start_index | mask; ++ loff_t start_index = target_page->index & ~mask; ++ loff_t end_index = start_index | mask; + int i, n, pages, bytes, res = -ENOMEM; + struct page **page; + struct squashfs_page_actor *actor; +diff --git a/fs/super.c b/fs/super.c +index 576abb1ff0403d..b142e71eb8dfdd 100644 +--- a/fs/super.c ++++ b/fs/super.c +@@ -1490,14 +1490,16 @@ int setup_bdev_super(struct super_block *sb, int sb_flags, + struct fs_context *fc) + { + blk_mode_t mode = sb_open_mode(sb_flags); ++ struct bdev_handle *bdev_handle; + struct block_device *bdev; + +- bdev = blkdev_get_by_dev(sb->s_dev, mode, sb, &fs_holder_ops); +- if (IS_ERR(bdev)) { ++ bdev_handle = bdev_open_by_dev(sb->s_dev, mode, sb, &fs_holder_ops); ++ if (IS_ERR(bdev_handle)) { + if (fc) + errorf(fc, "%s: Can't open blockdev", fc->source); +- return PTR_ERR(bdev); ++ return PTR_ERR(bdev_handle); + } ++ bdev = bdev_handle->bdev; + + /* + * This really should be in blkdev_get_by_dev, but right now can't due +@@ -1505,7 +1507,7 @@ int setup_bdev_super(struct super_block *sb, int sb_flags, + * writable from userspace even for a read-only block device. 
+ */ + if ((mode & BLK_OPEN_WRITE) && bdev_read_only(bdev)) { +- blkdev_put(bdev, sb); ++ bdev_release(bdev_handle); + return -EACCES; + } + +@@ -1521,10 +1523,11 @@ int setup_bdev_super(struct super_block *sb, int sb_flags, + mutex_unlock(&bdev->bd_fsfreeze_mutex); + if (fc) + warnf(fc, "%pg: Can't mount, blockdev is frozen", bdev); +- blkdev_put(bdev, sb); ++ bdev_release(bdev_handle); + return -EBUSY; + } + spin_lock(&sb_lock); ++ sb->s_bdev_handle = bdev_handle; + sb->s_bdev = bdev; + sb->s_bdi = bdi_get(bdev->bd_disk->bdi); + if (bdev_stable_writes(bdev)) +@@ -1657,7 +1660,7 @@ void kill_block_super(struct super_block *sb) + generic_shutdown_super(sb); + if (bdev) { + sync_blockdev(bdev); +- blkdev_put(bdev, sb); ++ bdev_release(sb->s_bdev_handle); + } + } + +diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h +index 265da00a1a8b1b..6eefe5153a6ff7 100644 +--- a/include/linux/cgroup-defs.h ++++ b/include/linux/cgroup-defs.h +@@ -543,6 +543,10 @@ struct cgroup_root { + /* Unique id for this hierarchy. */ + int hierarchy_id; + ++ /* A list running through the active hierarchies */ ++ struct list_head root_list; ++ struct rcu_head rcu; /* Must be near the top */ ++ + /* + * The root cgroup. The containing cgroup_root will be destroyed on its + * release. cgrp->ancestors[0] will be used overflowing into the +@@ -556,9 +560,6 @@ struct cgroup_root { + /* Number of cgroups in the hierarchy, used only for /proc/cgroups */ + atomic_t nr_cgrps; + +- /* A list running through the active hierarchies */ +- struct list_head root_list; +- + /* Hierarchy-specific flags */ + unsigned int flags; + +diff --git a/include/linux/fs.h b/include/linux/fs.h +index 56dce38c478627..43e640fb4a7f77 100644 +--- a/include/linux/fs.h ++++ b/include/linux/fs.h +@@ -1036,7 +1036,7 @@ struct file_handle { + __u32 handle_bytes; + int handle_type; + /* file identifier */ +- unsigned char f_handle[]; ++ unsigned char f_handle[] __counted_by(handle_bytes); + }; + + static inline struct file *get_file(struct file *f) +@@ -1223,6 +1223,7 @@ struct super_block { + struct hlist_bl_head s_roots; /* alternate root dentries for NFS */ + struct list_head s_mounts; /* list of mounts; _not_ for fs use */ + struct block_device *s_bdev; ++ struct bdev_handle *s_bdev_handle; + struct backing_dev_info *s_bdi; + struct mtd_info *s_mtd; + struct hlist_node s_instances; +diff --git a/include/linux/sockptr.h b/include/linux/sockptr.h +index bae5e2369b4f7a..1c1a5d926b1713 100644 +--- a/include/linux/sockptr.h ++++ b/include/linux/sockptr.h +@@ -50,11 +50,36 @@ static inline int copy_from_sockptr_offset(void *dst, sockptr_t src, + return 0; + } + ++/* Deprecated. ++ * This is unsafe, unless caller checked user provided optlen. ++ * Prefer copy_safe_from_sockptr() instead. ++ */ + static inline int copy_from_sockptr(void *dst, sockptr_t src, size_t size) + { + return copy_from_sockptr_offset(dst, src, 0, size); + } + ++/** ++ * copy_safe_from_sockptr: copy a struct from sockptr ++ * @dst: Destination address, in kernel space. This buffer must be @ksize ++ * bytes long. ++ * @ksize: Size of @dst struct. ++ * @optval: Source address. (in user or kernel space) ++ * @optlen: Size of @optval data. ++ * ++ * Returns: ++ * * -EINVAL: @optlen < @ksize ++ * * -EFAULT: access to userspace failed. 
++ * * 0 : @ksize bytes were copied ++ */ ++static inline int copy_safe_from_sockptr(void *dst, size_t ksize, ++ sockptr_t optval, unsigned int optlen) ++{ ++ if (optlen < ksize) ++ return -EINVAL; ++ return copy_from_sockptr(dst, optval, ksize); ++} ++ + static inline int copy_to_sockptr_offset(sockptr_t dst, size_t offset, + const void *src, size_t size) + { +diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h +index dbf5b21feafe48..3d8b215f32d5b0 100644 +--- a/include/linux/sunrpc/svc.h ++++ b/include/linux/sunrpc/svc.h +@@ -336,7 +336,6 @@ struct svc_program { + const struct svc_version **pg_vers; /* version array */ + char * pg_name; /* service name */ + char * pg_class; /* class name: services sharing authentication */ +- struct svc_stat * pg_stats; /* rpc statistics */ + enum svc_auth_status (*pg_authenticate)(struct svc_rqst *rqstp); + __be32 (*pg_init_request)(struct svc_rqst *, + const struct svc_program *, +@@ -408,7 +407,9 @@ bool svc_rqst_replace_page(struct svc_rqst *rqstp, + void svc_rqst_release_pages(struct svc_rqst *rqstp); + void svc_rqst_free(struct svc_rqst *); + void svc_exit_thread(struct svc_rqst *); +-struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int, ++struct svc_serv * svc_create_pooled(struct svc_program *prog, ++ struct svc_stat *stats, ++ unsigned int bufsize, + int (*threadfn)(void *data)); + int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int); + int svc_pool_stats_open(struct svc_serv *serv, struct file *file); +diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h +index fb09fd1767f289..ba6e346c8d669a 100644 +--- a/include/uapi/linux/bpf.h ++++ b/include/uapi/linux/bpf.h +@@ -77,12 +77,29 @@ struct bpf_insn { + __s32 imm; /* signed immediate constant */ + }; + +-/* Key of an a BPF_MAP_TYPE_LPM_TRIE entry */ ++/* Deprecated: use struct bpf_lpm_trie_key_u8 (when the "data" member is needed for ++ * byte access) or struct bpf_lpm_trie_key_hdr (when using an alternative type for ++ * the trailing flexible array member) instead. ++ */ + struct bpf_lpm_trie_key { + __u32 prefixlen; /* up to 32 for AF_INET, 128 for AF_INET6 */ + __u8 data[0]; /* Arbitrary size */ + }; + ++/* Header for bpf_lpm_trie_key structs */ ++struct bpf_lpm_trie_key_hdr { ++ __u32 prefixlen; ++}; ++ ++/* Key of an a BPF_MAP_TYPE_LPM_TRIE entry, with trailing byte array. 
*/ ++struct bpf_lpm_trie_key_u8 { ++ union { ++ struct bpf_lpm_trie_key_hdr hdr; ++ __u32 prefixlen; ++ }; ++ __u8 data[]; /* Arbitrary size */ ++}; ++ + struct bpf_cgroup_storage_key { + __u64 cgroup_inode_id; /* cgroup inode id */ + __u32 attach_type; /* program attach type (enum bpf_attach_type) */ +diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c +index b32be680da6cdc..d0febf07051edf 100644 +--- a/kernel/bpf/lpm_trie.c ++++ b/kernel/bpf/lpm_trie.c +@@ -164,13 +164,13 @@ static inline int extract_bit(const u8 *data, size_t index) + */ + static size_t longest_prefix_match(const struct lpm_trie *trie, + const struct lpm_trie_node *node, +- const struct bpf_lpm_trie_key *key) ++ const struct bpf_lpm_trie_key_u8 *key) + { + u32 limit = min(node->prefixlen, key->prefixlen); + u32 prefixlen = 0, i = 0; + + BUILD_BUG_ON(offsetof(struct lpm_trie_node, data) % sizeof(u32)); +- BUILD_BUG_ON(offsetof(struct bpf_lpm_trie_key, data) % sizeof(u32)); ++ BUILD_BUG_ON(offsetof(struct bpf_lpm_trie_key_u8, data) % sizeof(u32)); + + #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && defined(CONFIG_64BIT) + +@@ -229,7 +229,7 @@ static void *trie_lookup_elem(struct bpf_map *map, void *_key) + { + struct lpm_trie *trie = container_of(map, struct lpm_trie, map); + struct lpm_trie_node *node, *found = NULL; +- struct bpf_lpm_trie_key *key = _key; ++ struct bpf_lpm_trie_key_u8 *key = _key; + + if (key->prefixlen > trie->max_prefixlen) + return NULL; +@@ -308,8 +308,9 @@ static long trie_update_elem(struct bpf_map *map, + { + struct lpm_trie *trie = container_of(map, struct lpm_trie, map); + struct lpm_trie_node *node, *im_node = NULL, *new_node = NULL; ++ struct lpm_trie_node *free_node = NULL; + struct lpm_trie_node __rcu **slot; +- struct bpf_lpm_trie_key *key = _key; ++ struct bpf_lpm_trie_key_u8 *key = _key; + unsigned long irq_flags; + unsigned int next_bit; + size_t matchlen = 0; +@@ -382,7 +383,7 @@ static long trie_update_elem(struct bpf_map *map, + trie->n_entries--; + + rcu_assign_pointer(*slot, new_node); +- kfree_rcu(node, rcu); ++ free_node = node; + + goto out; + } +@@ -429,6 +430,7 @@ static long trie_update_elem(struct bpf_map *map, + } + + spin_unlock_irqrestore(&trie->lock, irq_flags); ++ kfree_rcu(free_node, rcu); + + return ret; + } +@@ -437,7 +439,8 @@ static long trie_update_elem(struct bpf_map *map, + static long trie_delete_elem(struct bpf_map *map, void *_key) + { + struct lpm_trie *trie = container_of(map, struct lpm_trie, map); +- struct bpf_lpm_trie_key *key = _key; ++ struct lpm_trie_node *free_node = NULL, *free_parent = NULL; ++ struct bpf_lpm_trie_key_u8 *key = _key; + struct lpm_trie_node __rcu **trim, **trim2; + struct lpm_trie_node *node, *parent; + unsigned long irq_flags; +@@ -506,8 +509,8 @@ static long trie_delete_elem(struct bpf_map *map, void *_key) + else + rcu_assign_pointer( + *trim2, rcu_access_pointer(parent->child[0])); +- kfree_rcu(parent, rcu); +- kfree_rcu(node, rcu); ++ free_parent = parent; ++ free_node = node; + goto out; + } + +@@ -521,10 +524,12 @@ static long trie_delete_elem(struct bpf_map *map, void *_key) + rcu_assign_pointer(*trim, rcu_access_pointer(node->child[1])); + else + RCU_INIT_POINTER(*trim, NULL); +- kfree_rcu(node, rcu); ++ free_node = node; + + out: + spin_unlock_irqrestore(&trie->lock, irq_flags); ++ kfree_rcu(free_parent, rcu); ++ kfree_rcu(free_node, rcu); + + return ret; + } +@@ -536,7 +541,7 @@ static long trie_delete_elem(struct bpf_map *map, void *_key) + sizeof(struct lpm_trie_node)) + #define LPM_VAL_SIZE_MIN 1 + 
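The include/uapi hunk above keeps struct bpf_lpm_trie_key for source compatibility but marks it deprecated; new code is steered to bpf_lpm_trie_key_u8, whose trailing member is a proper flexible array rather than a zero-length one. A sketch of building an IPv4 lookup key against the new layout (userspace side; local mirrors of the structs with a fixed 4-byte data array for simplicity, whereas the real uapi type keeps data[] flexible):

#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>

/* Local mirrors of the layout added above. */
struct lpm_key_hdr {
	unsigned int prefixlen;
};

struct lpm_key_v4 {
	union {
		struct lpm_key_hdr hdr;
		unsigned int prefixlen;
	};
	unsigned char data[4];	/* AF_INET address bytes */
};

int main(void)
{
	struct lpm_key_v4 key;
	struct in_addr addr;

	if (inet_pton(AF_INET, "192.168.0.0", &addr) != 1)
		return 1;

	key.prefixlen = 16;	/* match against 192.168.0.0/16 */
	memcpy(key.data, &addr, sizeof(key.data));

	/* &key is what would be handed to bpf_map_lookup_elem() or
	 * bpf_map_update_elem() on a BPF_MAP_TYPE_LPM_TRIE map. */
	printf("prefixlen %u first octet %d\n", key.prefixlen, key.data[0]);
	return 0;
}

The anonymous union means existing callers that read key->prefixlen keep working whether they name the header or the flat member.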
+-#define LPM_KEY_SIZE(X) (sizeof(struct bpf_lpm_trie_key) + (X)) ++#define LPM_KEY_SIZE(X) (sizeof(struct bpf_lpm_trie_key_u8) + (X)) + #define LPM_KEY_SIZE_MAX LPM_KEY_SIZE(LPM_DATA_SIZE_MAX) + #define LPM_KEY_SIZE_MIN LPM_KEY_SIZE(LPM_DATA_SIZE_MIN) + +@@ -565,7 +570,7 @@ static struct bpf_map *trie_alloc(union bpf_attr *attr) + /* copy mandatory map attributes */ + bpf_map_init_from_attr(&trie->map, attr); + trie->data_size = attr->key_size - +- offsetof(struct bpf_lpm_trie_key, data); ++ offsetof(struct bpf_lpm_trie_key_u8, data); + trie->max_prefixlen = trie->data_size * 8; + + spin_lock_init(&trie->lock); +@@ -616,7 +621,7 @@ static int trie_get_next_key(struct bpf_map *map, void *_key, void *_next_key) + { + struct lpm_trie_node *node, *next_node = NULL, *parent, *search_root; + struct lpm_trie *trie = container_of(map, struct lpm_trie, map); +- struct bpf_lpm_trie_key *key = _key, *next_key = _next_key; ++ struct bpf_lpm_trie_key_u8 *key = _key, *next_key = _next_key; + struct lpm_trie_node **node_stack = NULL; + int err = 0, stack_ptr = -1; + unsigned int next_bit; +@@ -703,7 +708,7 @@ static int trie_get_next_key(struct bpf_map *map, void *_key, void *_next_key) + } + do_copy: + next_key->prefixlen = next_node->prefixlen; +- memcpy((void *)next_key + offsetof(struct bpf_lpm_trie_key, data), ++ memcpy((void *)next_key + offsetof(struct bpf_lpm_trie_key_u8, data), + next_node->data, trie->data_size); + free_stack: + kfree(node_stack); +@@ -715,7 +720,7 @@ static int trie_check_btf(const struct bpf_map *map, + const struct btf_type *key_type, + const struct btf_type *value_type) + { +- /* Keys must have struct bpf_lpm_trie_key embedded. */ ++ /* Keys must have struct bpf_lpm_trie_key_u8 embedded. */ + return BTF_INFO_KIND(key_type->info) != BTF_KIND_STRUCT ? + -EINVAL : 0; + } +diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h +index c56071f150f2ae..5e17f01ced9fd2 100644 +--- a/kernel/cgroup/cgroup-internal.h ++++ b/kernel/cgroup/cgroup-internal.h +@@ -170,7 +170,8 @@ extern struct list_head cgroup_roots; + + /* iterate across the hierarchies */ + #define for_each_root(root) \ +- list_for_each_entry((root), &cgroup_roots, root_list) ++ list_for_each_entry_rcu((root), &cgroup_roots, root_list, \ ++ lockdep_is_held(&cgroup_mutex)) + + /** + * for_each_subsys - iterate all enabled cgroup subsystems +diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c +index 094f513319259d..d872fff901073f 100644 +--- a/kernel/cgroup/cgroup.c ++++ b/kernel/cgroup/cgroup.c +@@ -1313,7 +1313,7 @@ static void cgroup_exit_root_id(struct cgroup_root *root) + + void cgroup_free_root(struct cgroup_root *root) + { +- kfree(root); ++ kfree_rcu(root, rcu); + } + + static void cgroup_destroy_root(struct cgroup_root *root) +@@ -1346,7 +1346,7 @@ static void cgroup_destroy_root(struct cgroup_root *root) + spin_unlock_irq(&css_set_lock); + + if (!list_empty(&root->root_list)) { +- list_del(&root->root_list); ++ list_del_rcu(&root->root_list); + cgroup_root_count--; + } + +@@ -1386,7 +1386,15 @@ static inline struct cgroup *__cset_cgroup_from_root(struct css_set *cset, + } + } + +- BUG_ON(!res_cgroup); ++ /* ++ * If cgroup_mutex is not held, the cgrp_cset_link will be freed ++ * before we remove the cgroup root from the root_list. Consequently, ++ * when accessing a cgroup root, the cset_link may have already been ++ * freed, resulting in a NULL res_cgroup. However, by holding the ++ * cgroup_mutex, we ensure that res_cgroup can't be NULL. 
++ * If we don't hold cgroup_mutex in the caller, we must do the NULL ++ * check. ++ */ + return res_cgroup; + } + +@@ -1445,7 +1453,6 @@ static struct cgroup *current_cgns_cgroup_dfl(void) + static struct cgroup *cset_cgroup_from_root(struct css_set *cset, + struct cgroup_root *root) + { +- lockdep_assert_held(&cgroup_mutex); + lockdep_assert_held(&css_set_lock); + + return __cset_cgroup_from_root(cset, root); +@@ -1453,7 +1460,9 @@ static struct cgroup *cset_cgroup_from_root(struct css_set *cset, + + /* + * Return the cgroup for "task" from the given hierarchy. Must be +- * called with cgroup_mutex and css_set_lock held. ++ * called with css_set_lock held to prevent task's groups from being modified. ++ * Must be called with either cgroup_mutex or rcu read lock to prevent the ++ * cgroup root from being destroyed. + */ + struct cgroup *task_cgroup_from_root(struct task_struct *task, + struct cgroup_root *root) +@@ -2014,7 +2023,7 @@ void init_cgroup_root(struct cgroup_fs_context *ctx) + struct cgroup_root *root = ctx->root; + struct cgroup *cgrp = &root->cgrp; + +- INIT_LIST_HEAD(&root->root_list); ++ INIT_LIST_HEAD_RCU(&root->root_list); + atomic_set(&root->nr_cgrps, 1); + cgrp->root = root; + init_cgroup_housekeeping(cgrp); +@@ -2097,7 +2106,7 @@ int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask) + * care of subsystems' refcounts, which are explicitly dropped in + * the failure exit path. + */ +- list_add(&root->root_list, &cgroup_roots); ++ list_add_rcu(&root->root_list, &cgroup_roots); + cgroup_root_count++; + + /* +diff --git a/kernel/irq/cpuhotplug.c b/kernel/irq/cpuhotplug.c +index 5ecd072a34fe72..eb86283901565b 100644 +--- a/kernel/irq/cpuhotplug.c ++++ b/kernel/irq/cpuhotplug.c +@@ -130,6 +130,22 @@ static bool migrate_one_irq(struct irq_desc *desc) + * CPU. + */ + err = irq_do_set_affinity(d, affinity, false); ++ ++ /* ++ * If there are online CPUs in the affinity mask, but they have no ++ * vectors left to make the migration work, try to break the ++ * affinity by migrating to any online CPU. ++ */ ++ if (err == -ENOSPC && !irqd_affinity_is_managed(d) && affinity != cpu_online_mask) { ++ pr_debug("IRQ%u: set affinity failed for %*pbl, re-try with online CPUs\n", ++ d->irq, cpumask_pr_args(affinity)); ++ ++ affinity = cpu_online_mask; ++ brokeaff = true; ++ ++ err = irq_do_set_affinity(d, affinity, false); ++ } ++ + if (err) { + pr_warn_ratelimited("IRQ%u: set affinity failed(%d).\n", + d->irq, err); +@@ -195,10 +211,15 @@ static void irq_restore_affinity_of_irq(struct irq_desc *desc, unsigned int cpu) + !irq_data_get_irq_chip(data) || !cpumask_test_cpu(cpu, affinity)) + return; + +- if (irqd_is_managed_and_shutdown(data)) { +- irq_startup(desc, IRQ_RESEND, IRQ_START_COND); ++ /* ++ * Don't restore suspended interrupts here when a system comes back ++ * from S3. They are reenabled via resume_device_irqs(). ++ */ ++ if (desc->istate & IRQS_SUSPENDED) + return; +- } ++ ++ if (irqd_is_managed_and_shutdown(data)) ++ irq_startup(desc, IRQ_RESEND, IRQ_START_COND); + + /* + * If the interrupt can only be directed to a single target +diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c +index a054cd5ec08bce..8a936c1ffad390 100644 +--- a/kernel/irq/manage.c ++++ b/kernel/irq/manage.c +@@ -796,10 +796,14 @@ void __enable_irq(struct irq_desc *desc) + irq_settings_set_noprobe(desc); + /* + * Call irq_startup() not irq_enable() here because the +- * interrupt might be marked NOAUTOEN. So irq_startup() +- * needs to be invoked when it gets enabled the first +- * time. 
If it was already started up, then irq_startup() +- * will invoke irq_enable() under the hood. ++ * interrupt might be marked NOAUTOEN so irq_startup() ++ * needs to be invoked when it gets enabled the first time. ++ * This is also required when __enable_irq() is invoked for ++ * a managed and shutdown interrupt from the S3 resume ++ * path. ++ * ++ * If it was already started up, then irq_startup() will ++ * invoke irq_enable() under the hood. + */ + irq_startup(desc, IRQ_RESEND, IRQ_START_FORCE); + break; +diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c +index 13f0d11927074a..68af76ca8bc992 100644 +--- a/mm/debug_vm_pgtable.c ++++ b/mm/debug_vm_pgtable.c +@@ -39,22 +39,7 @@ + * Please refer Documentation/mm/arch_pgtable_helpers.rst for the semantics + * expectations that are being validated here. All future changes in here + * or the documentation need to be in sync. +- * +- * On s390 platform, the lower 4 bits are used to identify given page table +- * entry type. But these bits might affect the ability to clear entries with +- * pxx_clear() because of how dynamic page table folding works on s390. So +- * while loading up the entries do not change the lower 4 bits. It does not +- * have affect any other platform. Also avoid the 62nd bit on ppc64 that is +- * used to mark a pte entry. + */ +-#define S390_SKIP_MASK GENMASK(3, 0) +-#if __BITS_PER_LONG == 64 +-#define PPC64_SKIP_MASK GENMASK(62, 62) +-#else +-#define PPC64_SKIP_MASK 0x0 +-#endif +-#define ARCH_SKIP_MASK (S390_SKIP_MASK | PPC64_SKIP_MASK) +-#define RANDOM_ORVALUE (GENMASK(BITS_PER_LONG - 1, 0) & ~ARCH_SKIP_MASK) + #define RANDOM_NZVALUE GENMASK(7, 0) + + struct pgtable_debug_args { +@@ -510,8 +495,7 @@ static void __init pud_clear_tests(struct pgtable_debug_args *args) + return; + + pr_debug("Validating PUD clear\n"); +- pud = __pud(pud_val(pud) | RANDOM_ORVALUE); +- WRITE_ONCE(*args->pudp, pud); ++ WARN_ON(pud_none(pud)); + pud_clear(args->pudp); + pud = READ_ONCE(*args->pudp); + WARN_ON(!pud_none(pud)); +@@ -547,8 +531,7 @@ static void __init p4d_clear_tests(struct pgtable_debug_args *args) + return; + + pr_debug("Validating P4D clear\n"); +- p4d = __p4d(p4d_val(p4d) | RANDOM_ORVALUE); +- WRITE_ONCE(*args->p4dp, p4d); ++ WARN_ON(p4d_none(p4d)); + p4d_clear(args->p4dp); + p4d = READ_ONCE(*args->p4dp); + WARN_ON(!p4d_none(p4d)); +@@ -581,8 +564,7 @@ static void __init pgd_clear_tests(struct pgtable_debug_args *args) + return; + + pr_debug("Validating PGD clear\n"); +- pgd = __pgd(pgd_val(pgd) | RANDOM_ORVALUE); +- WRITE_ONCE(*args->pgdp, pgd); ++ WARN_ON(pgd_none(pgd)); + pgd_clear(args->pgdp); + pgd = READ_ONCE(*args->pgdp); + WARN_ON(!pgd_none(pgd)); +@@ -633,10 +615,8 @@ static void __init pte_clear_tests(struct pgtable_debug_args *args) + if (WARN_ON(!args->ptep)) + return; + +-#ifndef CONFIG_RISCV +- pte = __pte(pte_val(pte) | RANDOM_ORVALUE); +-#endif + set_pte_at(args->mm, args->vaddr, args->ptep, pte); ++ WARN_ON(pte_none(pte)); + flush_dcache_page(page); + barrier(); + ptep_clear(args->mm, args->vaddr, args->ptep); +@@ -649,8 +629,7 @@ static void __init pmd_clear_tests(struct pgtable_debug_args *args) + pmd_t pmd = READ_ONCE(*args->pmdp); + + pr_debug("Validating PMD clear\n"); +- pmd = __pmd(pmd_val(pmd) | RANDOM_ORVALUE); +- WRITE_ONCE(*args->pmdp, pmd); ++ WARN_ON(pmd_none(pmd)); + pmd_clear(args->pmdp); + pmd = READ_ONCE(*args->pmdp); + WARN_ON(!pmd_none(pmd)); +diff --git a/mm/gup.c b/mm/gup.c +index f50fe2219a13b6..fdd75384160d8d 100644 +--- a/mm/gup.c ++++ b/mm/gup.c +@@ -97,95 +97,6 @@ static 
inline struct folio *try_get_folio(struct page *page, int refs) + return folio; + } + +-/** +- * try_grab_folio() - Attempt to get or pin a folio. +- * @page: pointer to page to be grabbed +- * @refs: the value to (effectively) add to the folio's refcount +- * @flags: gup flags: these are the FOLL_* flag values. +- * +- * "grab" names in this file mean, "look at flags to decide whether to use +- * FOLL_PIN or FOLL_GET behavior, when incrementing the folio's refcount. +- * +- * Either FOLL_PIN or FOLL_GET (or neither) must be set, but not both at the +- * same time. (That's true throughout the get_user_pages*() and +- * pin_user_pages*() APIs.) Cases: +- * +- * FOLL_GET: folio's refcount will be incremented by @refs. +- * +- * FOLL_PIN on large folios: folio's refcount will be incremented by +- * @refs, and its pincount will be incremented by @refs. +- * +- * FOLL_PIN on single-page folios: folio's refcount will be incremented by +- * @refs * GUP_PIN_COUNTING_BIAS. +- * +- * Return: The folio containing @page (with refcount appropriately +- * incremented) for success, or NULL upon failure. If neither FOLL_GET +- * nor FOLL_PIN was set, that's considered failure, and furthermore, +- * a likely bug in the caller, so a warning is also emitted. +- */ +-struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags) +-{ +- struct folio *folio; +- +- if (WARN_ON_ONCE((flags & (FOLL_GET | FOLL_PIN)) == 0)) +- return NULL; +- +- if (unlikely(!(flags & FOLL_PCI_P2PDMA) && is_pci_p2pdma_page(page))) +- return NULL; +- +- if (flags & FOLL_GET) +- return try_get_folio(page, refs); +- +- /* FOLL_PIN is set */ +- +- /* +- * Don't take a pin on the zero page - it's not going anywhere +- * and it is used in a *lot* of places. +- */ +- if (is_zero_page(page)) +- return page_folio(page); +- +- folio = try_get_folio(page, refs); +- if (!folio) +- return NULL; +- +- /* +- * Can't do FOLL_LONGTERM + FOLL_PIN gup fast path if not in a +- * right zone, so fail and let the caller fall back to the slow +- * path. +- */ +- if (unlikely((flags & FOLL_LONGTERM) && +- !folio_is_longterm_pinnable(folio))) { +- if (!put_devmap_managed_page_refs(&folio->page, refs)) +- folio_put_refs(folio, refs); +- return NULL; +- } +- +- /* +- * When pinning a large folio, use an exact count to track it. +- * +- * However, be sure to *also* increment the normal folio +- * refcount field at least once, so that the folio really +- * is pinned. That's why the refcount from the earlier +- * try_get_folio() is left intact. +- */ +- if (folio_test_large(folio)) +- atomic_add(refs, &folio->_pincount); +- else +- folio_ref_add(folio, +- refs * (GUP_PIN_COUNTING_BIAS - 1)); +- /* +- * Adjust the pincount before re-checking the PTE for changes. +- * This is essentially a smp_mb() and is paired with a memory +- * barrier in page_try_share_anon_rmap(). +- */ +- smp_mb__after_atomic(); +- +- node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, refs); +- +- return folio; +-} +- + static void gup_put_folio(struct folio *folio, int refs, unsigned int flags) + { + if (flags & FOLL_PIN) { +@@ -203,58 +114,59 @@ static void gup_put_folio(struct folio *folio, int refs, unsigned int flags) + } + + /** +- * try_grab_page() - elevate a page's refcount by a flag-dependent amount +- * @page: pointer to page to be grabbed +- * @flags: gup flags: these are the FOLL_* flag values. 
++ * try_grab_folio() - add a folio's refcount by a flag-dependent amount ++ * @folio: pointer to folio to be grabbed ++ * @refs: the value to (effectively) add to the folio's refcount ++ * @flags: gup flags: these are the FOLL_* flag values + * + * This might not do anything at all, depending on the flags argument. + * + * "grab" names in this file mean, "look at flags to decide whether to use +- * FOLL_PIN or FOLL_GET behavior, when incrementing the page's refcount. ++ * FOLL_PIN or FOLL_GET behavior, when incrementing the folio's refcount. + * + * Either FOLL_PIN or FOLL_GET (or neither) may be set, but not both at the same +- * time. Cases: please see the try_grab_folio() documentation, with +- * "refs=1". ++ * time. + * + * Return: 0 for success, or if no action was required (if neither FOLL_PIN + * nor FOLL_GET was set, nothing is done). A negative error code for failure: + * +- * -ENOMEM FOLL_GET or FOLL_PIN was set, but the page could not ++ * -ENOMEM FOLL_GET or FOLL_PIN was set, but the folio could not + * be grabbed. ++ * ++ * It is called when we have a stable reference for the folio, typically in ++ * GUP slow path. + */ +-int __must_check try_grab_page(struct page *page, unsigned int flags) ++int __must_check try_grab_folio(struct folio *folio, int refs, ++ unsigned int flags) + { +- struct folio *folio = page_folio(page); +- + if (WARN_ON_ONCE(folio_ref_count(folio) <= 0)) + return -ENOMEM; + +- if (unlikely(!(flags & FOLL_PCI_P2PDMA) && is_pci_p2pdma_page(page))) ++ if (unlikely(!(flags & FOLL_PCI_P2PDMA) && is_pci_p2pdma_page(&folio->page))) + return -EREMOTEIO; + + if (flags & FOLL_GET) +- folio_ref_inc(folio); ++ folio_ref_add(folio, refs); + else if (flags & FOLL_PIN) { + /* + * Don't take a pin on the zero page - it's not going anywhere + * and it is used in a *lot* of places. + */ +- if (is_zero_page(page)) ++ if (is_zero_folio(folio)) + return 0; + + /* +- * Similar to try_grab_folio(): be sure to *also* +- * increment the normal page refcount field at least once, ++ * Increment the normal page refcount field at least once, + * so that the page really is pinned. + */ + if (folio_test_large(folio)) { +- folio_ref_add(folio, 1); +- atomic_add(1, &folio->_pincount); ++ folio_ref_add(folio, refs); ++ atomic_add(refs, &folio->_pincount); + } else { +- folio_ref_add(folio, GUP_PIN_COUNTING_BIAS); ++ folio_ref_add(folio, refs * GUP_PIN_COUNTING_BIAS); + } + +- node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, 1); ++ node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, refs); + } + + return 0; +@@ -647,8 +559,8 @@ static struct page *follow_page_pte(struct vm_area_struct *vma, + VM_BUG_ON_PAGE((flags & FOLL_PIN) && PageAnon(page) && + !PageAnonExclusive(page), page); + +- /* try_grab_page() does nothing unless FOLL_GET or FOLL_PIN is set. */ +- ret = try_grab_page(page, flags); ++ /* try_grab_folio() does nothing unless FOLL_GET or FOLL_PIN is set. */ ++ ret = try_grab_folio(page_folio(page), 1, flags); + if (unlikely(ret)) { + page = ERR_PTR(ret); + goto out; +@@ -899,7 +811,7 @@ static int get_gate_page(struct mm_struct *mm, unsigned long address, + goto unmap; + *page = pte_page(entry); + } +- ret = try_grab_page(*page, gup_flags); ++ ret = try_grab_folio(page_folio(*page), 1, gup_flags); + if (unlikely(ret)) + goto unmap; + out: +@@ -1302,20 +1214,19 @@ static long __get_user_pages(struct mm_struct *mm, + * pages. 
+ */ + if (page_increm > 1) { +- struct folio *folio; ++ struct folio *folio = page_folio(page); + + /* + * Since we already hold refcount on the + * large folio, this should never fail. + */ +- folio = try_grab_folio(page, page_increm - 1, +- foll_flags); +- if (WARN_ON_ONCE(!folio)) { ++ if (try_grab_folio(folio, page_increm - 1, ++ foll_flags)) { + /* + * Release the 1st page ref if the + * folio is problematic, fail hard. + */ +- gup_put_folio(page_folio(page), 1, ++ gup_put_folio(folio, 1, + foll_flags); + ret = -EFAULT; + goto out; +@@ -2541,6 +2452,102 @@ static void __maybe_unused undo_dev_pagemap(int *nr, int nr_start, + } + } + ++/** ++ * try_grab_folio_fast() - Attempt to get or pin a folio in fast path. ++ * @page: pointer to page to be grabbed ++ * @refs: the value to (effectively) add to the folio's refcount ++ * @flags: gup flags: these are the FOLL_* flag values. ++ * ++ * "grab" names in this file mean, "look at flags to decide whether to use ++ * FOLL_PIN or FOLL_GET behavior, when incrementing the folio's refcount. ++ * ++ * Either FOLL_PIN or FOLL_GET (or neither) must be set, but not both at the ++ * same time. (That's true throughout the get_user_pages*() and ++ * pin_user_pages*() APIs.) Cases: ++ * ++ * FOLL_GET: folio's refcount will be incremented by @refs. ++ * ++ * FOLL_PIN on large folios: folio's refcount will be incremented by ++ * @refs, and its pincount will be incremented by @refs. ++ * ++ * FOLL_PIN on single-page folios: folio's refcount will be incremented by ++ * @refs * GUP_PIN_COUNTING_BIAS. ++ * ++ * Return: The folio containing @page (with refcount appropriately ++ * incremented) for success, or NULL upon failure. If neither FOLL_GET ++ * nor FOLL_PIN was set, that's considered failure, and furthermore, ++ * a likely bug in the caller, so a warning is also emitted. ++ * ++ * It uses add ref unless zero to elevate the folio refcount and must be called ++ * in fast path only. ++ */ ++static struct folio *try_grab_folio_fast(struct page *page, int refs, ++ unsigned int flags) ++{ ++ struct folio *folio; ++ ++ /* Raise warn if it is not called in fast GUP */ ++ VM_WARN_ON_ONCE(!irqs_disabled()); ++ ++ if (WARN_ON_ONCE((flags & (FOLL_GET | FOLL_PIN)) == 0)) ++ return NULL; ++ ++ if (unlikely(!(flags & FOLL_PCI_P2PDMA) && is_pci_p2pdma_page(page))) ++ return NULL; ++ ++ if (flags & FOLL_GET) ++ return try_get_folio(page, refs); ++ ++ /* FOLL_PIN is set */ ++ ++ /* ++ * Don't take a pin on the zero page - it's not going anywhere ++ * and it is used in a *lot* of places. ++ */ ++ if (is_zero_page(page)) ++ return page_folio(page); ++ ++ folio = try_get_folio(page, refs); ++ if (!folio) ++ return NULL; ++ ++ /* ++ * Can't do FOLL_LONGTERM + FOLL_PIN gup fast path if not in a ++ * right zone, so fail and let the caller fall back to the slow ++ * path. ++ */ ++ if (unlikely((flags & FOLL_LONGTERM) && ++ !folio_is_longterm_pinnable(folio))) { ++ if (!put_devmap_managed_page_refs(&folio->page, refs)) ++ folio_put_refs(folio, refs); ++ return NULL; ++ } ++ ++ /* ++ * When pinning a large folio, use an exact count to track it. ++ * ++ * However, be sure to *also* increment the normal folio ++ * refcount field at least once, so that the folio really ++ * is pinned. That's why the refcount from the earlier ++ * try_get_folio() is left intact. ++ */ ++ if (folio_test_large(folio)) ++ atomic_add(refs, &folio->_pincount); ++ else ++ folio_ref_add(folio, ++ refs * (GUP_PIN_COUNTING_BIAS - 1)); ++ /* ++ * Adjust the pincount before re-checking the PTE for changes. 
++ * This is essentially a smp_mb() and is paired with a memory ++ * barrier in folio_try_share_anon_rmap_*(). ++ */ ++ smp_mb__after_atomic(); ++ ++ node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, refs); ++ ++ return folio; ++} ++ + #ifdef CONFIG_ARCH_HAS_PTE_SPECIAL + /* + * Fast-gup relies on pte change detection to avoid concurrent pgtable +@@ -2605,7 +2612,7 @@ static int gup_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr, + VM_BUG_ON(!pfn_valid(pte_pfn(pte))); + page = pte_page(pte); + +- folio = try_grab_folio(page, 1, flags); ++ folio = try_grab_folio_fast(page, 1, flags); + if (!folio) + goto pte_unmap; + +@@ -2699,7 +2706,7 @@ static int __gup_device_huge(unsigned long pfn, unsigned long addr, + + SetPageReferenced(page); + pages[*nr] = page; +- if (unlikely(try_grab_page(page, flags))) { ++ if (unlikely(try_grab_folio(page_folio(page), 1, flags))) { + undo_dev_pagemap(nr, nr_start, flags, pages); + break; + } +@@ -2808,7 +2815,7 @@ static int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, + page = nth_page(pte_page(pte), (addr & (sz - 1)) >> PAGE_SHIFT); + refs = record_subpages(page, addr, end, pages + *nr); + +- folio = try_grab_folio(page, refs, flags); ++ folio = try_grab_folio_fast(page, refs, flags); + if (!folio) + return 0; + +@@ -2879,7 +2886,7 @@ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr, + page = nth_page(pmd_page(orig), (addr & ~PMD_MASK) >> PAGE_SHIFT); + refs = record_subpages(page, addr, end, pages + *nr); + +- folio = try_grab_folio(page, refs, flags); ++ folio = try_grab_folio_fast(page, refs, flags); + if (!folio) + return 0; + +@@ -2923,7 +2930,7 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr, + page = nth_page(pud_page(orig), (addr & ~PUD_MASK) >> PAGE_SHIFT); + refs = record_subpages(page, addr, end, pages + *nr); + +- folio = try_grab_folio(page, refs, flags); ++ folio = try_grab_folio_fast(page, refs, flags); + if (!folio) + return 0; + +@@ -2963,7 +2970,7 @@ static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr, + page = nth_page(pgd_page(orig), (addr & ~PGDIR_MASK) >> PAGE_SHIFT); + refs = record_subpages(page, addr, end, pages + *nr); + +- folio = try_grab_folio(page, refs, flags); ++ folio = try_grab_folio_fast(page, refs, flags); + if (!folio) + return 0; + +diff --git a/mm/huge_memory.c b/mm/huge_memory.c +index 7ac2877e76629b..f2816c9a1f3ec8 100644 +--- a/mm/huge_memory.c ++++ b/mm/huge_memory.c +@@ -1056,7 +1056,7 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr, + if (!*pgmap) + return ERR_PTR(-EFAULT); + page = pfn_to_page(pfn); +- ret = try_grab_page(page, flags); ++ ret = try_grab_folio(page_folio(page), 1, flags); + if (ret) + page = ERR_PTR(ret); + +@@ -1214,7 +1214,7 @@ struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr, + return ERR_PTR(-EFAULT); + page = pfn_to_page(pfn); + +- ret = try_grab_page(page, flags); ++ ret = try_grab_folio(page_folio(page), 1, flags); + if (ret) + page = ERR_PTR(ret); + +@@ -1475,7 +1475,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, + VM_BUG_ON_PAGE((flags & FOLL_PIN) && PageAnon(page) && + !PageAnonExclusive(page), page); + +- ret = try_grab_page(page, flags); ++ ret = try_grab_folio(page_folio(page), 1, flags); + if (ret) + return ERR_PTR(ret); + +diff --git a/mm/hugetlb.c b/mm/hugetlb.c +index fb7a531fce7174..0acb04c3e95291 100644 +--- a/mm/hugetlb.c ++++ b/mm/hugetlb.c +@@ -6532,7 +6532,7 @@ struct page *hugetlb_follow_page_mask(struct 
vm_area_struct *vma, + * try_grab_page() should always be able to get the page here, + * because we hold the ptl lock and have verified pte_present(). + */ +- ret = try_grab_page(page, flags); ++ ret = try_grab_folio(page_folio(page), 1, flags); + + if (WARN_ON_ONCE(ret)) { + page = ERR_PTR(ret); +diff --git a/mm/internal.h b/mm/internal.h +index abed947f784b7b..ef8d787a510c5c 100644 +--- a/mm/internal.h ++++ b/mm/internal.h +@@ -938,8 +938,8 @@ int migrate_device_coherent_page(struct page *page); + /* + * mm/gup.c + */ +-struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags); +-int __must_check try_grab_page(struct page *page, unsigned int flags); ++int __must_check try_grab_folio(struct folio *folio, int refs, ++ unsigned int flags); + + /* + * mm/huge_memory.c +diff --git a/mm/page_table_check.c b/mm/page_table_check.c +index 6363f93a47c691..509c6ef8de400e 100644 +--- a/mm/page_table_check.c ++++ b/mm/page_table_check.c +@@ -7,6 +7,8 @@ + #include <linux/kstrtox.h> + #include <linux/mm.h> + #include <linux/page_table_check.h> ++#include <linux/swap.h> ++#include <linux/swapops.h> + + #undef pr_fmt + #define pr_fmt(fmt) "page_table_check: " fmt +@@ -191,6 +193,22 @@ void __page_table_check_pud_clear(struct mm_struct *mm, pud_t pud) + } + EXPORT_SYMBOL(__page_table_check_pud_clear); + ++/* Whether the swap entry cached writable information */ ++static inline bool swap_cached_writable(swp_entry_t entry) ++{ ++ return is_writable_device_exclusive_entry(entry) || ++ is_writable_device_private_entry(entry) || ++ is_writable_migration_entry(entry); ++} ++ ++static inline void page_table_check_pte_flags(pte_t pte) ++{ ++ if (pte_present(pte) && pte_uffd_wp(pte)) ++ WARN_ON_ONCE(pte_write(pte)); ++ else if (is_swap_pte(pte) && pte_swp_uffd_wp(pte)) ++ WARN_ON_ONCE(swap_cached_writable(pte_to_swp_entry(pte))); ++} ++ + void __page_table_check_ptes_set(struct mm_struct *mm, pte_t *ptep, pte_t pte, + unsigned int nr) + { +@@ -199,6 +217,8 @@ void __page_table_check_ptes_set(struct mm_struct *mm, pte_t *ptep, pte_t pte, + if (&init_mm == mm) + return; + ++ page_table_check_pte_flags(pte); ++ + for (i = 0; i < nr; i++) + __page_table_check_pte_clear(mm, ptep_get(ptep + i)); + if (pte_user_accessible_page(pte)) +@@ -206,11 +226,21 @@ void __page_table_check_ptes_set(struct mm_struct *mm, pte_t *ptep, pte_t pte, + } + EXPORT_SYMBOL(__page_table_check_ptes_set); + ++static inline void page_table_check_pmd_flags(pmd_t pmd) ++{ ++ if (pmd_present(pmd) && pmd_uffd_wp(pmd)) ++ WARN_ON_ONCE(pmd_write(pmd)); ++ else if (is_swap_pmd(pmd) && pmd_swp_uffd_wp(pmd)) ++ WARN_ON_ONCE(swap_cached_writable(pmd_to_swp_entry(pmd))); ++} ++ + void __page_table_check_pmd_set(struct mm_struct *mm, pmd_t *pmdp, pmd_t pmd) + { + if (&init_mm == mm) + return; + ++ page_table_check_pmd_flags(pmd); ++ + __page_table_check_pmd_clear(mm, *pmdp); + if (pmd_user_accessible_page(pmd)) { + page_table_check_set(pmd_pfn(pmd), PMD_SIZE >> PAGE_SHIFT, +diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c +index b54e8a530f55a1..29aa07e9db9d71 100644 +--- a/net/bluetooth/rfcomm/sock.c ++++ b/net/bluetooth/rfcomm/sock.c +@@ -629,7 +629,7 @@ static int rfcomm_sock_setsockopt_old(struct socket *sock, int optname, + + switch (optname) { + case RFCOMM_LM: +- if (copy_from_sockptr(&opt, optval, sizeof(u32))) { ++ if (bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen)) { + err = -EFAULT; + break; + } +@@ -664,7 +664,6 @@ static int rfcomm_sock_setsockopt(struct socket *sock, int level, int optname, + 
+ 	struct sock *sk = sock->sk;
+ 	struct bt_security sec;
+ 	int err = 0;
+-	size_t len;
+ 	u32 opt;
+
+ 	BT_DBG("sk %p", sk);
+@@ -686,11 +685,9 @@ static int rfcomm_sock_setsockopt(struct socket *sock, int level, int optname,
+
+ 		sec.level = BT_SECURITY_LOW;
+
+-		len = min_t(unsigned int, sizeof(sec), optlen);
+-		if (copy_from_sockptr(&sec, optval, len)) {
+-			err = -EFAULT;
++		err = bt_copy_from_sockptr(&sec, sizeof(sec), optval, optlen);
++		if (err)
+ 			break;
+-		}
+
+ 		if (sec.level > BT_SECURITY_HIGH) {
+ 			err = -EINVAL;
+@@ -706,10 +703,9 @@ static int rfcomm_sock_setsockopt(struct socket *sock, int level, int optname,
+ 			break;
+ 		}
+
+-		if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
+-			err = -EFAULT;
++		err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen);
++		if (err)
+ 			break;
+-		}
+
+ 		if (opt)
+ 			set_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags);
+diff --git a/net/core/filter.c b/net/core/filter.c
+index 8cb44cd29967bb..be313928d272c6 100644
+--- a/net/core/filter.c
++++ b/net/core/filter.c
+@@ -2271,12 +2271,12 @@ static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev,
+
+ 	err = bpf_out_neigh_v6(net, skb, dev, nh);
+ 	if (unlikely(net_xmit_eval(err)))
+-		dev->stats.tx_errors++;
++		DEV_STATS_INC(dev, tx_errors);
+ 	else
+ 		ret = NET_XMIT_SUCCESS;
+ 	goto out_xmit;
+ out_drop:
+-	dev->stats.tx_errors++;
++	DEV_STATS_INC(dev, tx_errors);
+ 	kfree_skb(skb);
+ out_xmit:
+ 	return ret;
+@@ -2378,12 +2378,12 @@ static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev,
+
+ 	err = bpf_out_neigh_v4(net, skb, dev, nh);
+ 	if (unlikely(net_xmit_eval(err)))
+-		dev->stats.tx_errors++;
++		DEV_STATS_INC(dev, tx_errors);
+ 	else
+ 		ret = NET_XMIT_SUCCESS;
+ 	goto out_xmit;
+ out_drop:
+-	dev->stats.tx_errors++;
++	DEV_STATS_INC(dev, tx_errors);
+ 	kfree_skb(skb);
+ out_xmit:
+ 	return ret;
+diff --git a/net/ipv4/fou_core.c b/net/ipv4/fou_core.c
+index 0c41076e31edad..b38b82ae903de0 100644
+--- a/net/ipv4/fou_core.c
++++ b/net/ipv4/fou_core.c
+@@ -433,7 +433,7 @@ static struct sk_buff *gue_gro_receive(struct sock *sk,
+
+ 	offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
+ 	ops = rcu_dereference(offloads[proto]);
+-	if (WARN_ON_ONCE(!ops || !ops->callbacks.gro_receive))
++	if (!ops || !ops->callbacks.gro_receive)
+ 		goto out;
+
+ 	pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
+diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
+index b71f94a5932ac0..e0883ba709b0bf 100644
+--- a/net/ipv4/tcp_metrics.c
++++ b/net/ipv4/tcp_metrics.c
+@@ -899,11 +899,13 @@ static void tcp_metrics_flush_all(struct net *net)
+ 	unsigned int row;
+
+ 	for (row = 0; row < max_rows; row++, hb++) {
+-		struct tcp_metrics_block __rcu **pp;
++		struct tcp_metrics_block __rcu **pp = &hb->chain;
+ 		bool match;
+
++		if (!rcu_access_pointer(*pp))
++			continue;
++
+ 		spin_lock_bh(&tcp_metrics_lock);
+-		pp = &hb->chain;
+ 		for (tm = deref_locked(*pp); tm; tm = deref_locked(*pp)) {
+ 			match = net ? net_eq(tm_net(tm), net) :
+ 				!refcount_read(&tm_net(tm)->ns.count);
+@@ -915,6 +917,7 @@ static void tcp_metrics_flush_all(struct net *net)
+ 			}
+ 		}
+ 		spin_unlock_bh(&tcp_metrics_lock);
++		cond_resched();
+ 	}
+ }
+
+diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
+index 6e3bfb46af44d3..52b048807feae5 100644
+--- a/net/mac80211/iface.c
++++ b/net/mac80211/iface.c
+@@ -251,9 +251,9 @@ static int ieee80211_can_powered_addr_change(struct ieee80211_sub_if_data *sdata
+ 	return ret;
+ }
+
+-static int ieee80211_change_mac(struct net_device *dev, void *addr)
++static int _ieee80211_change_mac(struct ieee80211_sub_if_data *sdata,
++				 void *addr)
+ {
+-	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+ 	struct ieee80211_local *local = sdata->local;
+ 	struct sockaddr *sa = addr;
+ 	bool check_dup = true;
+@@ -278,7 +278,7 @@ static int ieee80211_change_mac(struct net_device *dev, void *addr)
+
+ 	if (live)
+ 		drv_remove_interface(local, sdata);
+-	ret = eth_mac_addr(dev, sa);
++	ret = eth_mac_addr(sdata->dev, sa);
+
+ 	if (ret == 0) {
+ 		memcpy(sdata->vif.addr, sa->sa_data, ETH_ALEN);
+@@ -294,6 +294,27 @@ static int ieee80211_change_mac(struct net_device *dev, void *addr)
+ 	return ret;
+ }
+
++static int ieee80211_change_mac(struct net_device *dev, void *addr)
++{
++	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
++	struct ieee80211_local *local = sdata->local;
++	int ret;
++
++	/*
++	 * This happens during unregistration if there's a bond device
++	 * active (maybe other cases?) and we must get removed from it.
++	 * But we really don't care anymore if it's not registered now.
++	 */
++	if (!dev->ieee80211_ptr->registered)
++		return 0;
++
++	wiphy_lock(local->hw.wiphy);
++	ret = _ieee80211_change_mac(sdata, addr);
++	wiphy_unlock(local->hw.wiphy);
++
++	return ret;
++}
++
+ static inline int identical_mac_addr_allowed(int type1, int type2)
+ {
+ 	return type1 == NL80211_IFTYPE_MONITOR ||
+diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
+index 819157bbb5a2c6..d5344563e525c9 100644
+--- a/net/nfc/llcp_sock.c
++++ b/net/nfc/llcp_sock.c
+@@ -252,10 +252,10 @@ static int nfc_llcp_setsockopt(struct socket *sock, int level, int optname,
+ 			break;
+ 		}
+
+-		if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
+-			err = -EFAULT;
++		err = copy_safe_from_sockptr(&opt, sizeof(opt),
++					     optval, optlen);
++		if (err)
+ 			break;
+-		}
+
+ 		if (opt > LLCP_MAX_RW) {
+ 			err = -EINVAL;
+@@ -274,10 +274,10 @@ static int nfc_llcp_setsockopt(struct socket *sock, int level, int optname,
+ 			break;
+ 		}
+
+-		if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
+-			err = -EFAULT;
++		err = copy_safe_from_sockptr(&opt, sizeof(opt),
++					     optval, optlen);
++		if (err)
+ 			break;
+-		}
+
+ 		if (opt > LLCP_MAX_MIUX) {
+ 			err = -EINVAL;
+diff --git a/net/rds/recv.c b/net/rds/recv.c
+index c71b923764fd7c..5627f80013f8b1 100644
+--- a/net/rds/recv.c
++++ b/net/rds/recv.c
+@@ -425,6 +425,7 @@ static int rds_still_queued(struct rds_sock *rs, struct rds_incoming *inc,
+ 	struct sock *sk = rds_rs_to_sk(rs);
+ 	int ret = 0;
+ 	unsigned long flags;
++	struct rds_incoming *to_drop = NULL;
+
+ 	write_lock_irqsave(&rs->rs_recv_lock, flags);
+ 	if (!list_empty(&inc->i_item)) {
+@@ -435,11 +436,14 @@ static int rds_still_queued(struct rds_sock *rs, struct rds_incoming *inc,
+ 					      -be32_to_cpu(inc->i_hdr.h_len),
+ 					      inc->i_hdr.h_dport);
+ 			list_del_init(&inc->i_item);
+-			rds_inc_put(inc);
++			to_drop = inc;
+ 		}
+ 	}
+ 	write_unlock_irqrestore(&rs->rs_recv_lock, flags);
+
++	if (to_drop)
++		rds_inc_put(to_drop);
++
+ 	rdsdebug("inc %p rs %p still %d dropped %d\n", inc, rs, ret, drop);
+ 	return ret;
+ }
+@@ -758,16 +762,21 @@ void rds_clear_recv_queue(struct rds_sock *rs)
+ 	struct sock *sk = rds_rs_to_sk(rs);
+ 	struct rds_incoming *inc, *tmp;
+ 	unsigned long flags;
++	LIST_HEAD(to_drop);
+
+ 	write_lock_irqsave(&rs->rs_recv_lock, flags);
+ 	list_for_each_entry_safe(inc, tmp, &rs->rs_recv_queue, i_item) {
+ 		rds_recv_rcvbuf_delta(rs, sk, inc->i_conn->c_lcong,
+ 				      -be32_to_cpu(inc->i_hdr.h_len),
+ 				      inc->i_hdr.h_dport);
++		list_move(&inc->i_item, &to_drop);
++	}
++	write_unlock_irqrestore(&rs->rs_recv_lock, flags);
++
++	list_for_each_entry_safe(inc, tmp, &to_drop, i_item) {
+ 		list_del_init(&inc->i_item);
+ 		rds_inc_put(inc);
+ 	}
+-	write_unlock_irqrestore(&rs->rs_recv_lock, flags);
+ }
+
+ /*
+diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
+index 4023c955036b12..6ab9359c1706f1 100644
+--- a/net/sched/sch_generic.c
++++ b/net/sched/sch_generic.c
+@@ -522,8 +522,9 @@ static void dev_watchdog(struct timer_list *t)
+
+ 		if (unlikely(timedout_ms)) {
+ 			trace_net_dev_xmit_timeout(dev, i);
+-			WARN_ONCE(1, "NETDEV WATCHDOG: %s (%s): transmit queue %u timed out %u ms\n",
+-				  dev->name, netdev_drivername(dev), i, timedout_ms);
++			netdev_crit(dev, "NETDEV WATCHDOG: CPU: %d: transmit queue %u timed out %u ms\n",
++				    raw_smp_processor_id(),
++				    i, timedout_ms);
+ 			netif_freeze_queues(dev);
+ 			dev->netdev_ops->ndo_tx_timeout(dev, i);
+ 			netif_unfreeze_queues(dev);
+diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c
+index 7182c5a450fb5b..5c165218180588 100644
+--- a/net/sctp/inqueue.c
++++ b/net/sctp/inqueue.c
+@@ -38,6 +38,14 @@ void sctp_inq_init(struct sctp_inq *queue)
+ 	INIT_WORK(&queue->immediate, NULL);
+ }
+
++/* Properly release the chunk which is being worked on. */
++static inline void sctp_inq_chunk_free(struct sctp_chunk *chunk)
++{
++	if (chunk->head_skb)
++		chunk->skb = chunk->head_skb;
++	sctp_chunk_free(chunk);
++}
++
+ /* Release the memory associated with an SCTP inqueue. */
+ void sctp_inq_free(struct sctp_inq *queue)
+ {
+@@ -53,7 +61,7 @@ void sctp_inq_free(struct sctp_inq *queue)
+ 	 * free it as well.
+ 	 */
+ 	if (queue->in_progress) {
+-		sctp_chunk_free(queue->in_progress);
++		sctp_inq_chunk_free(queue->in_progress);
+ 		queue->in_progress = NULL;
+ 	}
+ }
+@@ -130,9 +138,7 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue)
+ 				goto new_skb;
+ 			}
+
+-			if (chunk->head_skb)
+-				chunk->skb = chunk->head_skb;
+-			sctp_chunk_free(chunk);
++			sctp_inq_chunk_free(chunk);
+ 			chunk = queue->in_progress = NULL;
+ 		} else {
+ 			/* Nothing to do. Next chunk in the packet, please. */
+diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
+index 65fc1297c6dfa4..383860cb1d5b0f 100644
+--- a/net/sunrpc/stats.c
++++ b/net/sunrpc/stats.c
+@@ -314,7 +314,7 @@ EXPORT_SYMBOL_GPL(rpc_proc_unregister);
+ struct proc_dir_entry *
+ svc_proc_register(struct net *net, struct svc_stat *statp, const struct proc_ops *proc_ops)
+ {
+-	return do_register(net, statp->program->pg_name, statp, proc_ops);
++	return do_register(net, statp->program->pg_name, net, proc_ops);
+ }
+ EXPORT_SYMBOL_GPL(svc_proc_register);
+
+diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
+index 691499d1d2315c..029c49065016ac 100644
+--- a/net/sunrpc/svc.c
++++ b/net/sunrpc/svc.c
+@@ -453,8 +453,8 @@ __svc_init_bc(struct svc_serv *serv)
+  * Create an RPC service
+  */
+ static struct svc_serv *
+-__svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
+-	     int (*threadfn)(void *data))
++__svc_create(struct svc_program *prog, struct svc_stat *stats,
++	     unsigned int bufsize, int npools, int (*threadfn)(void *data))
+ {
+ 	struct svc_serv *serv;
+ 	unsigned int vers;
+@@ -466,7 +466,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
+ 	serv->sv_name = prog->pg_name;
+ 	serv->sv_program = prog;
+ 	kref_init(&serv->sv_refcnt);
+-	serv->sv_stats = prog->pg_stats;
++	serv->sv_stats = stats;
+ 	if (bufsize > RPCSVC_MAXPAYLOAD)
+ 		bufsize = RPCSVC_MAXPAYLOAD;
+ 	serv->sv_max_payload = bufsize? bufsize : 4096;
+@@ -532,26 +532,28 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
+ struct svc_serv *svc_create(struct svc_program *prog, unsigned int bufsize,
+ 			    int (*threadfn)(void *data))
+ {
+-	return __svc_create(prog, bufsize, 1, threadfn);
++	return __svc_create(prog, NULL, bufsize, 1, threadfn);
+ }
+ EXPORT_SYMBOL_GPL(svc_create);
+
+ /**
+  * svc_create_pooled - Create an RPC service with pooled threads
+  * @prog: the RPC program the new service will handle
++ * @stats: the stats struct if desired
+  * @bufsize: maximum message size for @prog
+  * @threadfn: a function to service RPC requests for @prog
+  *
+  * Returns an instantiated struct svc_serv object or NULL.
+  */
+ struct svc_serv *svc_create_pooled(struct svc_program *prog,
++				   struct svc_stat *stats,
+ 				   unsigned int bufsize,
+ 				   int (*threadfn)(void *data))
+ {
+ 	struct svc_serv *serv;
+ 	unsigned int npools = svc_pool_map_get();
+
+-	serv = __svc_create(prog, bufsize, npools, threadfn);
++	serv = __svc_create(prog, stats, bufsize, npools, threadfn);
+ 	if (!serv)
+ 		goto out_err;
+ 	return serv;
+@@ -1377,7 +1379,8 @@ svc_process_common(struct svc_rqst *rqstp)
+ 		goto err_bad_proc;
+
+ 	/* Syntactic check complete */
+-	serv->sv_stats->rpccnt++;
++	if (serv->sv_stats)
++		serv->sv_stats->rpccnt++;
+ 	trace_svc_process(rqstp, progp->pg_name);
+
+ 	aoffset = xdr_stream_pos(xdr);
+@@ -1429,7 +1432,8 @@ svc_process_common(struct svc_rqst *rqstp)
+ 		goto close_xprt;
+
+ err_bad_rpc:
+-	serv->sv_stats->rpcbadfmt++;
++	if (serv->sv_stats)
++		serv->sv_stats->rpcbadfmt++;
+ 	xdr_stream_encode_u32(xdr, RPC_MSG_DENIED);
+ 	xdr_stream_encode_u32(xdr, RPC_MISMATCH);
+ 	/* Only RPCv2 supported */
+@@ -1440,7 +1444,8 @@ svc_process_common(struct svc_rqst *rqstp)
+ err_bad_auth:
+ 	dprintk("svc: authentication failed (%d)\n",
+ 		be32_to_cpu(rqstp->rq_auth_stat));
+-	serv->sv_stats->rpcbadauth++;
++	if (serv->sv_stats)
++		serv->sv_stats->rpcbadauth++;
+ 	/* Restore write pointer to location of reply status: */
+ 	xdr_truncate_encode(xdr, XDR_UNIT * 2);
+ 	xdr_stream_encode_u32(xdr, RPC_MSG_DENIED);
+@@ -1450,7 +1455,8 @@ svc_process_common(struct svc_rqst *rqstp)
+
+ err_bad_prog:
+ 	dprintk("svc: unknown program %d\n", rqstp->rq_prog);
+-	serv->sv_stats->rpcbadfmt++;
++	if (serv->sv_stats)
++		serv->sv_stats->rpcbadfmt++;
+ 	*rqstp->rq_accept_statp = rpc_prog_unavail;
+ 	goto sendit;
+
+@@ -1458,7 +1464,8 @@ svc_process_common(struct svc_rqst *rqstp)
+ 	svc_printk(rqstp, "unknown version (%d for prog %d, %s)\n",
+ 		   rqstp->rq_vers, rqstp->rq_prog, progp->pg_name);
+
+-	serv->sv_stats->rpcbadfmt++;
++	if (serv->sv_stats)
++		serv->sv_stats->rpcbadfmt++;
+ 	*rqstp->rq_accept_statp = rpc_prog_mismatch;
+
+ 	/*
+@@ -1472,19 +1479,22 @@ svc_process_common(struct svc_rqst *rqstp)
+ err_bad_proc:
+ 	svc_printk(rqstp, "unknown procedure (%d)\n", rqstp->rq_proc);
+
+-	serv->sv_stats->rpcbadfmt++;
++	if (serv->sv_stats)
++		serv->sv_stats->rpcbadfmt++;
+ 	*rqstp->rq_accept_statp = rpc_proc_unavail;
+ 	goto sendit;
+
+ err_garbage_args:
+ 	svc_printk(rqstp, "failed to decode RPC header\n");
+
+-	serv->sv_stats->rpcbadfmt++;
++	if (serv->sv_stats)
++		serv->sv_stats->rpcbadfmt++;
+ 	*rqstp->rq_accept_statp = rpc_garbage_args;
+ 	goto sendit;
+
+ err_system_err:
+-	serv->sv_stats->rpcbadfmt++;
++	if (serv->sv_stats)
++		serv->sv_stats->rpcbadfmt++;
+ 	*rqstp->rq_accept_statp = rpc_system_err;
+ 	goto sendit;
+ }
+@@ -1536,7 +1546,8 @@ void svc_process(struct svc_rqst *rqstp)
+ out_baddir:
+ 	svc_printk(rqstp, "bad direction 0x%08x, dropping request\n",
+ 		   be32_to_cpu(*p));
+-	rqstp->rq_server->sv_stats->rpcbadfmt++;
++	if (rqstp->rq_server->sv_stats)
++		rqstp->rq_server->sv_stats->rpcbadfmt++;
+ out_drop:
+ 	svc_drop(rqstp);
+ }
+diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
+index be5c42d6ffbeab..2b2dc46dc701f9 100644
+--- a/net/wireless/nl80211.c
++++ b/net/wireless/nl80211.c
+@@ -468,6 +468,10 @@ static struct netlink_range_validation nl80211_punct_bitmap_range = {
+ 	.max = 0xffff,
+ };
+
++static struct netlink_range_validation q_range = {
++	.max = INT_MAX,
++};
++
+ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
+ 	[0] = { .strict_start_type = NL80211_ATTR_HE_OBSS_PD },
+ 	[NL80211_ATTR_WIPHY] = { .type = NLA_U32 },
+@@ -750,7 +754,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
+
+ 	[NL80211_ATTR_TXQ_LIMIT] = { .type = NLA_U32 },
+ 	[NL80211_ATTR_TXQ_MEMORY_LIMIT] = { .type = NLA_U32 },
+-	[NL80211_ATTR_TXQ_QUANTUM] = { .type = NLA_U32 },
++	[NL80211_ATTR_TXQ_QUANTUM] = NLA_POLICY_FULL_RANGE(NLA_U32, &q_range),
+ 	[NL80211_ATTR_HE_CAPABILITY] =
+ 		NLA_POLICY_VALIDATE_FN(NLA_BINARY, validate_he_capa,
+ 				       NL80211_HE_MAX_CAPABILITY_LEN),
+diff --git a/samples/bpf/map_perf_test_user.c b/samples/bpf/map_perf_test_user.c
+index d2fbcf963cdf6d..07ff471ed6aee0 100644
+--- a/samples/bpf/map_perf_test_user.c
++++ b/samples/bpf/map_perf_test_user.c
+@@ -370,7 +370,7 @@ static void run_perf_test(int tasks)
+
+ static void fill_lpm_trie(void)
+ {
+-	struct bpf_lpm_trie_key *key;
++	struct bpf_lpm_trie_key_u8 *key;
+ 	unsigned long value = 0;
+ 	unsigned int i;
+ 	int r;
+diff --git a/samples/bpf/xdp_router_ipv4_user.c b/samples/bpf/xdp_router_ipv4_user.c
+index 9d41db09c4800f..266fdd0b025dc6 100644
+--- a/samples/bpf/xdp_router_ipv4_user.c
++++ b/samples/bpf/xdp_router_ipv4_user.c
+@@ -91,7 +91,7 @@ static int recv_msg(struct sockaddr_nl sock_addr, int sock)
+ static void read_route(struct nlmsghdr *nh, int nll)
+ {
+ 	char dsts[24], gws[24], ifs[16], dsts_len[24], metrics[24];
+-	struct bpf_lpm_trie_key *prefix_key;
++	struct bpf_lpm_trie_key_u8 *prefix_key;
+ 	struct rtattr *rt_attr;
+ 	struct rtmsg *rt_msg;
+ 	int rtm_family;
+diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c
+index 8b58a7864703ee..7e8fca0b066280 100644
+--- a/sound/soc/soc-topology.c
++++ b/sound/soc/soc-topology.c
+@@ -1021,6 +1021,7 @@ static int soc_tplg_dapm_graph_elems_load(struct soc_tplg *tplg,
+ 					  struct snd_soc_tplg_hdr *hdr)
+ {
+ 	struct snd_soc_dapm_context *dapm = &tplg->comp->dapm;
++	const size_t maxlen = SNDRV_CTL_ELEM_ID_NAME_MAXLEN;
+ 	struct snd_soc_tplg_dapm_graph_elem *elem;
+ 	struct snd_soc_dapm_route *route;
+ 	int count, i;
+@@ -1044,39 +1045,22 @@ static int soc_tplg_dapm_graph_elems_load(struct soc_tplg *tplg,
+ 		tplg->pos += sizeof(struct snd_soc_tplg_dapm_graph_elem);
+
+ 		/* validate routes */
+-		if (strnlen(elem->source, SNDRV_CTL_ELEM_ID_NAME_MAXLEN) ==
+-		    SNDRV_CTL_ELEM_ID_NAME_MAXLEN) {
+-			ret = -EINVAL;
+-			break;
+-		}
+-		if (strnlen(elem->sink, SNDRV_CTL_ELEM_ID_NAME_MAXLEN) ==
+-		    SNDRV_CTL_ELEM_ID_NAME_MAXLEN) {
+-			ret = -EINVAL;
+-			break;
+-		}
+-		if (strnlen(elem->control, SNDRV_CTL_ELEM_ID_NAME_MAXLEN) ==
+-		    SNDRV_CTL_ELEM_ID_NAME_MAXLEN) {
++		if ((strnlen(elem->source, maxlen) == maxlen) ||
++		    (strnlen(elem->sink, maxlen) == maxlen) ||
++		    (strnlen(elem->control, maxlen) == maxlen)) {
+ 			ret = -EINVAL;
+ 			break;
+ 		}
+
+-		route->source = devm_kmemdup(tplg->dev, elem->source,
+-					     min(strlen(elem->source),
+-						 SNDRV_CTL_ELEM_ID_NAME_MAXLEN),
+-					     GFP_KERNEL);
+-		route->sink = devm_kmemdup(tplg->dev, elem->sink,
+-					   min(strlen(elem->sink), SNDRV_CTL_ELEM_ID_NAME_MAXLEN),
+-					   GFP_KERNEL);
++		route->source = devm_kstrdup(tplg->dev, elem->source, GFP_KERNEL);
++		route->sink = devm_kstrdup(tplg->dev, elem->sink, GFP_KERNEL);
+ 		if (!route->source || !route->sink) {
+ 			ret = -ENOMEM;
+ 			break;
+ 		}
+
+-		if (strnlen(elem->control, SNDRV_CTL_ELEM_ID_NAME_MAXLEN) != 0) {
+-			route->control = devm_kmemdup(tplg->dev, elem->control,
+-						      min(strlen(elem->control),
+-							  SNDRV_CTL_ELEM_ID_NAME_MAXLEN),
+-						      GFP_KERNEL);
++		if (strnlen(elem->control, maxlen) != 0) {
++			route->control = devm_kstrdup(tplg->dev, elem->control, GFP_KERNEL);
+ 			if (!route->control) {
+ 				ret = -ENOMEM;
+ 				break;
+diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c
+index d1bdb0b93bda0c..8cc2d4937f3403 100644
+--- a/sound/usb/mixer.c
++++ b/sound/usb/mixer.c
+@@ -2021,6 +2021,13 @@ static int parse_audio_feature_unit(struct mixer_build *state, int unitid,
+ 		bmaControls = ftr->bmaControls;
+ 	}
+
++	if (channels > 32) {
++		usb_audio_info(state->chip,
++			       "usbmixer: too many channels (%d) in unit %d\n",
++			       channels, unitid);
++		return -EINVAL;
++	}
++
+ 	/* parse the source unit */
+ 	err = parse_audio_unit(state, hdr->bSourceID);
+ 	if (err < 0)
+diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
+index fb09fd1767f289..ba6e346c8d669a 100644
+--- a/tools/include/uapi/linux/bpf.h
++++ b/tools/include/uapi/linux/bpf.h
+@@ -77,12 +77,29 @@ struct bpf_insn {
+ 	__s32	imm;		/* signed immediate constant */
+ };
+
+-/* Key of an a BPF_MAP_TYPE_LPM_TRIE entry */
++/* Deprecated: use struct bpf_lpm_trie_key_u8 (when the "data" member is needed for
++ * byte access) or struct bpf_lpm_trie_key_hdr (when using an alternative type for
++ * the trailing flexible array member) instead.
++ */
+ struct bpf_lpm_trie_key {
+ 	__u32	prefixlen;	/* up to 32 for AF_INET, 128 for AF_INET6 */
+ 	__u8	data[0];	/* Arbitrary size */
+ };
+
++/* Header for bpf_lpm_trie_key structs */
++struct bpf_lpm_trie_key_hdr {
++	__u32	prefixlen;
++};
++
++/* Key of an a BPF_MAP_TYPE_LPM_TRIE entry, with trailing byte array. */
++struct bpf_lpm_trie_key_u8 {
++	union {
++		struct bpf_lpm_trie_key_hdr	hdr;
++		__u32				prefixlen;
++	};
++	__u8	data[];		/* Arbitrary size */
++};
++
+ struct bpf_cgroup_storage_key {
+ 	__u64	cgroup_inode_id;	/* cgroup inode id */
+ 	__u32	attach_type;		/* program attach type (enum bpf_attach_type) */
+diff --git a/tools/testing/selftests/bpf/progs/map_ptr_kern.c b/tools/testing/selftests/bpf/progs/map_ptr_kern.c
+index 3325da17ec81af..efaf622c28ddec 100644
+--- a/tools/testing/selftests/bpf/progs/map_ptr_kern.c
++++ b/tools/testing/selftests/bpf/progs/map_ptr_kern.c
+@@ -316,7 +316,7 @@ struct lpm_trie {
+ } __attribute__((preserve_access_index));
+
+ struct lpm_key {
+-	struct bpf_lpm_trie_key trie_key;
++	struct bpf_lpm_trie_key_hdr trie_key;
+ 	__u32 data;
+ };
+
+diff --git a/tools/testing/selftests/bpf/test_lpm_map.c b/tools/testing/selftests/bpf/test_lpm_map.c
+index c028d621c744da..d98c72dc563eaf 100644
+--- a/tools/testing/selftests/bpf/test_lpm_map.c
++++ b/tools/testing/selftests/bpf/test_lpm_map.c
+@@ -211,7 +211,7 @@ static void test_lpm_map(int keysize)
+ 	volatile size_t n_matches, n_matches_after_delete;
+ 	size_t i, j, n_nodes, n_lookups;
+ 	struct tlpm_node *t, *list = NULL;
+-	struct bpf_lpm_trie_key *key;
++	struct bpf_lpm_trie_key_u8 *key;
+ 	uint8_t *data, *value;
+ 	int r, map;
+
+@@ -331,8 +331,8 @@ static void test_lpm_map(int keysize)
+ static void test_lpm_ipaddr(void)
+ {
+ 	LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_NO_PREALLOC);
+-	struct bpf_lpm_trie_key *key_ipv4;
+-	struct bpf_lpm_trie_key *key_ipv6;
++	struct bpf_lpm_trie_key_u8 *key_ipv4;
++	struct bpf_lpm_trie_key_u8 *key_ipv6;
+ 	size_t key_size_ipv4;
+ 	size_t key_size_ipv6;
+ 	int map_fd_ipv4;
+@@ -423,7 +423,7 @@ static void test_lpm_ipaddr(void)
+ static void test_lpm_delete(void)
+ {
+ 	LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_NO_PREALLOC);
+-	struct bpf_lpm_trie_key *key;
++	struct bpf_lpm_trie_key_u8 *key;
+ 	size_t key_size;
+ 	int map_fd;
+ 	__u64 value;
+@@ -532,7 +532,7 @@ static void test_lpm_delete(void)
+ static void test_lpm_get_next_key(void)
+ {
+ 	LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_NO_PREALLOC);
+-	struct bpf_lpm_trie_key *key_p, *next_key_p;
++	struct bpf_lpm_trie_key_u8 *key_p, *next_key_p;
+ 	size_t key_size;
+ 	__u32 value = 0;
+ 	int map_fd;
+@@ -693,9 +693,9 @@ static void *lpm_test_command(void *arg)
+ {
+ 	int i, j, ret, iter, key_size;
+ 	struct lpm_mt_test_info *info = arg;
+-	struct bpf_lpm_trie_key *key_p;
++	struct bpf_lpm_trie_key_u8 *key_p;
+
+-	key_size = sizeof(struct bpf_lpm_trie_key) + sizeof(__u32);
++	key_size = sizeof(*key_p) + sizeof(__u32);
+ 	key_p = alloca(key_size);
+ 	for (iter = 0; iter < info->iter; iter++)
+ 		for (i = 0; i < MAX_TEST_KEYS; i++) {
+@@ -717,7 +717,7 @@ static void *lpm_test_command(void *arg)
+ 			ret = bpf_map_lookup_elem(info->map_fd, key_p, &value);
+ 			assert(ret == 0 || errno == ENOENT);
+ 		} else {
+-			struct bpf_lpm_trie_key *next_key_p = alloca(key_size);
++			struct bpf_lpm_trie_key_u8 *next_key_p = alloca(key_size);
+ 			ret = bpf_map_get_next_key(info->map_fd, key_p, next_key_p);
+ 			assert(ret == 0 || errno == ENOENT || errno == ENOMEM);
+ 		}
+@@ -752,7 +752,7 @@ static void test_lpm_multi_thread(void)
+
+ 	/* create a trie */
+ 	value_size = sizeof(__u32);
+-	key_size = sizeof(struct bpf_lpm_trie_key) + value_size;
++	key_size = sizeof(struct bpf_lpm_trie_key_hdr) + value_size;
+ 	map_fd = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE, NULL, key_size, value_size, 100, &opts);
+
+ 	/* create 4 threads to test update, delete, lookup and get_next_key */
+diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c
+index ad993ab3ac1819..bc36c91c4480f5 100644
+--- a/tools/testing/selftests/net/tls.c
++++ b/tools/testing/selftests/net/tls.c
+@@ -707,6 +707,20 @@ TEST_F(tls, splice_from_pipe)
+ 	EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0);
+ }
+
++TEST_F(tls, splice_more)
++{
++	unsigned int f = SPLICE_F_NONBLOCK | SPLICE_F_MORE | SPLICE_F_GIFT;
++	int send_len = TLS_PAYLOAD_MAX_LEN;
++	char mem_send[TLS_PAYLOAD_MAX_LEN];
++	int i, send_pipe = 1;
++	int p[2];
++
++	ASSERT_GE(pipe(p), 0);
++	EXPECT_GE(write(p[1], mem_send, send_len), 0);
++	for (i = 0; i < 32; i++)
++		EXPECT_EQ(splice(p[0], NULL, self->fd, NULL, send_pipe, f), 1);
++}
++
+ TEST_F(tls, splice_from_pipe2)
+ {
+ 	int send_len = 16000;