summary refs log tree commit diff
diff options
context:
space:
mode:
author Mike Pagano <mpagano@gentoo.org> 2024-08-19 06:23:57 -0400
committer Mike Pagano <mpagano@gentoo.org> 2024-08-19 06:23:57 -0400
commit 93a63db9b68552114ee10dfe2295847e72609574 (patch)
tree a6f3cbbefa3d588926ee33ebe68e5d3b2a3aa261
parent Remove redundant patch (diff)
download linux-patches-6.6.tar.gz
linux-patches-6.6.tar.bz2
linux-patches-6.6.zip
Linux patch 6.6.47 6.6-54 6.6
Signed-off-by: Mike Pagano <mpagano@gentoo.org>
-rw-r--r-- 0000_README 4
-rw-r--r-- 1046_linux-6.6.47.patch 4211
2 files changed, 4215 insertions, 0 deletions
diff --git a/0000_README b/0000_README
index 5e05e9e5..ca76ad48 100644
--- a/0000_README
+++ b/0000_README
@@ -227,6 +227,10 @@ Patch: 1045_linux-6.6.46.patch
From: https://www.kernel.org
Desc: Linux 6.6.46
+Patch: 1046_linux-6.6.47.patch
+From: https://www.kernel.org
+Desc: Linux 6.6.47
+
Patch: 1510_fs-enable-link-security-restrictions-by-default.patch
From: http://sources.debian.net/src/linux/3.16.7-ckt4-3/debian/patches/debian/fs-enable-link-security-restrictions-by-default.patch
Desc: Enable link security restrictions by default.
diff --git a/1046_linux-6.6.47.patch b/1046_linux-6.6.47.patch
new file mode 100644
index 00000000..830bd4b9
--- /dev/null
+++ b/1046_linux-6.6.47.patch
@@ -0,0 +1,4211 @@
+diff --git a/Documentation/bpf/map_lpm_trie.rst b/Documentation/bpf/map_lpm_trie.rst
+index 74d64a30f50073..f9cd579496c9ce 100644
+--- a/Documentation/bpf/map_lpm_trie.rst
++++ b/Documentation/bpf/map_lpm_trie.rst
+@@ -17,7 +17,7 @@ significant byte.
+
+ LPM tries may be created with a maximum prefix length that is a multiple
+ of 8, in the range from 8 to 2048. The key used for lookup and update
+-operations is a ``struct bpf_lpm_trie_key``, extended by
++operations is a ``struct bpf_lpm_trie_key_u8``, extended by
+ ``max_prefixlen/8`` bytes.
+
+ - For IPv4 addresses the data length is 4 bytes
+diff --git a/Documentation/mm/page_table_check.rst b/Documentation/mm/page_table_check.rst
+index c12838ce6b8de2..c59f22eb6a0f9a 100644
+--- a/Documentation/mm/page_table_check.rst
++++ b/Documentation/mm/page_table_check.rst
+@@ -14,7 +14,7 @@ Page table check performs extra verifications at the time when new pages become
+ accessible from the userspace by getting their page table entries (PTEs PMDs
+ etc.) added into the table.
+
+-In case of detected corruption, the kernel is crashed. There is a small
++In case of most detected corruption, the kernel is crashed. There is a small
+ performance and memory overhead associated with the page table check. Therefore,
+ it is disabled by default, but can be optionally enabled on systems where the
+ extra hardening outweighs the performance costs. Also, because page table check
+@@ -22,6 +22,13 @@ is synchronous, it can help with debugging double map memory corruption issues,
+ by crashing kernel at the time wrong mapping occurs instead of later which is
+ often the case with memory corruptions bugs.
+
++It can also be used to do page table entry checks over various flags, dump
++warnings when illegal combinations of entry flags are detected. Currently,
++userfaultfd is the only user of such to sanity check wr-protect bit against
++any writable flags. Illegal flag combinations will not directly cause data
++corruption in this case immediately, but that will cause read-only data to
++be writable, leading to corrupt when the page content is later modified.
++
+ Double mapping detection logic
+ ==============================
+
+diff --git a/Makefile b/Makefile
+index 77de99984c2f18..6b967e135c80f0 100644
+--- a/Makefile
++++ b/Makefile
+@@ -1,7 +1,7 @@
+ # SPDX-License-Identifier: GPL-2.0
+ VERSION = 6
+ PATCHLEVEL = 6
+-SUBLEVEL = 46
++SUBLEVEL = 47
+ EXTRAVERSION =
+ NAME = Hurr durr I'ma ninja sloth
+
+diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
+index 15aa9bad1c280b..ca0bf0b92ca09e 100644
+--- a/arch/arm64/kvm/hyp/pgtable.c
++++ b/arch/arm64/kvm/hyp/pgtable.c
+@@ -523,7 +523,7 @@ static int hyp_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx,
+
+ kvm_clear_pte(ctx->ptep);
+ dsb(ishst);
+- __tlbi_level(vae2is, __TLBI_VADDR(ctx->addr, 0), ctx->level);
++ __tlbi_level(vae2is, __TLBI_VADDR(ctx->addr, 0), 0);
+ } else {
+ if (ctx->end - ctx->addr < granule)
+ return -EINVAL;
+@@ -861,9 +861,13 @@ static void stage2_unmap_put_pte(const struct kvm_pgtable_visit_ctx *ctx,
+ if (kvm_pte_valid(ctx->old)) {
+ kvm_clear_pte(ctx->ptep);
+
+- if (!stage2_unmap_defer_tlb_flush(pgt))
+- kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu,
+- ctx->addr, ctx->level);
++ if (kvm_pte_table(ctx->old, ctx->level)) {
++ kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ctx->addr,
++ 0);
++ } else if (!stage2_unmap_defer_tlb_flush(pgt)) {
++ kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ctx->addr,
++ ctx->level);
++ }
+ }
+
+ mm_ops->put_page(ctx->ptep);
+diff --git a/arch/loongarch/include/uapi/asm/unistd.h b/arch/loongarch/include/uapi/asm/unistd.h
+index fcb668984f0336..b344b1f917153b 100644
+--- a/arch/loongarch/include/uapi/asm/unistd.h
++++ b/arch/loongarch/include/uapi/asm/unistd.h
+@@ -1,4 +1,5 @@
+ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
++#define __ARCH_WANT_NEW_STAT
+ #define __ARCH_WANT_SYS_CLONE
+ #define __ARCH_WANT_SYS_CLONE3
+
+diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
+index e02b179ec65989..d03fe4fb41f43c 100644
+--- a/arch/x86/include/asm/pgtable.h
++++ b/arch/x86/include/asm/pgtable.h
+@@ -387,23 +387,7 @@ static inline pte_t pte_wrprotect(pte_t pte)
+ #ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
+ static inline int pte_uffd_wp(pte_t pte)
+ {
+- bool wp = pte_flags(pte) & _PAGE_UFFD_WP;
+-
+-#ifdef CONFIG_DEBUG_VM
+- /*
+- * Having write bit for wr-protect-marked present ptes is fatal,
+- * because it means the uffd-wp bit will be ignored and write will
+- * just go through.
+- *
+- * Use any chance of pgtable walking to verify this (e.g., when
+- * page swapped out or being migrated for all purposes). It means
+- * something is already wrong. Tell the admin even before the
+- * process crashes. We also nail it with wrong pgtable setup.
+- */
+- WARN_ON_ONCE(wp && pte_write(pte));
+-#endif
+-
+- return wp;
++ return pte_flags(pte) & _PAGE_UFFD_WP;
+ }
+
+ static inline pte_t pte_mkuffd_wp(pte_t pte)
+diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
+index 77dbd516a05463..277bf0e8ed0918 100644
+--- a/drivers/ata/libata-scsi.c
++++ b/drivers/ata/libata-scsi.c
+@@ -941,8 +941,19 @@ static void ata_gen_passthru_sense(struct ata_queued_cmd *qc)
+ &sense_key, &asc, &ascq);
+ ata_scsi_set_sense(qc->dev, cmd, sense_key, asc, ascq);
+ } else {
+- /* ATA PASS-THROUGH INFORMATION AVAILABLE */
+- ata_scsi_set_sense(qc->dev, cmd, RECOVERED_ERROR, 0, 0x1D);
++ /*
++ * ATA PASS-THROUGH INFORMATION AVAILABLE
++ *
++ * Note: we are supposed to call ata_scsi_set_sense(), which
++ * respects the D_SENSE bit, instead of unconditionally
++ * generating the sense data in descriptor format. However,
++ * because hdparm, hddtemp, and udisks incorrectly assume sense
++ * data in descriptor format, without even looking at the
++ * RESPONSE CODE field in the returned sense data (to see which
++ * format the returned sense data is in), we are stuck with
++ * being bug compatible with older kernels.
++ */
++ scsi_build_sense(cmd, 1, RECOVERED_ERROR, 0, 0x1D);
+ }
+ }
+
+diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c
+index 2776ca5fc33f39..b215b28cad7b76 100644
+--- a/drivers/isdn/mISDN/socket.c
++++ b/drivers/isdn/mISDN/socket.c
+@@ -401,23 +401,23 @@ data_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+ }
+
+ static int data_sock_setsockopt(struct socket *sock, int level, int optname,
+- sockptr_t optval, unsigned int len)
++ sockptr_t optval, unsigned int optlen)
+ {
+ struct sock *sk = sock->sk;
+ int err = 0, opt = 0;
+
+ if (*debug & DEBUG_SOCKET)
+ printk(KERN_DEBUG "%s(%p, %d, %x, optval, %d)\n", __func__, sock,
+- level, optname, len);
++ level, optname, optlen);
+
+ lock_sock(sk);
+
+ switch (optname) {
+ case MISDN_TIME_STAMP:
+- if (copy_from_sockptr(&opt, optval, sizeof(int))) {
+- err = -EFAULT;
++ err = copy_safe_from_sockptr(&opt, sizeof(opt),
++ optval, optlen);
++ if (err)
+ break;
+- }
+
+ if (opt)
+ _pms(sk)->cmask |= MISDN_TIME_STAMP;
+diff --git a/drivers/media/usb/dvb-usb/dvb-usb-init.c b/drivers/media/usb/dvb-usb/dvb-usb-init.c
+index 22d83ac18eb735..fbf58012becdf2 100644
+--- a/drivers/media/usb/dvb-usb/dvb-usb-init.c
++++ b/drivers/media/usb/dvb-usb/dvb-usb-init.c
+@@ -23,40 +23,11 @@ static int dvb_usb_force_pid_filter_usage;
+ module_param_named(force_pid_filter_usage, dvb_usb_force_pid_filter_usage, int, 0444);
+ MODULE_PARM_DESC(force_pid_filter_usage, "force all dvb-usb-devices to use a PID filter, if any (default: 0).");
+
+-static int dvb_usb_check_bulk_endpoint(struct dvb_usb_device *d, u8 endpoint)
+-{
+- if (endpoint) {
+- int ret;
+-
+- ret = usb_pipe_type_check(d->udev, usb_sndbulkpipe(d->udev, endpoint));
+- if (ret)
+- return ret;
+- ret = usb_pipe_type_check(d->udev, usb_rcvbulkpipe(d->udev, endpoint));
+- if (ret)
+- return ret;
+- }
+- return 0;
+-}
+-
+-static void dvb_usb_clear_halt(struct dvb_usb_device *d, u8 endpoint)
+-{
+- if (endpoint) {
+- usb_clear_halt(d->udev, usb_sndbulkpipe(d->udev, endpoint));
+- usb_clear_halt(d->udev, usb_rcvbulkpipe(d->udev, endpoint));
+- }
+-}
+-
+ static int dvb_usb_adapter_init(struct dvb_usb_device *d, short *adapter_nrs)
+ {
+ struct dvb_usb_adapter *adap;
+ int ret, n, o;
+
+- ret = dvb_usb_check_bulk_endpoint(d, d->props.generic_bulk_ctrl_endpoint);
+- if (ret)
+- return ret;
+- ret = dvb_usb_check_bulk_endpoint(d, d->props.generic_bulk_ctrl_endpoint_response);
+- if (ret)
+- return ret;
+ for (n = 0; n < d->props.num_adapters; n++) {
+ adap = &d->adapter[n];
+ adap->dev = d;
+@@ -132,8 +103,10 @@ static int dvb_usb_adapter_init(struct dvb_usb_device *d, short *adapter_nrs)
+ * when reloading the driver w/o replugging the device
+ * sometimes a timeout occurs, this helps
+ */
+- dvb_usb_clear_halt(d, d->props.generic_bulk_ctrl_endpoint);
+- dvb_usb_clear_halt(d, d->props.generic_bulk_ctrl_endpoint_response);
++ if (d->props.generic_bulk_ctrl_endpoint != 0) {
++ usb_clear_halt(d->udev, usb_sndbulkpipe(d->udev, d->props.generic_bulk_ctrl_endpoint));
++ usb_clear_halt(d->udev, usb_rcvbulkpipe(d->udev, d->props.generic_bulk_ctrl_endpoint));
++ }
+
+ return 0;
+
+diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c
+index ba8b6bd8233cad..96cca4ee470a4b 100644
+--- a/drivers/net/ppp/pppoe.c
++++ b/drivers/net/ppp/pppoe.c
+@@ -1007,26 +1007,21 @@ static int pppoe_recvmsg(struct socket *sock, struct msghdr *m,
+ struct sk_buff *skb;
+ int error = 0;
+
+- if (sk->sk_state & PPPOX_BOUND) {
+- error = -EIO;
+- goto end;
+- }
++ if (sk->sk_state & PPPOX_BOUND)
++ return -EIO;
+
+ skb = skb_recv_datagram(sk, flags, &error);
+- if (error < 0)
+- goto end;
++ if (!skb)
++ return error;
+
+- if (skb) {
+- total_len = min_t(size_t, total_len, skb->len);
+- error = skb_copy_datagram_msg(skb, 0, m, total_len);
+- if (error == 0) {
+- consume_skb(skb);
+- return total_len;
+- }
++ total_len = min_t(size_t, total_len, skb->len);
++ error = skb_copy_datagram_msg(skb, 0, m, total_len);
++ if (error == 0) {
++ consume_skb(skb);
++ return total_len;
+ }
+
+ kfree_skb(skb);
+-end:
+ return error;
+ }
+
+diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
+index 0fc7aa78b2e5b9..2c3f55877a1134 100644
+--- a/drivers/nvme/host/pci.c
++++ b/drivers/nvme/host/pci.c
+@@ -2931,6 +2931,13 @@ static unsigned long check_vendor_combination_bug(struct pci_dev *pdev)
+ return NVME_QUIRK_FORCE_NO_SIMPLE_SUSPEND;
+ }
+
++ /*
++ * NVMe SSD drops off the PCIe bus after system idle
++ * for 10 hours on a Lenovo N60z board.
++ */
++ if (dmi_match(DMI_BOARD_NAME, "LXKT-ZXEG-N6"))
++ return NVME_QUIRK_NO_APST;
++
+ return 0;
+ }
+
+diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
+index c26545d71d39a3..cd6d5bbb4b9df5 100644
+--- a/fs/binfmt_flat.c
++++ b/fs/binfmt_flat.c
+@@ -72,8 +72,10 @@
+
+ #ifdef CONFIG_BINFMT_FLAT_NO_DATA_START_OFFSET
+ #define DATA_START_OFFSET_WORDS (0)
++#define MAX_SHARED_LIBS_UPDATE (0)
+ #else
+ #define DATA_START_OFFSET_WORDS (MAX_SHARED_LIBS)
++#define MAX_SHARED_LIBS_UPDATE (MAX_SHARED_LIBS)
+ #endif
+
+ struct lib_info {
+@@ -880,7 +882,7 @@ static int load_flat_binary(struct linux_binprm *bprm)
+ return res;
+
+ /* Update data segment pointers for all libraries */
+- for (i = 0; i < MAX_SHARED_LIBS; i++) {
++ for (i = 0; i < MAX_SHARED_LIBS_UPDATE; i++) {
+ if (!libinfo.lib_list[i].loaded)
+ continue;
+ for (j = 0; j < MAX_SHARED_LIBS; j++) {
+diff --git a/fs/buffer.c b/fs/buffer.c
+index 12e9a71c693d74..ecd8b47507ff80 100644
+--- a/fs/buffer.c
++++ b/fs/buffer.c
+@@ -2179,6 +2179,8 @@ static void __block_commit_write(struct folio *folio, size_t from, size_t to)
+ struct buffer_head *bh, *head;
+
+ bh = head = folio_buffers(folio);
++ if (!bh)
++ return;
+ blocksize = bh->b_size;
+
+ block_start = 0;
+diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
+index 5ee7d7bbb361ce..2fbf97077ce910 100644
+--- a/fs/cramfs/inode.c
++++ b/fs/cramfs/inode.c
+@@ -495,7 +495,7 @@ static void cramfs_kill_sb(struct super_block *sb)
+ sb->s_mtd = NULL;
+ } else if (IS_ENABLED(CONFIG_CRAMFS_BLOCKDEV) && sb->s_bdev) {
+ sync_blockdev(sb->s_bdev);
+- blkdev_put(sb->s_bdev, sb);
++ bdev_release(sb->s_bdev_handle);
+ }
+ kfree(sbi);
+ }
+diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c
+index d36b3963c0bf3c..aa59788a61e6e4 100644
+--- a/fs/erofs/decompressor.c
++++ b/fs/erofs/decompressor.c
+@@ -248,15 +248,9 @@ static int z_erofs_lz4_decompress_mem(struct z_erofs_lz4_decompress_ctx *ctx,
+ if (ret != rq->outputsize) {
+ erofs_err(rq->sb, "failed to decompress %d in[%u, %u] out[%u]",
+ ret, rq->inputsize, inputmargin, rq->outputsize);
+-
+- print_hex_dump(KERN_DEBUG, "[ in]: ", DUMP_PREFIX_OFFSET,
+- 16, 1, src + inputmargin, rq->inputsize, true);
+- print_hex_dump(KERN_DEBUG, "[out]: ", DUMP_PREFIX_OFFSET,
+- 16, 1, out, rq->outputsize, true);
+-
+ if (ret >= 0)
+ memset(out + ret, 0, rq->outputsize - ret);
+- ret = -EIO;
++ ret = -EFSCORRUPTED;
+ } else {
+ ret = 0;
+ }
+diff --git a/fs/exec.c b/fs/exec.c
+index 89a9017af7e86f..1cbbef281f8cfe 100644
+--- a/fs/exec.c
++++ b/fs/exec.c
+@@ -1609,6 +1609,7 @@ static void bprm_fill_uid(struct linux_binprm *bprm, struct file *file)
+ unsigned int mode;
+ vfsuid_t vfsuid;
+ vfsgid_t vfsgid;
++ int err;
+
+ if (!mnt_may_suid(file->f_path.mnt))
+ return;
+@@ -1625,12 +1626,17 @@ static void bprm_fill_uid(struct linux_binprm *bprm, struct file *file)
+ /* Be careful if suid/sgid is set */
+ inode_lock(inode);
+
+- /* reload atomically mode/uid/gid now that lock held */
++ /* Atomically reload and check mode/uid/gid now that lock held. */
+ mode = inode->i_mode;
+ vfsuid = i_uid_into_vfsuid(idmap, inode);
+ vfsgid = i_gid_into_vfsgid(idmap, inode);
++ err = inode_permission(idmap, inode, MAY_EXEC);
+ inode_unlock(inode);
+
++ /* Did the exec bit vanish out from under us? Give up. */
++ if (err)
++ return;
++
+ /* We ignore suid/sgid if there are no mappings for them in the ns */
+ if (!vfsuid_has_mapping(bprm->cred->user_ns, vfsuid) ||
+ !vfsgid_has_mapping(bprm->cred->user_ns, vfsgid))
+diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
+index cef119a2476bb4..a4ffd1acac6514 100644
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -2966,23 +2966,29 @@ static int ext4_da_should_update_i_disksize(struct folio *folio,
+
+ static int ext4_da_do_write_end(struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned copied,
+- struct page *page)
++ struct folio *folio)
+ {
+ struct inode *inode = mapping->host;
+ loff_t old_size = inode->i_size;
+ bool disksize_changed = false;
+ loff_t new_i_size;
+
++ if (unlikely(!folio_buffers(folio))) {
++ folio_unlock(folio);
++ folio_put(folio);
++ return -EIO;
++ }
+ /*
+ * block_write_end() will mark the inode as dirty with I_DIRTY_PAGES
+ * flag, which all that's needed to trigger page writeback.
+ */
+- copied = block_write_end(NULL, mapping, pos, len, copied, page, NULL);
++ copied = block_write_end(NULL, mapping, pos, len, copied,
++ &folio->page, NULL);
+ new_i_size = pos + copied;
+
+ /*
+- * It's important to update i_size while still holding page lock,
+- * because page writeout could otherwise come in and zero beyond
++ * It's important to update i_size while still holding folio lock,
++ * because folio writeout could otherwise come in and zero beyond
+ * i_size.
+ *
+ * Since we are holding inode lock, we are sure i_disksize <=
+@@ -3000,14 +3006,14 @@ static int ext4_da_do_write_end(struct address_space *mapping,
+
+ i_size_write(inode, new_i_size);
+ end = (new_i_size - 1) & (PAGE_SIZE - 1);
+- if (copied && ext4_da_should_update_i_disksize(page_folio(page), end)) {
++ if (copied && ext4_da_should_update_i_disksize(folio, end)) {
+ ext4_update_i_disksize(inode, new_i_size);
+ disksize_changed = true;
+ }
+ }
+
+- unlock_page(page);
+- put_page(page);
++ folio_unlock(folio);
++ folio_put(folio);
+
+ if (old_size < pos)
+ pagecache_isize_extended(inode, old_size, pos);
+@@ -3046,10 +3052,10 @@ static int ext4_da_write_end(struct file *file,
+ return ext4_write_inline_data_end(inode, pos, len, copied,
+ folio);
+
+- if (unlikely(copied < len) && !PageUptodate(page))
++ if (unlikely(copied < len) && !folio_test_uptodate(folio))
+ copied = 0;
+
+- return ext4_da_do_write_end(mapping, pos, len, copied, &folio->page);
++ return ext4_da_do_write_end(mapping, pos, len, copied, folio);
+ }
+
+ /*
+diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
+index c58cbe9f7809c1..c368ff671d7739 100644
+--- a/fs/ext4/xattr.c
++++ b/fs/ext4/xattr.c
+@@ -1571,46 +1571,49 @@ ext4_xattr_inode_cache_find(struct inode *inode, const void *value,
+ /*
+ * Add value of the EA in an inode.
+ */
+-static int ext4_xattr_inode_lookup_create(handle_t *handle, struct inode *inode,
+- const void *value, size_t value_len,
+- struct inode **ret_inode)
++static struct inode *ext4_xattr_inode_lookup_create(handle_t *handle,
++ struct inode *inode, const void *value, size_t value_len)
+ {
+ struct inode *ea_inode;
+ u32 hash;
+ int err;
+
++ /* Account inode & space to quota even if sharing... */
++ err = ext4_xattr_inode_alloc_quota(inode, value_len);
++ if (err)
++ return ERR_PTR(err);
++
+ hash = ext4_xattr_inode_hash(EXT4_SB(inode->i_sb), value, value_len);
+ ea_inode = ext4_xattr_inode_cache_find(inode, value, value_len, hash);
+ if (ea_inode) {
+ err = ext4_xattr_inode_inc_ref(handle, ea_inode);
+- if (err) {
+- iput(ea_inode);
+- return err;
+- }
+-
+- *ret_inode = ea_inode;
+- return 0;
++ if (err)
++ goto out_err;
++ return ea_inode;
+ }
+
+ /* Create an inode for the EA value */
+ ea_inode = ext4_xattr_inode_create(handle, inode, hash);
+- if (IS_ERR(ea_inode))
+- return PTR_ERR(ea_inode);
++ if (IS_ERR(ea_inode)) {
++ ext4_xattr_inode_free_quota(inode, NULL, value_len);
++ return ea_inode;
++ }
+
+ err = ext4_xattr_inode_write(handle, ea_inode, value, value_len);
+ if (err) {
+ if (ext4_xattr_inode_dec_ref(handle, ea_inode))
+ ext4_warning_inode(ea_inode, "cleanup dec ref error %d", err);
+- iput(ea_inode);
+- return err;
++ goto out_err;
+ }
+
+ if (EA_INODE_CACHE(inode))
+ mb_cache_entry_create(EA_INODE_CACHE(inode), GFP_NOFS, hash,
+ ea_inode->i_ino, true /* reusable */);
+-
+- *ret_inode = ea_inode;
+- return 0;
++ return ea_inode;
++out_err:
++ iput(ea_inode);
++ ext4_xattr_inode_free_quota(inode, NULL, value_len);
++ return ERR_PTR(err);
+ }
+
+ /*
+@@ -1622,6 +1625,7 @@ static int ext4_xattr_inode_lookup_create(handle_t *handle, struct inode *inode,
+ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
+ struct ext4_xattr_search *s,
+ handle_t *handle, struct inode *inode,
++ struct inode *new_ea_inode,
+ bool is_block)
+ {
+ struct ext4_xattr_entry *last, *next;
+@@ -1629,7 +1633,6 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
+ size_t min_offs = s->end - s->base, name_len = strlen(i->name);
+ int in_inode = i->in_inode;
+ struct inode *old_ea_inode = NULL;
+- struct inode *new_ea_inode = NULL;
+ size_t old_size, new_size;
+ int ret;
+
+@@ -1714,43 +1717,11 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
+ old_ea_inode = NULL;
+ goto out;
+ }
+- }
+- if (i->value && in_inode) {
+- WARN_ON_ONCE(!i->value_len);
+-
+- ret = ext4_xattr_inode_alloc_quota(inode, i->value_len);
+- if (ret)
+- goto out;
+-
+- ret = ext4_xattr_inode_lookup_create(handle, inode, i->value,
+- i->value_len,
+- &new_ea_inode);
+- if (ret) {
+- new_ea_inode = NULL;
+- ext4_xattr_inode_free_quota(inode, NULL, i->value_len);
+- goto out;
+- }
+- }
+
+- if (old_ea_inode) {
+ /* We are ready to release ref count on the old_ea_inode. */
+ ret = ext4_xattr_inode_dec_ref(handle, old_ea_inode);
+- if (ret) {
+- /* Release newly required ref count on new_ea_inode. */
+- if (new_ea_inode) {
+- int err;
+-
+- err = ext4_xattr_inode_dec_ref(handle,
+- new_ea_inode);
+- if (err)
+- ext4_warning_inode(new_ea_inode,
+- "dec ref new_ea_inode err=%d",
+- err);
+- ext4_xattr_inode_free_quota(inode, new_ea_inode,
+- i->value_len);
+- }
++ if (ret)
+ goto out;
+- }
+
+ ext4_xattr_inode_free_quota(inode, old_ea_inode,
+ le32_to_cpu(here->e_value_size));
+@@ -1874,7 +1845,6 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
+ ret = 0;
+ out:
+ iput(old_ea_inode);
+- iput(new_ea_inode);
+ return ret;
+ }
+
+@@ -1937,9 +1907,21 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
+ size_t old_ea_inode_quota = 0;
+ unsigned int ea_ino;
+
+-
+ #define header(x) ((struct ext4_xattr_header *)(x))
+
++ /* If we need EA inode, prepare it before locking the buffer */
++ if (i->value && i->in_inode) {
++ WARN_ON_ONCE(!i->value_len);
++
++ ea_inode = ext4_xattr_inode_lookup_create(handle, inode,
++ i->value, i->value_len);
++ if (IS_ERR(ea_inode)) {
++ error = PTR_ERR(ea_inode);
++ ea_inode = NULL;
++ goto cleanup;
++ }
++ }
++
+ if (s->base) {
+ int offset = (char *)s->here - bs->bh->b_data;
+
+@@ -1948,6 +1930,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
+ EXT4_JTR_NONE);
+ if (error)
+ goto cleanup;
++
+ lock_buffer(bs->bh);
+
+ if (header(s->base)->h_refcount == cpu_to_le32(1)) {
+@@ -1974,7 +1957,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
+ }
+ ea_bdebug(bs->bh, "modifying in-place");
+ error = ext4_xattr_set_entry(i, s, handle, inode,
+- true /* is_block */);
++ ea_inode, true /* is_block */);
+ ext4_xattr_block_csum_set(inode, bs->bh);
+ unlock_buffer(bs->bh);
+ if (error == -EFSCORRUPTED)
+@@ -2042,29 +2025,13 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
+ s->end = s->base + sb->s_blocksize;
+ }
+
+- error = ext4_xattr_set_entry(i, s, handle, inode, true /* is_block */);
++ error = ext4_xattr_set_entry(i, s, handle, inode, ea_inode,
++ true /* is_block */);
+ if (error == -EFSCORRUPTED)
+ goto bad_block;
+ if (error)
+ goto cleanup;
+
+- if (i->value && s->here->e_value_inum) {
+- /*
+- * A ref count on ea_inode has been taken as part of the call to
+- * ext4_xattr_set_entry() above. We would like to drop this
+- * extra ref but we have to wait until the xattr block is
+- * initialized and has its own ref count on the ea_inode.
+- */
+- ea_ino = le32_to_cpu(s->here->e_value_inum);
+- error = ext4_xattr_inode_iget(inode, ea_ino,
+- le32_to_cpu(s->here->e_hash),
+- &ea_inode);
+- if (error) {
+- ea_inode = NULL;
+- goto cleanup;
+- }
+- }
+-
+ inserted:
+ if (!IS_LAST_ENTRY(s->first)) {
+ new_bh = ext4_xattr_block_cache_find(inode, header(s->base),
+@@ -2217,17 +2184,16 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
+
+ cleanup:
+ if (ea_inode) {
+- int error2;
+-
+- error2 = ext4_xattr_inode_dec_ref(handle, ea_inode);
+- if (error2)
+- ext4_warning_inode(ea_inode, "dec ref error=%d",
+- error2);
++ if (error) {
++ int error2;
+
+- /* If there was an error, revert the quota charge. */
+- if (error)
++ error2 = ext4_xattr_inode_dec_ref(handle, ea_inode);
++ if (error2)
++ ext4_warning_inode(ea_inode, "dec ref error=%d",
++ error2);
+ ext4_xattr_inode_free_quota(inode, ea_inode,
+ i_size_read(ea_inode));
++ }
+ iput(ea_inode);
+ }
+ if (ce)
+@@ -2285,14 +2251,38 @@ int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode,
+ {
+ struct ext4_xattr_ibody_header *header;
+ struct ext4_xattr_search *s = &is->s;
++ struct inode *ea_inode = NULL;
+ int error;
+
+ if (!EXT4_INODE_HAS_XATTR_SPACE(inode))
+ return -ENOSPC;
+
+- error = ext4_xattr_set_entry(i, s, handle, inode, false /* is_block */);
+- if (error)
++ /* If we need EA inode, prepare it before locking the buffer */
++ if (i->value && i->in_inode) {
++ WARN_ON_ONCE(!i->value_len);
++
++ ea_inode = ext4_xattr_inode_lookup_create(handle, inode,
++ i->value, i->value_len);
++ if (IS_ERR(ea_inode))
++ return PTR_ERR(ea_inode);
++ }
++ error = ext4_xattr_set_entry(i, s, handle, inode, ea_inode,
++ false /* is_block */);
++ if (error) {
++ if (ea_inode) {
++ int error2;
++
++ error2 = ext4_xattr_inode_dec_ref(handle, ea_inode);
++ if (error2)
++ ext4_warning_inode(ea_inode, "dec ref error=%d",
++ error2);
++
++ ext4_xattr_inode_free_quota(inode, ea_inode,
++ i_size_read(ea_inode));
++ iput(ea_inode);
++ }
+ return error;
++ }
+ header = IHDR(inode, ext4_raw_inode(&is->iloc));
+ if (!IS_LAST_ENTRY(s->first)) {
+ header->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC);
+@@ -2301,6 +2291,7 @@ int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode,
+ header->h_magic = cpu_to_le32(0);
+ ext4_clear_inode_state(inode, EXT4_STATE_XATTR);
+ }
++ iput(ea_inode);
+ return 0;
+ }
+
+diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
+index ad8dfac73bd446..6a9a470345bfc7 100644
+--- a/fs/f2fs/extent_cache.c
++++ b/fs/f2fs/extent_cache.c
+@@ -19,34 +19,24 @@
+ #include "node.h"
+ #include <trace/events/f2fs.h>
+
+-bool sanity_check_extent_cache(struct inode *inode)
++bool sanity_check_extent_cache(struct inode *inode, struct page *ipage)
+ {
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+- struct f2fs_inode_info *fi = F2FS_I(inode);
+- struct extent_tree *et = fi->extent_tree[EX_READ];
+- struct extent_info *ei;
+-
+- if (!et)
+- return true;
++ struct f2fs_extent *i_ext = &F2FS_INODE(ipage)->i_ext;
++ struct extent_info ei;
+
+- ei = &et->largest;
+- if (!ei->len)
+- return true;
++ get_read_extent_info(&ei, i_ext);
+
+- /* Let's drop, if checkpoint got corrupted. */
+- if (is_set_ckpt_flags(sbi, CP_ERROR_FLAG)) {
+- ei->len = 0;
+- et->largest_updated = true;
++ if (!ei.len)
+ return true;
+- }
+
+- if (!f2fs_is_valid_blkaddr(sbi, ei->blk, DATA_GENERIC_ENHANCE) ||
+- !f2fs_is_valid_blkaddr(sbi, ei->blk + ei->len - 1,
++ if (!f2fs_is_valid_blkaddr(sbi, ei.blk, DATA_GENERIC_ENHANCE) ||
++ !f2fs_is_valid_blkaddr(sbi, ei.blk + ei.len - 1,
+ DATA_GENERIC_ENHANCE)) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_warn(sbi, "%s: inode (ino=%lx) extent info [%u, %u, %u] is incorrect, run fsck to fix",
+ __func__, inode->i_ino,
+- ei->blk, ei->fofs, ei->len);
++ ei.blk, ei.fofs, ei.len);
+ return false;
+ }
+ return true;
+@@ -395,24 +385,22 @@ void f2fs_init_read_extent_tree(struct inode *inode, struct page *ipage)
+
+ if (!__may_extent_tree(inode, EX_READ)) {
+ /* drop largest read extent */
+- if (i_ext && i_ext->len) {
++ if (i_ext->len) {
+ f2fs_wait_on_page_writeback(ipage, NODE, true, true);
+ i_ext->len = 0;
+ set_page_dirty(ipage);
+ }
+- goto out;
++ set_inode_flag(inode, FI_NO_EXTENT);
++ return;
+ }
+
+ et = __grab_extent_tree(inode, EX_READ);
+
+- if (!i_ext || !i_ext->len)
+- goto out;
+-
+ get_read_extent_info(&ei, i_ext);
+
+ write_lock(&et->lock);
+- if (atomic_read(&et->node_cnt))
+- goto unlock_out;
++ if (atomic_read(&et->node_cnt) || !ei.len)
++ goto skip;
+
+ en = __attach_extent_node(sbi, et, &ei, NULL,
+ &et->root.rb_root.rb_node, true);
+@@ -424,11 +412,13 @@ void f2fs_init_read_extent_tree(struct inode *inode, struct page *ipage)
+ list_add_tail(&en->list, &eti->extent_list);
+ spin_unlock(&eti->extent_lock);
+ }
+-unlock_out:
++skip:
++ /* Let's drop, if checkpoint got corrupted. */
++ if (f2fs_cp_error(sbi)) {
++ et->largest.len = 0;
++ et->largest_updated = true;
++ }
+ write_unlock(&et->lock);
+-out:
+- if (!F2FS_I(inode)->extent_tree[EX_READ])
+- set_inode_flag(inode, FI_NO_EXTENT);
+ }
+
+ void f2fs_init_age_extent_tree(struct inode *inode)
+diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
+index 19490dd8321943..00eff023cd9d63 100644
+--- a/fs/f2fs/f2fs.h
++++ b/fs/f2fs/f2fs.h
+@@ -4189,7 +4189,7 @@ void f2fs_leave_shrinker(struct f2fs_sb_info *sbi);
+ /*
+ * extent_cache.c
+ */
+-bool sanity_check_extent_cache(struct inode *inode);
++bool sanity_check_extent_cache(struct inode *inode, struct page *ipage);
+ void f2fs_init_extent_tree(struct inode *inode);
+ void f2fs_drop_extent_tree(struct inode *inode);
+ void f2fs_destroy_extent_node(struct inode *inode);
+diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
+index afb7c88ba06b2c..888c301ffe8f4c 100644
+--- a/fs/f2fs/gc.c
++++ b/fs/f2fs/gc.c
+@@ -1563,6 +1563,16 @@ static int gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
+ continue;
+ }
+
++ if (f2fs_has_inline_data(inode)) {
++ iput(inode);
++ set_sbi_flag(sbi, SBI_NEED_FSCK);
++ f2fs_err_ratelimited(sbi,
++ "inode %lx has both inline_data flag and "
++ "data block, nid=%u, ofs_in_node=%u",
++ inode->i_ino, dni.nid, ofs_in_node);
++ continue;
++ }
++
+ err = f2fs_gc_pinned_control(inode, gc_type, segno);
+ if (err == -EAGAIN) {
+ iput(inode);
+diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
+index 0172f4e503061d..26e857fee631d9 100644
+--- a/fs/f2fs/inode.c
++++ b/fs/f2fs/inode.c
+@@ -511,16 +511,16 @@ static int do_read_inode(struct inode *inode)
+
+ init_idisk_time(inode);
+
+- /* Need all the flag bits */
+- f2fs_init_read_extent_tree(inode, node_page);
+- f2fs_init_age_extent_tree(inode);
+-
+- if (!sanity_check_extent_cache(inode)) {
++ if (!sanity_check_extent_cache(inode, node_page)) {
+ f2fs_put_page(node_page, 1);
+ f2fs_handle_error(sbi, ERROR_CORRUPTED_INODE);
+ return -EFSCORRUPTED;
+ }
+
++ /* Need all the flag bits */
++ f2fs_init_read_extent_tree(inode, node_page);
++ f2fs_init_age_extent_tree(inode);
++
+ f2fs_put_page(node_page, 1);
+
+ stat_inc_inline_xattr(inode);
+diff --git a/fs/fhandle.c b/fs/fhandle.c
+index 99dcf07cfecfe1..c361d7ff1b88dd 100644
+--- a/fs/fhandle.c
++++ b/fs/fhandle.c
+@@ -40,7 +40,7 @@ static long do_sys_name_to_handle(const struct path *path,
+ if (f_handle.handle_bytes > MAX_HANDLE_SZ)
+ return -EINVAL;
+
+- handle = kzalloc(sizeof(struct file_handle) + f_handle.handle_bytes,
++ handle = kzalloc(struct_size(handle, f_handle, f_handle.handle_bytes),
+ GFP_KERNEL);
+ if (!handle)
+ return -ENOMEM;
+@@ -75,7 +75,7 @@ static long do_sys_name_to_handle(const struct path *path,
+ /* copy the mount id */
+ if (put_user(real_mount(path->mnt)->mnt_id, mnt_id) ||
+ copy_to_user(ufh, handle,
+- sizeof(struct file_handle) + handle_bytes))
++ struct_size(handle, f_handle, handle_bytes)))
+ retval = -EFAULT;
+ kfree(handle);
+ return retval;
+@@ -196,7 +196,7 @@ static int handle_to_path(int mountdirfd, struct file_handle __user *ufh,
+ retval = -EINVAL;
+ goto out_err;
+ }
+- handle = kmalloc(sizeof(struct file_handle) + f_handle.handle_bytes,
++ handle = kmalloc(struct_size(handle, f_handle, f_handle.handle_bytes),
+ GFP_KERNEL);
+ if (!handle) {
+ retval = -ENOMEM;
+diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c
+index cb3cda1390adb1..5713994328cbcb 100644
+--- a/fs/jfs/jfs_dmap.c
++++ b/fs/jfs/jfs_dmap.c
+@@ -1626,6 +1626,8 @@ s64 dbDiscardAG(struct inode *ip, int agno, s64 minlen)
+ } else if (rc == -ENOSPC) {
+ /* search for next smaller log2 block */
+ l2nb = BLKSTOL2(nblocks) - 1;
++ if (unlikely(l2nb < 0))
++ break;
+ nblocks = 1LL << l2nb;
+ } else {
+ /* Trim any already allocated blocks */
+diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c
+index 031d8f570f581f..5d3127ca68a42d 100644
+--- a/fs/jfs/jfs_dtree.c
++++ b/fs/jfs/jfs_dtree.c
+@@ -834,6 +834,8 @@ int dtInsert(tid_t tid, struct inode *ip,
+ * the full page.
+ */
+ DT_GETSEARCH(ip, btstack->top, bn, mp, p, index);
++ if (p->header.freelist == 0)
++ return -EINVAL;
+
+ /*
+ * insert entry for new key
+diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
+index e855b8fde76ce1..cb6d1fda66a702 100644
+--- a/fs/jfs/jfs_logmgr.c
++++ b/fs/jfs/jfs_logmgr.c
+@@ -1058,7 +1058,7 @@ void jfs_syncpt(struct jfs_log *log, int hard_sync)
+ int lmLogOpen(struct super_block *sb)
+ {
+ int rc;
+- struct block_device *bdev;
++ struct bdev_handle *bdev_handle;
+ struct jfs_log *log;
+ struct jfs_sb_info *sbi = JFS_SBI(sb);
+
+@@ -1070,7 +1070,7 @@ int lmLogOpen(struct super_block *sb)
+
+ mutex_lock(&jfs_log_mutex);
+ list_for_each_entry(log, &jfs_external_logs, journal_list) {
+- if (log->bdev->bd_dev == sbi->logdev) {
++ if (log->bdev_handle->bdev->bd_dev == sbi->logdev) {
+ if (!uuid_equal(&log->uuid, &sbi->loguuid)) {
+ jfs_warn("wrong uuid on JFS journal");
+ mutex_unlock(&jfs_log_mutex);
+@@ -1100,14 +1100,14 @@ int lmLogOpen(struct super_block *sb)
+ * file systems to log may have n-to-1 relationship;
+ */
+
+- bdev = blkdev_get_by_dev(sbi->logdev, BLK_OPEN_READ | BLK_OPEN_WRITE,
+- log, NULL);
+- if (IS_ERR(bdev)) {
+- rc = PTR_ERR(bdev);
++ bdev_handle = bdev_open_by_dev(sbi->logdev,
++ BLK_OPEN_READ | BLK_OPEN_WRITE, log, NULL);
++ if (IS_ERR(bdev_handle)) {
++ rc = PTR_ERR(bdev_handle);
+ goto free;
+ }
+
+- log->bdev = bdev;
++ log->bdev_handle = bdev_handle;
+ uuid_copy(&log->uuid, &sbi->loguuid);
+
+ /*
+@@ -1141,7 +1141,7 @@ int lmLogOpen(struct super_block *sb)
+ lbmLogShutdown(log);
+
+ close: /* close external log device */
+- blkdev_put(bdev, log);
++ bdev_release(bdev_handle);
+
+ free: /* free log descriptor */
+ mutex_unlock(&jfs_log_mutex);
+@@ -1162,7 +1162,7 @@ static int open_inline_log(struct super_block *sb)
+ init_waitqueue_head(&log->syncwait);
+
+ set_bit(log_INLINELOG, &log->flag);
+- log->bdev = sb->s_bdev;
++ log->bdev_handle = sb->s_bdev_handle;
+ log->base = addressPXD(&JFS_SBI(sb)->logpxd);
+ log->size = lengthPXD(&JFS_SBI(sb)->logpxd) >>
+ (L2LOGPSIZE - sb->s_blocksize_bits);
+@@ -1436,7 +1436,7 @@ int lmLogClose(struct super_block *sb)
+ {
+ struct jfs_sb_info *sbi = JFS_SBI(sb);
+ struct jfs_log *log = sbi->log;
+- struct block_device *bdev;
++ struct bdev_handle *bdev_handle;
+ int rc = 0;
+
+ jfs_info("lmLogClose: log:0x%p", log);
+@@ -1482,10 +1482,10 @@ int lmLogClose(struct super_block *sb)
+ * external log as separate logical volume
+ */
+ list_del(&log->journal_list);
+- bdev = log->bdev;
++ bdev_handle = log->bdev_handle;
+ rc = lmLogShutdown(log);
+
+- blkdev_put(bdev, log);
++ bdev_release(bdev_handle);
+
+ kfree(log);
+
+@@ -1972,7 +1972,7 @@ static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp)
+
+ bp->l_flag |= lbmREAD;
+
+- bio = bio_alloc(log->bdev, 1, REQ_OP_READ, GFP_NOFS);
++ bio = bio_alloc(log->bdev_handle->bdev, 1, REQ_OP_READ, GFP_NOFS);
+ bio->bi_iter.bi_sector = bp->l_blkno << (log->l2bsize - 9);
+ __bio_add_page(bio, bp->l_page, LOGPSIZE, bp->l_offset);
+ BUG_ON(bio->bi_iter.bi_size != LOGPSIZE);
+@@ -2110,10 +2110,15 @@ static void lbmStartIO(struct lbuf * bp)
+ {
+ struct bio *bio;
+ struct jfs_log *log = bp->l_log;
++ struct block_device *bdev = NULL;
+
+ jfs_info("lbmStartIO");
+
+- bio = bio_alloc(log->bdev, 1, REQ_OP_WRITE | REQ_SYNC, GFP_NOFS);
++ if (!log->no_integrity)
++ bdev = log->bdev_handle->bdev;
++
++ bio = bio_alloc(bdev, 1, REQ_OP_WRITE | REQ_SYNC,
++ GFP_NOFS);
+ bio->bi_iter.bi_sector = bp->l_blkno << (log->l2bsize - 9);
+ __bio_add_page(bio, bp->l_page, LOGPSIZE, bp->l_offset);
+ BUG_ON(bio->bi_iter.bi_size != LOGPSIZE);
+diff --git a/fs/jfs/jfs_logmgr.h b/fs/jfs/jfs_logmgr.h
+index 805877ce502044..84aa2d25390743 100644
+--- a/fs/jfs/jfs_logmgr.h
++++ b/fs/jfs/jfs_logmgr.h
+@@ -356,7 +356,7 @@ struct jfs_log {
+ * before writing syncpt.
+ */
+ struct list_head journal_list; /* Global list */
+- struct block_device *bdev; /* 4: log lv pointer */
++ struct bdev_handle *bdev_handle; /* 4: log lv pointer */
+ int serial; /* 4: log mount serial number */
+
+ s64 base; /* @8: log extent address (inline log ) */
+diff --git a/fs/jfs/jfs_mount.c b/fs/jfs/jfs_mount.c
+index 631b8bd3e43849..9b5c6a20b30c83 100644
+--- a/fs/jfs/jfs_mount.c
++++ b/fs/jfs/jfs_mount.c
+@@ -430,7 +430,8 @@ int updateSuper(struct super_block *sb, uint state)
+
+ if (state == FM_MOUNT) {
+ /* record log's dev_t and mount serial number */
+- j_sb->s_logdev = cpu_to_le32(new_encode_dev(sbi->log->bdev->bd_dev));
++ j_sb->s_logdev = cpu_to_le32(
++ new_encode_dev(sbi->log->bdev_handle->bdev->bd_dev));
+ j_sb->s_logserial = cpu_to_le32(sbi->log->serial);
+ } else if (state == FM_CLEAN) {
+ /*
+diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
+index 6579948070a482..a62331487ebf16 100644
+--- a/fs/lockd/svc.c
++++ b/fs/lockd/svc.c
+@@ -712,8 +712,6 @@ static const struct svc_version *nlmsvc_version[] = {
+ #endif
+ };
+
+-static struct svc_stat nlmsvc_stats;
+-
+ #define NLM_NRVERS ARRAY_SIZE(nlmsvc_version)
+ static struct svc_program nlmsvc_program = {
+ .pg_prog = NLM_PROGRAM, /* program number */
+@@ -721,7 +719,6 @@ static struct svc_program nlmsvc_program = {
+ .pg_vers = nlmsvc_version, /* version table */
+ .pg_name = "lockd", /* service name */
+ .pg_class = "nfsd", /* share authentication with nfsd */
+- .pg_stats = &nlmsvc_stats, /* stats table */
+ .pg_authenticate = &lockd_authenticate, /* export authentication */
+ .pg_init_request = svc_generic_init_request,
+ .pg_rpcbind_set = svc_generic_rpcbind_set,
+diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
+index 466ebf1d41b2b7..869c88978899c0 100644
+--- a/fs/nfs/callback.c
++++ b/fs/nfs/callback.c
+@@ -399,15 +399,12 @@ static const struct svc_version *nfs4_callback_version[] = {
+ [4] = &nfs4_callback_version4,
+ };
+
+-static struct svc_stat nfs4_callback_stats;
+-
+ static struct svc_program nfs4_callback_program = {
+ .pg_prog = NFS4_CALLBACK, /* RPC service number */
+ .pg_nvers = ARRAY_SIZE(nfs4_callback_version), /* Number of entries */
+ .pg_vers = nfs4_callback_version, /* version table */
+ .pg_name = "NFSv4 callback", /* service name */
+ .pg_class = "nfs", /* authentication class */
+- .pg_stats = &nfs4_callback_stats,
+ .pg_authenticate = nfs_callback_authenticate,
+ .pg_init_request = svc_generic_init_request,
+ .pg_rpcbind_set = svc_generic_rpcbind_set,
+diff --git a/fs/nfsd/cache.h b/fs/nfsd/cache.h
+index 4cbe0434cbb8ce..66a05fefae98ea 100644
+--- a/fs/nfsd/cache.h
++++ b/fs/nfsd/cache.h
+@@ -80,8 +80,6 @@ enum {
+
+ int nfsd_drc_slab_create(void);
+ void nfsd_drc_slab_free(void);
+-int nfsd_net_reply_cache_init(struct nfsd_net *nn);
+-void nfsd_net_reply_cache_destroy(struct nfsd_net *nn);
+ int nfsd_reply_cache_init(struct nfsd_net *);
+ void nfsd_reply_cache_shutdown(struct nfsd_net *);
+ int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start,
+diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
+index 11a0eaa2f91407..b7da17e530077e 100644
+--- a/fs/nfsd/export.c
++++ b/fs/nfsd/export.c
+@@ -339,12 +339,16 @@ static int export_stats_init(struct export_stats *stats)
+
+ static void export_stats_reset(struct export_stats *stats)
+ {
+- nfsd_percpu_counters_reset(stats->counter, EXP_STATS_COUNTERS_NUM);
++ if (stats)
++ nfsd_percpu_counters_reset(stats->counter,
++ EXP_STATS_COUNTERS_NUM);
+ }
+
+ static void export_stats_destroy(struct export_stats *stats)
+ {
+- nfsd_percpu_counters_destroy(stats->counter, EXP_STATS_COUNTERS_NUM);
++ if (stats)
++ nfsd_percpu_counters_destroy(stats->counter,
++ EXP_STATS_COUNTERS_NUM);
+ }
+
+ static void svc_export_put(struct kref *ref)
+@@ -353,7 +357,8 @@ static void svc_export_put(struct kref *ref)
+ path_put(&exp->ex_path);
+ auth_domain_put(exp->ex_client);
+ nfsd4_fslocs_free(&exp->ex_fslocs);
+- export_stats_destroy(&exp->ex_stats);
++ export_stats_destroy(exp->ex_stats);
++ kfree(exp->ex_stats);
+ kfree(exp->ex_uuid);
+ kfree_rcu(exp, ex_rcu);
+ }
+@@ -767,13 +772,15 @@ static int svc_export_show(struct seq_file *m,
+ seq_putc(m, '\t');
+ seq_escape(m, exp->ex_client->name, " \t\n\\");
+ if (export_stats) {
+- seq_printf(m, "\t%lld\n", exp->ex_stats.start_time);
++ struct percpu_counter *counter = exp->ex_stats->counter;
++
++ seq_printf(m, "\t%lld\n", exp->ex_stats->start_time);
+ seq_printf(m, "\tfh_stale: %lld\n",
+- percpu_counter_sum_positive(&exp->ex_stats.counter[EXP_STATS_FH_STALE]));
++ percpu_counter_sum_positive(&counter[EXP_STATS_FH_STALE]));
+ seq_printf(m, "\tio_read: %lld\n",
+- percpu_counter_sum_positive(&exp->ex_stats.counter[EXP_STATS_IO_READ]));
++ percpu_counter_sum_positive(&counter[EXP_STATS_IO_READ]));
+ seq_printf(m, "\tio_write: %lld\n",
+- percpu_counter_sum_positive(&exp->ex_stats.counter[EXP_STATS_IO_WRITE]));
++ percpu_counter_sum_positive(&counter[EXP_STATS_IO_WRITE]));
+ seq_putc(m, '\n');
+ return 0;
+ }
+@@ -819,7 +826,7 @@ static void svc_export_init(struct cache_head *cnew, struct cache_head *citem)
+ new->ex_layout_types = 0;
+ new->ex_uuid = NULL;
+ new->cd = item->cd;
+- export_stats_reset(&new->ex_stats);
++ export_stats_reset(new->ex_stats);
+ }
+
+ static void export_update(struct cache_head *cnew, struct cache_head *citem)
+@@ -856,7 +863,14 @@ static struct cache_head *svc_export_alloc(void)
+ if (!i)
+ return NULL;
+
+- if (export_stats_init(&i->ex_stats)) {
++ i->ex_stats = kmalloc(sizeof(*(i->ex_stats)), GFP_KERNEL);
++ if (!i->ex_stats) {
++ kfree(i);
++ return NULL;
++ }
++
++ if (export_stats_init(i->ex_stats)) {
++ kfree(i->ex_stats);
+ kfree(i);
+ return NULL;
+ }
+diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h
+index 2df8ae25aad302..ca9dc230ae3d0b 100644
+--- a/fs/nfsd/export.h
++++ b/fs/nfsd/export.h
+@@ -64,10 +64,10 @@ struct svc_export {
+ struct cache_head h;
+ struct auth_domain * ex_client;
+ int ex_flags;
++ int ex_fsid;
+ struct path ex_path;
+ kuid_t ex_anon_uid;
+ kgid_t ex_anon_gid;
+- int ex_fsid;
+ unsigned char * ex_uuid; /* 16 byte fsid */
+ struct nfsd4_fs_locations ex_fslocs;
+ uint32_t ex_nflavors;
+@@ -76,8 +76,8 @@ struct svc_export {
+ struct nfsd4_deviceid_map *ex_devid_map;
+ struct cache_detail *cd;
+ struct rcu_head ex_rcu;
+- struct export_stats ex_stats;
+ unsigned long ex_xprtsec_modes;
++ struct export_stats *ex_stats;
+ };
+
+ /* an "export key" (expkey) maps a filehandlefragement to an
+diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
+index ec49b200b79762..9bfca3dda63d33 100644
+--- a/fs/nfsd/netns.h
++++ b/fs/nfsd/netns.h
+@@ -11,8 +11,10 @@
+ #include <net/net_namespace.h>
+ #include <net/netns/generic.h>
+ #include <linux/filelock.h>
++#include <linux/nfs4.h>
+ #include <linux/percpu_counter.h>
+ #include <linux/siphash.h>
++#include <linux/sunrpc/stats.h>
+
+ /* Hash tables for nfs4_clientid state */
+ #define CLIENT_HASH_BITS 4
+@@ -26,10 +28,22 @@ struct nfsd4_client_tracking_ops;
+
+ enum {
+ /* cache misses due only to checksum comparison failures */
+- NFSD_NET_PAYLOAD_MISSES,
++ NFSD_STATS_PAYLOAD_MISSES,
+ /* amount of memory (in bytes) currently consumed by the DRC */
+- NFSD_NET_DRC_MEM_USAGE,
+- NFSD_NET_COUNTERS_NUM
++ NFSD_STATS_DRC_MEM_USAGE,
++ NFSD_STATS_RC_HITS, /* repcache hits */
++ NFSD_STATS_RC_MISSES, /* repcache misses */
++ NFSD_STATS_RC_NOCACHE, /* uncached reqs */
++ NFSD_STATS_FH_STALE, /* FH stale error */
++ NFSD_STATS_IO_READ, /* bytes returned to read requests */
++ NFSD_STATS_IO_WRITE, /* bytes passed in write requests */
++#ifdef CONFIG_NFSD_V4
++ NFSD_STATS_FIRST_NFS4_OP, /* count of individual nfsv4 operations */
++ NFSD_STATS_LAST_NFS4_OP = NFSD_STATS_FIRST_NFS4_OP + LAST_NFS4_OP,
++#define NFSD_STATS_NFS4_OP(op) (NFSD_STATS_FIRST_NFS4_OP + (op))
++ NFSD_STATS_WDELEG_GETATTR, /* count of getattr conflict with wdeleg */
++#endif
++ NFSD_STATS_COUNTERS_NUM
+ };
+
+ /*
+@@ -169,7 +183,10 @@ struct nfsd_net {
+ atomic_t num_drc_entries;
+
+ /* Per-netns stats counters */
+- struct percpu_counter counter[NFSD_NET_COUNTERS_NUM];
++ struct percpu_counter counter[NFSD_STATS_COUNTERS_NUM];
++
++ /* sunrpc svc stats */
++ struct svc_stat nfsd_svcstats;
+
+ /* longest hash chain seen */
+ unsigned int longest_chain;
+diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
+index 451026f9986b61..ae0057c54ef4ed 100644
+--- a/fs/nfsd/nfs4proc.c
++++ b/fs/nfsd/nfs4proc.c
+@@ -2478,10 +2478,10 @@ nfsd4_proc_null(struct svc_rqst *rqstp)
+ return rpc_success;
+ }
+
+-static inline void nfsd4_increment_op_stats(u32 opnum)
++static inline void nfsd4_increment_op_stats(struct nfsd_net *nn, u32 opnum)
+ {
+ if (opnum >= FIRST_NFS4_OP && opnum <= LAST_NFS4_OP)
+- percpu_counter_inc(&nfsdstats.counter[NFSD_STATS_NFS4_OP(opnum)]);
++ percpu_counter_inc(&nn->counter[NFSD_STATS_NFS4_OP(opnum)]);
+ }
+
+ static const struct nfsd4_operation nfsd4_ops[];
+@@ -2756,7 +2756,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
+ status, nfsd4_op_name(op->opnum));
+
+ nfsd4_cstate_clear_replay(cstate);
+- nfsd4_increment_op_stats(op->opnum);
++ nfsd4_increment_op_stats(nn, op->opnum);
+ }
+
+ fh_put(current_fh);
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index c7e52d980cd75f..cdad1eaa4a3180 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -8422,6 +8422,7 @@ __be32
+ nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct inode *inode)
+ {
+ __be32 status;
++ struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+ struct file_lock_context *ctx;
+ struct file_lock *fl;
+ struct nfs4_delegation *dp;
+@@ -8451,7 +8452,7 @@ nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct inode *inode)
+ }
+ break_lease:
+ spin_unlock(&ctx->flc_lock);
+- nfsd_stats_wdeleg_getattr_inc();
++ nfsd_stats_wdeleg_getattr_inc(nn);
+ status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ));
+ if (status != nfserr_jukebox ||
+ !nfsd_wait_for_delegreturn(rqstp, inode))
+diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
+index 6cd36af2f97e10..c52132ecb339d5 100644
+--- a/fs/nfsd/nfscache.c
++++ b/fs/nfsd/nfscache.c
+@@ -176,27 +176,6 @@ void nfsd_drc_slab_free(void)
+ kmem_cache_destroy(drc_slab);
+ }
+
+-/**
+- * nfsd_net_reply_cache_init - per net namespace reply cache set-up
+- * @nn: nfsd_net being initialized
+- *
+- * Returns zero on succes; otherwise a negative errno is returned.
+- */
+-int nfsd_net_reply_cache_init(struct nfsd_net *nn)
+-{
+- return nfsd_percpu_counters_init(nn->counter, NFSD_NET_COUNTERS_NUM);
+-}
+-
+-/**
+- * nfsd_net_reply_cache_destroy - per net namespace reply cache tear-down
+- * @nn: nfsd_net being freed
+- *
+- */
+-void nfsd_net_reply_cache_destroy(struct nfsd_net *nn)
+-{
+- nfsd_percpu_counters_destroy(nn->counter, NFSD_NET_COUNTERS_NUM);
+-}
+-
+ int nfsd_reply_cache_init(struct nfsd_net *nn)
+ {
+ unsigned int hashsize;
+@@ -502,7 +481,7 @@ nfsd_cache_insert(struct nfsd_drc_bucket *b, struct nfsd_cacherep *key,
+ int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start,
+ unsigned int len, struct nfsd_cacherep **cacherep)
+ {
+- struct nfsd_net *nn;
++ struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+ struct nfsd_cacherep *rp, *found;
+ __wsum csum;
+ struct nfsd_drc_bucket *b;
+@@ -512,7 +491,7 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start,
+ int rtn = RC_DOIT;
+
+ if (type == RC_NOCACHE) {
+- nfsd_stats_rc_nocache_inc();
++ nfsd_stats_rc_nocache_inc(nn);
+ goto out;
+ }
+
+@@ -522,7 +501,6 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start,
+ * Since the common case is a cache miss followed by an insert,
+ * preallocate an entry.
+ */
+- nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+ rp = nfsd_cacherep_alloc(rqstp, csum, nn);
+ if (!rp)
+ goto out;
+@@ -540,7 +518,7 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start,
+ freed = nfsd_cacherep_dispose(&dispose);
+ trace_nfsd_drc_gc(nn, freed);
+
+- nfsd_stats_rc_misses_inc();
++ nfsd_stats_rc_misses_inc(nn);
+ atomic_inc(&nn->num_drc_entries);
+ nfsd_stats_drc_mem_usage_add(nn, sizeof(*rp));
+ goto out;
+@@ -548,7 +526,7 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start,
+ found_entry:
+ /* We found a matching entry which is either in progress or done. */
+ nfsd_reply_cache_free_locked(NULL, rp, nn);
+- nfsd_stats_rc_hits_inc();
++ nfsd_stats_rc_hits_inc(nn);
+ rtn = RC_DROPIT;
+ rp = found;
+
+@@ -690,15 +668,15 @@ int nfsd_reply_cache_stats_show(struct seq_file *m, void *v)
+ atomic_read(&nn->num_drc_entries));
+ seq_printf(m, "hash buckets: %u\n", 1 << nn->maskbits);
+ seq_printf(m, "mem usage: %lld\n",
+- percpu_counter_sum_positive(&nn->counter[NFSD_NET_DRC_MEM_USAGE]));
++ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_DRC_MEM_USAGE]));
+ seq_printf(m, "cache hits: %lld\n",
+- percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_HITS]));
++ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_RC_HITS]));
+ seq_printf(m, "cache misses: %lld\n",
+- percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_MISSES]));
++ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_RC_MISSES]));
+ seq_printf(m, "not cached: %lld\n",
+- percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_NOCACHE]));
++ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_RC_NOCACHE]));
+ seq_printf(m, "payload misses: %lld\n",
+- percpu_counter_sum_positive(&nn->counter[NFSD_NET_PAYLOAD_MISSES]));
++ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_PAYLOAD_MISSES]));
+ seq_printf(m, "longest chain len: %u\n", nn->longest_chain);
+ seq_printf(m, "cachesize at longest: %u\n", nn->longest_chain_cachesize);
+ return 0;
+diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
+index a13e81e450718a..887035b7446763 100644
+--- a/fs/nfsd/nfsctl.c
++++ b/fs/nfsd/nfsctl.c
+@@ -1524,14 +1524,17 @@ static __net_init int nfsd_net_init(struct net *net)
+ retval = nfsd_idmap_init(net);
+ if (retval)
+ goto out_idmap_error;
+- retval = nfsd_net_reply_cache_init(nn);
++ retval = nfsd_stat_counters_init(nn);
+ if (retval)
+ goto out_repcache_error;
++ memset(&nn->nfsd_svcstats, 0, sizeof(nn->nfsd_svcstats));
++ nn->nfsd_svcstats.program = &nfsd_program;
+ nn->nfsd_versions = NULL;
+ nn->nfsd4_minorversions = NULL;
+ nfsd4_init_leases_net(nn);
+ get_random_bytes(&nn->siphash_key, sizeof(nn->siphash_key));
+ seqlock_init(&nn->writeverf_lock);
++ nfsd_proc_stat_init(net);
+
+ return 0;
+
+@@ -1552,7 +1555,8 @@ static __net_exit void nfsd_net_exit(struct net *net)
+ {
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
+- nfsd_net_reply_cache_destroy(nn);
++ nfsd_proc_stat_shutdown(net);
++ nfsd_stat_counters_destroy(nn);
+ nfsd_idmap_shutdown(net);
+ nfsd_export_shutdown(net);
+ nfsd_netns_free_versions(nn);
+@@ -1575,12 +1579,9 @@ static int __init init_nfsd(void)
+ retval = nfsd4_init_pnfs();
+ if (retval)
+ goto out_free_slabs;
+- retval = nfsd_stat_init(); /* Statistics */
+- if (retval)
+- goto out_free_pnfs;
+ retval = nfsd_drc_slab_create();
+ if (retval)
+- goto out_free_stat;
++ goto out_free_pnfs;
+ nfsd_lockd_init(); /* lockd->nfsd callbacks */
+ retval = create_proc_exports_entry();
+ if (retval)
+@@ -1610,8 +1611,6 @@ static int __init init_nfsd(void)
+ out_free_lockd:
+ nfsd_lockd_shutdown();
+ nfsd_drc_slab_free();
+-out_free_stat:
+- nfsd_stat_shutdown();
+ out_free_pnfs:
+ nfsd4_exit_pnfs();
+ out_free_slabs:
+@@ -1628,7 +1627,6 @@ static void __exit exit_nfsd(void)
+ nfsd_drc_slab_free();
+ remove_proc_entry("fs/nfs/exports", NULL);
+ remove_proc_entry("fs/nfs", NULL);
+- nfsd_stat_shutdown();
+ nfsd_lockd_shutdown();
+ nfsd4_free_slabs();
+ nfsd4_exit_pnfs();
+diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
+index fe846a360ae18d..d05bd2b811f377 100644
+--- a/fs/nfsd/nfsd.h
++++ b/fs/nfsd/nfsd.h
+@@ -69,6 +69,7 @@ extern struct mutex nfsd_mutex;
+ extern spinlock_t nfsd_drc_lock;
+ extern unsigned long nfsd_drc_max_mem;
+ extern unsigned long nfsd_drc_mem_used;
++extern atomic_t nfsd_th_cnt; /* number of available threads */
+
+ extern const struct seq_operations nfs_exports_op;
+
+diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
+index 937be276bb6b48..c2495d98c18928 100644
+--- a/fs/nfsd/nfsfh.c
++++ b/fs/nfsd/nfsfh.c
+@@ -327,6 +327,7 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
+ __be32
+ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int access)
+ {
++ struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+ struct svc_export *exp = NULL;
+ struct dentry *dentry;
+ __be32 error;
+@@ -395,7 +396,7 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int access)
+ out:
+ trace_nfsd_fh_verify_err(rqstp, fhp, type, access, error);
+ if (error == nfserr_stale)
+- nfsd_stats_fh_stale_inc(exp);
++ nfsd_stats_fh_stale_inc(nn, exp);
+ return error;
+ }
+
+diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
+index 7ef6af908faacb..7911c4b3b5d355 100644
+--- a/fs/nfsd/nfssvc.c
++++ b/fs/nfsd/nfssvc.c
+@@ -34,6 +34,7 @@
+
+ #define NFSDDBG_FACILITY NFSDDBG_SVC
+
++atomic_t nfsd_th_cnt = ATOMIC_INIT(0);
+ extern struct svc_program nfsd_program;
+ static int nfsd(void *vrqstp);
+ #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
+@@ -89,7 +90,6 @@ unsigned long nfsd_drc_max_mem;
+ unsigned long nfsd_drc_mem_used;
+
+ #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
+-static struct svc_stat nfsd_acl_svcstats;
+ static const struct svc_version *nfsd_acl_version[] = {
+ # if defined(CONFIG_NFSD_V2_ACL)
+ [2] = &nfsd_acl_version2,
+@@ -108,15 +108,11 @@ static struct svc_program nfsd_acl_program = {
+ .pg_vers = nfsd_acl_version,
+ .pg_name = "nfsacl",
+ .pg_class = "nfsd",
+- .pg_stats = &nfsd_acl_svcstats,
+ .pg_authenticate = &svc_set_client,
+ .pg_init_request = nfsd_acl_init_request,
+ .pg_rpcbind_set = nfsd_acl_rpcbind_set,
+ };
+
+-static struct svc_stat nfsd_acl_svcstats = {
+- .program = &nfsd_acl_program,
+-};
+ #endif /* defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) */
+
+ static const struct svc_version *nfsd_version[] = {
+@@ -141,7 +137,6 @@ struct svc_program nfsd_program = {
+ .pg_vers = nfsd_version, /* version table */
+ .pg_name = "nfsd", /* program name */
+ .pg_class = "nfsd", /* authentication class */
+- .pg_stats = &nfsd_svcstats, /* version table */
+ .pg_authenticate = &svc_set_client, /* export authentication */
+ .pg_init_request = nfsd_init_request,
+ .pg_rpcbind_set = nfsd_rpcbind_set,
+@@ -675,7 +670,8 @@ int nfsd_create_serv(struct net *net)
+ if (nfsd_max_blksize == 0)
+ nfsd_max_blksize = nfsd_get_default_max_blksize();
+ nfsd_reset_versions(nn);
+- serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize, nfsd);
++ serv = svc_create_pooled(&nfsd_program, &nn->nfsd_svcstats,
++ nfsd_max_blksize, nfsd);
+ if (serv == NULL)
+ return -ENOMEM;
+
+@@ -950,7 +946,7 @@ nfsd(void *vrqstp)
+
+ current->fs->umask = 0;
+
+- atomic_inc(&nfsdstats.th_cnt);
++ atomic_inc(&nfsd_th_cnt);
+
+ set_freezable();
+
+@@ -964,7 +960,7 @@ nfsd(void *vrqstp)
+ svc_recv(rqstp);
+ }
+
+- atomic_dec(&nfsdstats.th_cnt);
++ atomic_dec(&nfsd_th_cnt);
+
+ out:
+ /* Release the thread */
+diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c
+index 63797635e1c328..9f606fa08bd4b8 100644
+--- a/fs/nfsd/stats.c
++++ b/fs/nfsd/stats.c
+@@ -27,25 +27,22 @@
+
+ #include "nfsd.h"
+
+-struct nfsd_stats nfsdstats;
+-struct svc_stat nfsd_svcstats = {
+- .program = &nfsd_program,
+-};
+-
+ static int nfsd_show(struct seq_file *seq, void *v)
+ {
++ struct net *net = pde_data(file_inode(seq->file));
++ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ int i;
+
+ seq_printf(seq, "rc %lld %lld %lld\nfh %lld 0 0 0 0\nio %lld %lld\n",
+- percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_HITS]),
+- percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_MISSES]),
+- percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_NOCACHE]),
+- percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_FH_STALE]),
+- percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_IO_READ]),
+- percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_IO_WRITE]));
++ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_RC_HITS]),
++ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_RC_MISSES]),
++ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_RC_NOCACHE]),
++ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_FH_STALE]),
++ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_IO_READ]),
++ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_IO_WRITE]));
+
+ /* thread usage: */
+- seq_printf(seq, "th %u 0", atomic_read(&nfsdstats.th_cnt));
++ seq_printf(seq, "th %u 0", atomic_read(&nfsd_th_cnt));
+
+ /* deprecated thread usage histogram stats */
+ for (i = 0; i < 10; i++)
+@@ -55,7 +52,7 @@ static int nfsd_show(struct seq_file *seq, void *v)
+ seq_puts(seq, "\nra 0 0 0 0 0 0 0 0 0 0 0 0\n");
+
+ /* show my rpc info */
+- svc_seq_show(seq, &nfsd_svcstats);
++ svc_seq_show(seq, &nn->nfsd_svcstats);
+
+ #ifdef CONFIG_NFSD_V4
+ /* Show count for individual nfsv4 operations */
+@@ -63,10 +60,10 @@ static int nfsd_show(struct seq_file *seq, void *v)
+ seq_printf(seq,"proc4ops %u", LAST_NFS4_OP + 1);
+ for (i = 0; i <= LAST_NFS4_OP; i++) {
+ seq_printf(seq, " %lld",
+- percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_NFS4_OP(i)]));
++ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_NFS4_OP(i)]));
+ }
+ seq_printf(seq, "\nwdeleg_getattr %lld",
+- percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_WDELEG_GETATTR]));
++ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_WDELEG_GETATTR]));
+
+ seq_putc(seq, '\n');
+ #endif
+@@ -76,7 +73,7 @@ static int nfsd_show(struct seq_file *seq, void *v)
+
+ DEFINE_PROC_SHOW_ATTRIBUTE(nfsd);
+
+-int nfsd_percpu_counters_init(struct percpu_counter counters[], int num)
++int nfsd_percpu_counters_init(struct percpu_counter *counters, int num)
+ {
+ int i, err = 0;
+
+@@ -108,31 +105,24 @@ void nfsd_percpu_counters_destroy(struct percpu_counter counters[], int num)
+ percpu_counter_destroy(&counters[i]);
+ }
+
+-static int nfsd_stat_counters_init(void)
++int nfsd_stat_counters_init(struct nfsd_net *nn)
+ {
+- return nfsd_percpu_counters_init(nfsdstats.counter, NFSD_STATS_COUNTERS_NUM);
++ return nfsd_percpu_counters_init(nn->counter, NFSD_STATS_COUNTERS_NUM);
+ }
+
+-static void nfsd_stat_counters_destroy(void)
++void nfsd_stat_counters_destroy(struct nfsd_net *nn)
+ {
+- nfsd_percpu_counters_destroy(nfsdstats.counter, NFSD_STATS_COUNTERS_NUM);
++ nfsd_percpu_counters_destroy(nn->counter, NFSD_STATS_COUNTERS_NUM);
+ }
+
+-int nfsd_stat_init(void)
++void nfsd_proc_stat_init(struct net *net)
+ {
+- int err;
+-
+- err = nfsd_stat_counters_init();
+- if (err)
+- return err;
++ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
+- svc_proc_register(&init_net, &nfsd_svcstats, &nfsd_proc_ops);
+-
+- return 0;
++ svc_proc_register(net, &nn->nfsd_svcstats, &nfsd_proc_ops);
+ }
+
+-void nfsd_stat_shutdown(void)
++void nfsd_proc_stat_shutdown(struct net *net)
+ {
+- nfsd_stat_counters_destroy();
+- svc_proc_unregister(&init_net, "nfsd");
++ svc_proc_unregister(net, "nfsd");
+ }
+diff --git a/fs/nfsd/stats.h b/fs/nfsd/stats.h
+index cf5524e7ca0623..d2753e975dfd34 100644
+--- a/fs/nfsd/stats.h
++++ b/fs/nfsd/stats.h
+@@ -10,94 +10,72 @@
+ #include <uapi/linux/nfsd/stats.h>
+ #include <linux/percpu_counter.h>
+
+-
+-enum {
+- NFSD_STATS_RC_HITS, /* repcache hits */
+- NFSD_STATS_RC_MISSES, /* repcache misses */
+- NFSD_STATS_RC_NOCACHE, /* uncached reqs */
+- NFSD_STATS_FH_STALE, /* FH stale error */
+- NFSD_STATS_IO_READ, /* bytes returned to read requests */
+- NFSD_STATS_IO_WRITE, /* bytes passed in write requests */
+-#ifdef CONFIG_NFSD_V4
+- NFSD_STATS_FIRST_NFS4_OP, /* count of individual nfsv4 operations */
+- NFSD_STATS_LAST_NFS4_OP = NFSD_STATS_FIRST_NFS4_OP + LAST_NFS4_OP,
+-#define NFSD_STATS_NFS4_OP(op) (NFSD_STATS_FIRST_NFS4_OP + (op))
+- NFSD_STATS_WDELEG_GETATTR, /* count of getattr conflict with wdeleg */
+-#endif
+- NFSD_STATS_COUNTERS_NUM
+-};
+-
+-struct nfsd_stats {
+- struct percpu_counter counter[NFSD_STATS_COUNTERS_NUM];
+-
+- atomic_t th_cnt; /* number of available threads */
+-};
+-
+-extern struct nfsd_stats nfsdstats;
+-
+-extern struct svc_stat nfsd_svcstats;
+-
+-int nfsd_percpu_counters_init(struct percpu_counter counters[], int num);
+-void nfsd_percpu_counters_reset(struct percpu_counter counters[], int num);
+-void nfsd_percpu_counters_destroy(struct percpu_counter counters[], int num);
+-int nfsd_stat_init(void);
+-void nfsd_stat_shutdown(void);
+-
+-static inline void nfsd_stats_rc_hits_inc(void)
++int nfsd_percpu_counters_init(struct percpu_counter *counters, int num);
++void nfsd_percpu_counters_reset(struct percpu_counter *counters, int num);
++void nfsd_percpu_counters_destroy(struct percpu_counter *counters, int num);
++int nfsd_stat_counters_init(struct nfsd_net *nn);
++void nfsd_stat_counters_destroy(struct nfsd_net *nn);
++void nfsd_proc_stat_init(struct net *net);
++void nfsd_proc_stat_shutdown(struct net *net);
++
++static inline void nfsd_stats_rc_hits_inc(struct nfsd_net *nn)
+ {
+- percpu_counter_inc(&nfsdstats.counter[NFSD_STATS_RC_HITS]);
++ percpu_counter_inc(&nn->counter[NFSD_STATS_RC_HITS]);
+ }
+
+-static inline void nfsd_stats_rc_misses_inc(void)
++static inline void nfsd_stats_rc_misses_inc(struct nfsd_net *nn)
+ {
+- percpu_counter_inc(&nfsdstats.counter[NFSD_STATS_RC_MISSES]);
++ percpu_counter_inc(&nn->counter[NFSD_STATS_RC_MISSES]);
+ }
+
+-static inline void nfsd_stats_rc_nocache_inc(void)
++static inline void nfsd_stats_rc_nocache_inc(struct nfsd_net *nn)
+ {
+- percpu_counter_inc(&nfsdstats.counter[NFSD_STATS_RC_NOCACHE]);
++ percpu_counter_inc(&nn->counter[NFSD_STATS_RC_NOCACHE]);
+ }
+
+-static inline void nfsd_stats_fh_stale_inc(struct svc_export *exp)
++static inline void nfsd_stats_fh_stale_inc(struct nfsd_net *nn,
++ struct svc_export *exp)
+ {
+- percpu_counter_inc(&nfsdstats.counter[NFSD_STATS_FH_STALE]);
+- if (exp)
+- percpu_counter_inc(&exp->ex_stats.counter[EXP_STATS_FH_STALE]);
++ percpu_counter_inc(&nn->counter[NFSD_STATS_FH_STALE]);
++ if (exp && exp->ex_stats)
++ percpu_counter_inc(&exp->ex_stats->counter[EXP_STATS_FH_STALE]);
+ }
+
+-static inline void nfsd_stats_io_read_add(struct svc_export *exp, s64 amount)
++static inline void nfsd_stats_io_read_add(struct nfsd_net *nn,
++ struct svc_export *exp, s64 amount)
+ {
+- percpu_counter_add(&nfsdstats.counter[NFSD_STATS_IO_READ], amount);
+- if (exp)
+- percpu_counter_add(&exp->ex_stats.counter[EXP_STATS_IO_READ], amount);
++ percpu_counter_add(&nn->counter[NFSD_STATS_IO_READ], amount);
++ if (exp && exp->ex_stats)
++ percpu_counter_add(&exp->ex_stats->counter[EXP_STATS_IO_READ], amount);
+ }
+
+-static inline void nfsd_stats_io_write_add(struct svc_export *exp, s64 amount)
++static inline void nfsd_stats_io_write_add(struct nfsd_net *nn,
++ struct svc_export *exp, s64 amount)
+ {
+- percpu_counter_add(&nfsdstats.counter[NFSD_STATS_IO_WRITE], amount);
+- if (exp)
+- percpu_counter_add(&exp->ex_stats.counter[EXP_STATS_IO_WRITE], amount);
++ percpu_counter_add(&nn->counter[NFSD_STATS_IO_WRITE], amount);
++ if (exp && exp->ex_stats)
++ percpu_counter_add(&exp->ex_stats->counter[EXP_STATS_IO_WRITE], amount);
+ }
+
+ static inline void nfsd_stats_payload_misses_inc(struct nfsd_net *nn)
+ {
+- percpu_counter_inc(&nn->counter[NFSD_NET_PAYLOAD_MISSES]);
++ percpu_counter_inc(&nn->counter[NFSD_STATS_PAYLOAD_MISSES]);
+ }
+
+ static inline void nfsd_stats_drc_mem_usage_add(struct nfsd_net *nn, s64 amount)
+ {
+- percpu_counter_add(&nn->counter[NFSD_NET_DRC_MEM_USAGE], amount);
++ percpu_counter_add(&nn->counter[NFSD_STATS_DRC_MEM_USAGE], amount);
+ }
+
+ static inline void nfsd_stats_drc_mem_usage_sub(struct nfsd_net *nn, s64 amount)
+ {
+- percpu_counter_sub(&nn->counter[NFSD_NET_DRC_MEM_USAGE], amount);
++ percpu_counter_sub(&nn->counter[NFSD_STATS_DRC_MEM_USAGE], amount);
+ }
+
+ #ifdef CONFIG_NFSD_V4
+-static inline void nfsd_stats_wdeleg_getattr_inc(void)
++static inline void nfsd_stats_wdeleg_getattr_inc(struct nfsd_net *nn)
+ {
+- percpu_counter_inc(&nfsdstats.counter[NFSD_STATS_WDELEG_GETATTR]);
++ percpu_counter_inc(&nn->counter[NFSD_STATS_WDELEG_GETATTR]);
+ }
+ #endif
+ #endif /* _NFSD_STATS_H */
+diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
+index d0fdf70ab20d36..1f2a5b22b6498e 100644
+--- a/fs/nfsd/vfs.c
++++ b/fs/nfsd/vfs.c
+@@ -985,7 +985,9 @@ static __be32 nfsd_finish_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ unsigned long *count, u32 *eof, ssize_t host_err)
+ {
+ if (host_err >= 0) {
+- nfsd_stats_io_read_add(fhp->fh_export, host_err);
++ struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
++
++ nfsd_stats_io_read_add(nn, fhp->fh_export, host_err);
+ *eof = nfsd_eof_on_read(file, offset, host_err, *count);
+ *count = host_err;
+ fsnotify_access(file);
+@@ -1168,7 +1170,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
+ goto out_nfserr;
+ }
+ *cnt = host_err;
+- nfsd_stats_io_write_add(exp, *cnt);
++ nfsd_stats_io_write_add(nn, exp, *cnt);
+ fsnotify_modify(file);
+ host_err = filemap_check_wb_err(file->f_mapping, since);
+ if (host_err < 0)
+diff --git a/fs/ntfs3/frecord.c b/fs/ntfs3/frecord.c
+index 424865dfca74ba..45b687aff700be 100644
+--- a/fs/ntfs3/frecord.c
++++ b/fs/ntfs3/frecord.c
+@@ -1896,6 +1896,47 @@ enum REPARSE_SIGN ni_parse_reparse(struct ntfs_inode *ni, struct ATTRIB *attr,
+ return REPARSE_LINK;
+ }
+
++/*
++ * fiemap_fill_next_extent_k - a copy of fiemap_fill_next_extent() for the case
++ * where fi_extents_start holds a kernel address (hence the __force cast).
++ * Return: 1 on FIEMAP_EXTENT_LAST or when the extent array is full, else 0.
++ */
++static int fiemap_fill_next_extent_k(struct fiemap_extent_info *fieinfo,
++				     u64 logical, u64 phys, u64 len, u32 flags)
++{
++	struct fiemap_extent extent;
++	struct fiemap_extent *dest;
++
++	/* fi_extents_max == 0: only count the extents, store nothing */
++	if (fieinfo->fi_extents_max == 0) {
++		fieinfo->fi_extents_mapped++;
++		return (flags & FIEMAP_EXTENT_LAST) ? 1 : 0;
++	}
++
++	if (fieinfo->fi_extents_mapped >= fieinfo->fi_extents_max)
++		return 1;
++
++	if (flags & FIEMAP_EXTENT_DELALLOC)
++		flags |= FIEMAP_EXTENT_UNKNOWN;
++	if (flags & FIEMAP_EXTENT_DATA_ENCRYPTED)
++		flags |= FIEMAP_EXTENT_ENCODED;
++	if (flags & (FIEMAP_EXTENT_DATA_TAIL | FIEMAP_EXTENT_DATA_INLINE))
++		flags |= FIEMAP_EXTENT_NOT_ALIGNED;
++
++	memset(&extent, 0, sizeof(extent));
++	extent.fe_logical = logical;
++	extent.fe_physical = phys;
++	extent.fe_length = len;
++	extent.fe_flags = flags;
++
++	dest = (__force struct fiemap_extent *)fieinfo->fi_extents_start;
++	memcpy(&dest[fieinfo->fi_extents_mapped], &extent, sizeof(extent));
++
++	if (++fieinfo->fi_extents_mapped == fieinfo->fi_extents_max)
++		return 1;
++	return (flags & FIEMAP_EXTENT_LAST) ? 1 : 0;
++}
++
+ /*
+ * ni_fiemap - Helper for file_fiemap().
+ *
+@@ -1906,6 +1947,8 @@ int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo,
+ __u64 vbo, __u64 len)
+ {
+ int err = 0;
++ struct fiemap_extent __user *fe_u = fieinfo->fi_extents_start;
++ struct fiemap_extent *fe_k = NULL;
+ struct ntfs_sb_info *sbi = ni->mi.sbi;
+ u8 cluster_bits = sbi->cluster_bits;
+ struct runs_tree *run;
+@@ -1953,6 +1996,18 @@ int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo,
+ goto out;
+ }
+
++ /*
++ * To avoid lock problems, replace the pointer to user memory with a pointer to kernel memory.
++ */
++ fe_k = kmalloc_array(fieinfo->fi_extents_max,
++ sizeof(struct fiemap_extent),
++ GFP_NOFS | __GFP_ZERO);
++ if (!fe_k) {
++ err = -ENOMEM;
++ goto out;
++ }
++ fieinfo->fi_extents_start = fe_k;
++
+ end = vbo + len;
+ alloc_size = le64_to_cpu(attr->nres.alloc_size);
+ if (end > alloc_size)
+@@ -2041,8 +2096,9 @@ int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo,
+ if (vbo + dlen >= end)
+ flags |= FIEMAP_EXTENT_LAST;
+
+- err = fiemap_fill_next_extent(fieinfo, vbo, lbo, dlen,
+- flags);
++ err = fiemap_fill_next_extent_k(fieinfo, vbo, lbo, dlen,
++ flags);
++
+ if (err < 0)
+ break;
+ if (err == 1) {
+@@ -2062,7 +2118,8 @@ int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo,
+ if (vbo + bytes >= end)
+ flags |= FIEMAP_EXTENT_LAST;
+
+- err = fiemap_fill_next_extent(fieinfo, vbo, lbo, bytes, flags);
++ err = fiemap_fill_next_extent_k(fieinfo, vbo, lbo, bytes,
++ flags);
+ if (err < 0)
+ break;
+ if (err == 1) {
+@@ -2075,7 +2132,19 @@ int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo,
+
+ up_read(run_lock);
+
++ /*
++ * Copy to user memory outside the lock.
++ */
++ if (copy_to_user(fe_u, fe_k,
++ fieinfo->fi_extents_max *
++ sizeof(struct fiemap_extent))) {
++ err = -EFAULT;
++ }
++
+ out:
++ /* Restore original pointer. */
++ fieinfo->fi_extents_start = fe_u;
++ kfree(fe_k);
+ return err;
+ }
+
+diff --git a/fs/quota/quota_tree.c b/fs/quota/quota_tree.c
+index 0f1493e0f6d059..254f6359b287fa 100644
+--- a/fs/quota/quota_tree.c
++++ b/fs/quota/quota_tree.c
+@@ -21,6 +21,12 @@ MODULE_AUTHOR("Jan Kara");
+ MODULE_DESCRIPTION("Quota trie support");
+ MODULE_LICENSE("GPL");
+
++/*
++ * Maximum quota tree depth we support. Only to limit recursion when working
++ * with the tree.
++ */
++#define MAX_QTREE_DEPTH 6
++
+ #define __QUOTA_QT_PARANOIA
+
+ static int __get_index(struct qtree_mem_dqinfo *info, qid_t id, int depth)
+@@ -327,27 +333,36 @@ static uint find_free_dqentry(struct qtree_mem_dqinfo *info,
+
+ /* Insert reference to structure into the trie */
+ static int do_insert_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot,
+- uint *treeblk, int depth)
++ uint *blks, int depth)
+ {
+ char *buf = kmalloc(info->dqi_usable_bs, GFP_NOFS);
+ int ret = 0, newson = 0, newact = 0;
+ __le32 *ref;
+ uint newblk;
++ int i;
+
+ if (!buf)
+ return -ENOMEM;
+- if (!*treeblk) {
++ if (!blks[depth]) {
+ ret = get_free_dqblk(info);
+ if (ret < 0)
+ goto out_buf;
+- *treeblk = ret;
++ for (i = 0; i < depth; i++)
++ if (ret == blks[i]) {
++ quota_error(dquot->dq_sb,
++ "Free block already used in tree: block %u",
++ ret);
++ ret = -EIO;
++ goto out_buf;
++ }
++ blks[depth] = ret;
+ memset(buf, 0, info->dqi_usable_bs);
+ newact = 1;
+ } else {
+- ret = read_blk(info, *treeblk, buf);
++ ret = read_blk(info, blks[depth], buf);
+ if (ret < 0) {
+ quota_error(dquot->dq_sb, "Can't read tree quota "
+- "block %u", *treeblk);
++ "block %u", blks[depth]);
+ goto out_buf;
+ }
+ }
+@@ -357,8 +372,20 @@ static int do_insert_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot,
+ info->dqi_blocks - 1);
+ if (ret)
+ goto out_buf;
+- if (!newblk)
++ if (!newblk) {
+ newson = 1;
++ } else {
++ for (i = 0; i <= depth; i++)
++ if (newblk == blks[i]) {
++ quota_error(dquot->dq_sb,
++ "Cycle in quota tree detected: block %u index %u",
++ blks[depth],
++ get_index(info, dquot->dq_id, depth));
++ ret = -EIO;
++ goto out_buf;
++ }
++ }
++ blks[depth + 1] = newblk;
+ if (depth == info->dqi_qtree_depth - 1) {
+ #ifdef __QUOTA_QT_PARANOIA
+ if (newblk) {
+@@ -370,16 +397,16 @@ static int do_insert_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot,
+ goto out_buf;
+ }
+ #endif
+- newblk = find_free_dqentry(info, dquot, &ret);
++ blks[depth + 1] = find_free_dqentry(info, dquot, &ret);
+ } else {
+- ret = do_insert_tree(info, dquot, &newblk, depth+1);
++ ret = do_insert_tree(info, dquot, blks, depth + 1);
+ }
+ if (newson && ret >= 0) {
+ ref[get_index(info, dquot->dq_id, depth)] =
+- cpu_to_le32(newblk);
+- ret = write_blk(info, *treeblk, buf);
++ cpu_to_le32(blks[depth + 1]);
++ ret = write_blk(info, blks[depth], buf);
+ } else if (newact && ret < 0) {
+- put_free_dqblk(info, buf, *treeblk);
++ put_free_dqblk(info, buf, blks[depth]);
+ }
+ out_buf:
+ kfree(buf);
+@@ -390,7 +417,7 @@ static int do_insert_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot,
+ static inline int dq_insert_tree(struct qtree_mem_dqinfo *info,
+ struct dquot *dquot)
+ {
+- int tmp = QT_TREEOFF;
++ uint blks[MAX_QTREE_DEPTH] = { QT_TREEOFF };
+
+ #ifdef __QUOTA_QT_PARANOIA
+ if (info->dqi_blocks <= QT_TREEOFF) {
+@@ -398,7 +425,11 @@ static inline int dq_insert_tree(struct qtree_mem_dqinfo *info,
+ return -EIO;
+ }
+ #endif
+- return do_insert_tree(info, dquot, &tmp, 0);
++ if (info->dqi_qtree_depth >= MAX_QTREE_DEPTH) {
++ quota_error(dquot->dq_sb, "Quota tree depth too big!");
++ return -EIO;
++ }
++ return do_insert_tree(info, dquot, blks, 0);
+ }
+
+ /*
+@@ -511,19 +542,20 @@ static int free_dqentry(struct qtree_mem_dqinfo *info, struct dquot *dquot,
+
+ /* Remove reference to dquot from tree */
+ static int remove_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot,
+- uint *blk, int depth)
++ uint *blks, int depth)
+ {
+ char *buf = kmalloc(info->dqi_usable_bs, GFP_NOFS);
+ int ret = 0;
+ uint newblk;
+ __le32 *ref = (__le32 *)buf;
++ int i;
+
+ if (!buf)
+ return -ENOMEM;
+- ret = read_blk(info, *blk, buf);
++ ret = read_blk(info, blks[depth], buf);
+ if (ret < 0) {
+ quota_error(dquot->dq_sb, "Can't read quota data block %u",
+- *blk);
++ blks[depth]);
+ goto out_buf;
+ }
+ newblk = le32_to_cpu(ref[get_index(info, dquot->dq_id, depth)]);
+@@ -532,29 +564,38 @@ static int remove_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot,
+ if (ret)
+ goto out_buf;
+
++ for (i = 0; i <= depth; i++)
++ if (newblk == blks[i]) {
++ quota_error(dquot->dq_sb,
++ "Cycle in quota tree detected: block %u index %u",
++ blks[depth],
++ get_index(info, dquot->dq_id, depth));
++ ret = -EIO;
++ goto out_buf;
++ }
+ if (depth == info->dqi_qtree_depth - 1) {
+ ret = free_dqentry(info, dquot, newblk);
+- newblk = 0;
++ blks[depth + 1] = 0;
+ } else {
+- ret = remove_tree(info, dquot, &newblk, depth+1);
++ blks[depth + 1] = newblk;
++ ret = remove_tree(info, dquot, blks, depth + 1);
+ }
+- if (ret >= 0 && !newblk) {
+- int i;
++ if (ret >= 0 && !blks[depth + 1]) {
+ ref[get_index(info, dquot->dq_id, depth)] = cpu_to_le32(0);
+ /* Block got empty? */
+ for (i = 0; i < (info->dqi_usable_bs >> 2) && !ref[i]; i++)
+ ;
+ /* Don't put the root block into the free block list */
+ if (i == (info->dqi_usable_bs >> 2)
+- && *blk != QT_TREEOFF) {
+- put_free_dqblk(info, buf, *blk);
+- *blk = 0;
++ && blks[depth] != QT_TREEOFF) {
++ put_free_dqblk(info, buf, blks[depth]);
++ blks[depth] = 0;
+ } else {
+- ret = write_blk(info, *blk, buf);
++ ret = write_blk(info, blks[depth], buf);
+ if (ret < 0)
+ quota_error(dquot->dq_sb,
+ "Can't write quota tree block %u",
+- *blk);
++ blks[depth]);
+ }
+ }
+ out_buf:
+@@ -565,11 +606,15 @@ static int remove_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot,
+ /* Delete dquot from tree */
+ int qtree_delete_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
+ {
+- uint tmp = QT_TREEOFF;
++ uint blks[MAX_QTREE_DEPTH] = { QT_TREEOFF };
+
+ if (!dquot->dq_off) /* Even not allocated? */
+ return 0;
+- return remove_tree(info, dquot, &tmp, 0);
++ if (info->dqi_qtree_depth >= MAX_QTREE_DEPTH) {
++ quota_error(dquot->dq_sb, "Quota tree depth too big!");
++ return -EIO;
++ }
++ return remove_tree(info, dquot, blks, 0);
+ }
+ EXPORT_SYMBOL(qtree_delete_dquot);
+
+@@ -613,18 +658,20 @@ static loff_t find_block_dqentry(struct qtree_mem_dqinfo *info,
+
+ /* Find entry for given id in the tree */
+ static loff_t find_tree_dqentry(struct qtree_mem_dqinfo *info,
+- struct dquot *dquot, uint blk, int depth)
++ struct dquot *dquot, uint *blks, int depth)
+ {
+ char *buf = kmalloc(info->dqi_usable_bs, GFP_NOFS);
+ loff_t ret = 0;
+ __le32 *ref = (__le32 *)buf;
++ uint blk;
++ int i;
+
+ if (!buf)
+ return -ENOMEM;
+- ret = read_blk(info, blk, buf);
++ ret = read_blk(info, blks[depth], buf);
+ if (ret < 0) {
+ quota_error(dquot->dq_sb, "Can't read quota tree block %u",
+- blk);
++ blks[depth]);
+ goto out_buf;
+ }
+ ret = 0;
+@@ -636,8 +683,19 @@ static loff_t find_tree_dqentry(struct qtree_mem_dqinfo *info,
+ if (ret)
+ goto out_buf;
+
++ /* Check for cycles in the tree */
++ for (i = 0; i <= depth; i++)
++ if (blk == blks[i]) {
++ quota_error(dquot->dq_sb,
++ "Cycle in quota tree detected: block %u index %u",
++ blks[depth],
++ get_index(info, dquot->dq_id, depth));
++ ret = -EIO;
++ goto out_buf;
++ }
++ blks[depth + 1] = blk;
+ if (depth < info->dqi_qtree_depth - 1)
+- ret = find_tree_dqentry(info, dquot, blk, depth+1);
++ ret = find_tree_dqentry(info, dquot, blks, depth + 1);
+ else
+ ret = find_block_dqentry(info, dquot, blk);
+ out_buf:
+@@ -649,7 +707,13 @@ static loff_t find_tree_dqentry(struct qtree_mem_dqinfo *info,
+ static inline loff_t find_dqentry(struct qtree_mem_dqinfo *info,
+ struct dquot *dquot)
+ {
+- return find_tree_dqentry(info, dquot, QT_TREEOFF, 0);
++ uint blks[MAX_QTREE_DEPTH] = { QT_TREEOFF };
++
++ if (info->dqi_qtree_depth >= MAX_QTREE_DEPTH) {
++ quota_error(dquot->dq_sb, "Quota tree depth too big!");
++ return -EIO;
++ }
++ return find_tree_dqentry(info, dquot, blks, 0);
+ }
+
+ int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
+diff --git a/fs/quota/quota_v2.c b/fs/quota/quota_v2.c
+index ae99e7b88205b2..7978ab671e0c6a 100644
+--- a/fs/quota/quota_v2.c
++++ b/fs/quota/quota_v2.c
+@@ -166,14 +166,17 @@ static int v2_read_file_info(struct super_block *sb, int type)
+ i_size_read(sb_dqopt(sb)->files[type]));
+ goto out_free;
+ }
+- if (qinfo->dqi_free_blk >= qinfo->dqi_blocks) {
+- quota_error(sb, "Free block number too big (%u >= %u).",
+- qinfo->dqi_free_blk, qinfo->dqi_blocks);
++ if (qinfo->dqi_free_blk && (qinfo->dqi_free_blk <= QT_TREEOFF ||
++ qinfo->dqi_free_blk >= qinfo->dqi_blocks)) {
++ quota_error(sb, "Free block number %u out of range (%u, %u).",
++ qinfo->dqi_free_blk, QT_TREEOFF, qinfo->dqi_blocks);
+ goto out_free;
+ }
+- if (qinfo->dqi_free_entry >= qinfo->dqi_blocks) {
+- quota_error(sb, "Block with free entry too big (%u >= %u).",
+- qinfo->dqi_free_entry, qinfo->dqi_blocks);
++ if (qinfo->dqi_free_entry && (qinfo->dqi_free_entry <= QT_TREEOFF ||
++ qinfo->dqi_free_entry >= qinfo->dqi_blocks)) {
++ quota_error(sb, "Block with free entry %u out of range (%u, %u).",
++ qinfo->dqi_free_entry, QT_TREEOFF,
++ qinfo->dqi_blocks);
+ goto out_free;
+ }
+ ret = 0;
+diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
+index 3676e02a0232a4..4ab8cab6ea6147 100644
+--- a/fs/reiserfs/stree.c
++++ b/fs/reiserfs/stree.c
+@@ -1407,7 +1407,7 @@ void reiserfs_delete_solid_item(struct reiserfs_transaction_handle *th,
+ INITIALIZE_PATH(path);
+ int item_len = 0;
+ int tb_init = 0;
+- struct cpu_key cpu_key;
++ struct cpu_key cpu_key = {};
+ int retval;
+ int quota_cut_bytes = 0;
+
+diff --git a/fs/romfs/super.c b/fs/romfs/super.c
+index 5c35f6c760377e..b1bdfbc211c3c0 100644
+--- a/fs/romfs/super.c
++++ b/fs/romfs/super.c
+@@ -593,7 +593,7 @@ static void romfs_kill_sb(struct super_block *sb)
+ #ifdef CONFIG_ROMFS_ON_BLOCK
+ if (sb->s_bdev) {
+ sync_blockdev(sb->s_bdev);
+- blkdev_put(sb->s_bdev, sb);
++ bdev_release(sb->s_bdev_handle);
+ }
+ #endif
+ }
+diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
+index 581ce951933901..2dc730800f448d 100644
+--- a/fs/squashfs/block.c
++++ b/fs/squashfs/block.c
+@@ -321,7 +321,7 @@ int squashfs_read_data(struct super_block *sb, u64 index, int length,
+ TRACE("Block @ 0x%llx, %scompressed size %d\n", index - 2,
+ compressed ? "" : "un", length);
+ }
+- if (length < 0 || length > output->length ||
++ if (length <= 0 || length > output->length ||
+ (index + length) > msblk->bytes_used) {
+ res = -EIO;
+ goto out;
+diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c
+index 8ba8c4c5077078..e8df6430444b01 100644
+--- a/fs/squashfs/file.c
++++ b/fs/squashfs/file.c
+@@ -544,7 +544,8 @@ static void squashfs_readahead(struct readahead_control *ractl)
+ struct squashfs_page_actor *actor;
+ unsigned int nr_pages = 0;
+ struct page **pages;
+- int i, file_end = i_size_read(inode) >> msblk->block_log;
++ int i;
++ loff_t file_end = i_size_read(inode) >> msblk->block_log;
+ unsigned int max_pages = 1UL << shift;
+
+ readahead_expand(ractl, start, (len | mask) + 1);
+diff --git a/fs/squashfs/file_direct.c b/fs/squashfs/file_direct.c
+index f1ccad519e28cc..763a3f7a75f6dd 100644
+--- a/fs/squashfs/file_direct.c
++++ b/fs/squashfs/file_direct.c
+@@ -26,10 +26,10 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize,
+ struct inode *inode = target_page->mapping->host;
+ struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
+
+- int file_end = (i_size_read(inode) - 1) >> PAGE_SHIFT;
++ loff_t file_end = (i_size_read(inode) - 1) >> PAGE_SHIFT;
+ int mask = (1 << (msblk->block_log - PAGE_SHIFT)) - 1;
+- int start_index = target_page->index & ~mask;
+- int end_index = start_index | mask;
++ loff_t start_index = target_page->index & ~mask;
++ loff_t end_index = start_index | mask;
+ int i, n, pages, bytes, res = -ENOMEM;
+ struct page **page;
+ struct squashfs_page_actor *actor;
+diff --git a/fs/super.c b/fs/super.c
+index 576abb1ff0403d..b142e71eb8dfdd 100644
+--- a/fs/super.c
++++ b/fs/super.c
+@@ -1490,14 +1490,16 @@ int setup_bdev_super(struct super_block *sb, int sb_flags,
+ struct fs_context *fc)
+ {
+ blk_mode_t mode = sb_open_mode(sb_flags);
++ struct bdev_handle *bdev_handle;
+ struct block_device *bdev;
+
+- bdev = blkdev_get_by_dev(sb->s_dev, mode, sb, &fs_holder_ops);
+- if (IS_ERR(bdev)) {
++ bdev_handle = bdev_open_by_dev(sb->s_dev, mode, sb, &fs_holder_ops);
++ if (IS_ERR(bdev_handle)) {
+ if (fc)
+ errorf(fc, "%s: Can't open blockdev", fc->source);
+- return PTR_ERR(bdev);
++ return PTR_ERR(bdev_handle);
+ }
++ bdev = bdev_handle->bdev;
+
+ /*
+ * This really should be in blkdev_get_by_dev, but right now can't due
+@@ -1505,7 +1507,7 @@ int setup_bdev_super(struct super_block *sb, int sb_flags,
+ * writable from userspace even for a read-only block device.
+ */
+ if ((mode & BLK_OPEN_WRITE) && bdev_read_only(bdev)) {
+- blkdev_put(bdev, sb);
++ bdev_release(bdev_handle);
+ return -EACCES;
+ }
+
+@@ -1521,10 +1523,11 @@ int setup_bdev_super(struct super_block *sb, int sb_flags,
+ mutex_unlock(&bdev->bd_fsfreeze_mutex);
+ if (fc)
+ warnf(fc, "%pg: Can't mount, blockdev is frozen", bdev);
+- blkdev_put(bdev, sb);
++ bdev_release(bdev_handle);
+ return -EBUSY;
+ }
+ spin_lock(&sb_lock);
++ sb->s_bdev_handle = bdev_handle;
+ sb->s_bdev = bdev;
+ sb->s_bdi = bdi_get(bdev->bd_disk->bdi);
+ if (bdev_stable_writes(bdev))
+@@ -1657,7 +1660,7 @@ void kill_block_super(struct super_block *sb)
+ generic_shutdown_super(sb);
+ if (bdev) {
+ sync_blockdev(bdev);
+- blkdev_put(bdev, sb);
++ bdev_release(sb->s_bdev_handle);
+ }
+ }
+
+diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
+index 265da00a1a8b1b..6eefe5153a6ff7 100644
+--- a/include/linux/cgroup-defs.h
++++ b/include/linux/cgroup-defs.h
+@@ -543,6 +543,10 @@ struct cgroup_root {
+ /* Unique id for this hierarchy. */
+ int hierarchy_id;
+
++ /* A list running through the active hierarchies */
++ struct list_head root_list;
++ struct rcu_head rcu; /* Must be near the top */
++
+ /*
+ * The root cgroup. The containing cgroup_root will be destroyed on its
+ * release. cgrp->ancestors[0] will be used overflowing into the
+@@ -556,9 +560,6 @@ struct cgroup_root {
+ /* Number of cgroups in the hierarchy, used only for /proc/cgroups */
+ atomic_t nr_cgrps;
+
+- /* A list running through the active hierarchies */
+- struct list_head root_list;
+-
+ /* Hierarchy-specific flags */
+ unsigned int flags;
+
+diff --git a/include/linux/fs.h b/include/linux/fs.h
+index 56dce38c478627..43e640fb4a7f77 100644
+--- a/include/linux/fs.h
++++ b/include/linux/fs.h
+@@ -1036,7 +1036,7 @@ struct file_handle {
+ __u32 handle_bytes;
+ int handle_type;
+ /* file identifier */
+- unsigned char f_handle[];
++ unsigned char f_handle[] __counted_by(handle_bytes);
+ };
+
+ static inline struct file *get_file(struct file *f)
+@@ -1223,6 +1223,7 @@ struct super_block {
+ struct hlist_bl_head s_roots; /* alternate root dentries for NFS */
+ struct list_head s_mounts; /* list of mounts; _not_ for fs use */
+ struct block_device *s_bdev;
++ struct bdev_handle *s_bdev_handle;
+ struct backing_dev_info *s_bdi;
+ struct mtd_info *s_mtd;
+ struct hlist_node s_instances;
+diff --git a/include/linux/sockptr.h b/include/linux/sockptr.h
+index bae5e2369b4f7a..1c1a5d926b1713 100644
+--- a/include/linux/sockptr.h
++++ b/include/linux/sockptr.h
+@@ -50,11 +50,36 @@ static inline int copy_from_sockptr_offset(void *dst, sockptr_t src,
+ return 0;
+ }
+
++/* Deprecated.
++ * This is unsafe unless the caller has already checked the user-provided optlen.
++ * Prefer copy_safe_from_sockptr() instead.
++ */
+ static inline int copy_from_sockptr(void *dst, sockptr_t src, size_t size)
+ {
+ return copy_from_sockptr_offset(dst, src, 0, size);
+ }
+
++/**
++ * copy_safe_from_sockptr: copy a struct from sockptr after checking @optlen
++ * @dst: Destination address, in kernel space. This buffer must be @ksize
++ * bytes long.
++ * @ksize: Size of @dst struct.
++ * @optval: Source address. (in user or kernel space)
++ * @optlen: Size of @optval data. Only the first @ksize bytes are copied.
++ *
++ * Returns:
++ * * -EINVAL: @optlen < @ksize
++ * * -EFAULT: access to userspace failed.
++ * * 0 : @ksize bytes were copied
++ */
++static inline int copy_safe_from_sockptr(void *dst, size_t ksize,
++ sockptr_t optval, unsigned int optlen)
++{
++ if (optlen < ksize)
++ return -EINVAL;
++ return copy_from_sockptr(dst, optval, ksize);
++}
++
+ static inline int copy_to_sockptr_offset(sockptr_t dst, size_t offset,
+ const void *src, size_t size)
+ {
+diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
+index dbf5b21feafe48..3d8b215f32d5b0 100644
+--- a/include/linux/sunrpc/svc.h
++++ b/include/linux/sunrpc/svc.h
+@@ -336,7 +336,6 @@ struct svc_program {
+ const struct svc_version **pg_vers; /* version array */
+ char * pg_name; /* service name */
+ char * pg_class; /* class name: services sharing authentication */
+- struct svc_stat * pg_stats; /* rpc statistics */
+ enum svc_auth_status (*pg_authenticate)(struct svc_rqst *rqstp);
+ __be32 (*pg_init_request)(struct svc_rqst *,
+ const struct svc_program *,
+@@ -408,7 +407,9 @@ bool svc_rqst_replace_page(struct svc_rqst *rqstp,
+ void svc_rqst_release_pages(struct svc_rqst *rqstp);
+ void svc_rqst_free(struct svc_rqst *);
+ void svc_exit_thread(struct svc_rqst *);
+-struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int,
++struct svc_serv * svc_create_pooled(struct svc_program *prog,
++ struct svc_stat *stats,
++ unsigned int bufsize,
+ int (*threadfn)(void *data));
+ int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int);
+ int svc_pool_stats_open(struct svc_serv *serv, struct file *file);
+diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
+index fb09fd1767f289..ba6e346c8d669a 100644
+--- a/include/uapi/linux/bpf.h
++++ b/include/uapi/linux/bpf.h
+@@ -77,12 +77,29 @@ struct bpf_insn {
+ __s32 imm; /* signed immediate constant */
+ };
+
+-/* Key of an a BPF_MAP_TYPE_LPM_TRIE entry */
++/* Deprecated: use struct bpf_lpm_trie_key_u8 (when the "data" member is needed for
++ * byte access) or struct bpf_lpm_trie_key_hdr (when using an alternative type for
++ * the trailing flexible array member) instead.
++ */
+ struct bpf_lpm_trie_key {
+ __u32 prefixlen; /* up to 32 for AF_INET, 128 for AF_INET6 */
+ __u8 data[0]; /* Arbitrary size */
+ };
+
++/* Header for bpf_lpm_trie_key structs */
++struct bpf_lpm_trie_key_hdr {
++ __u32 prefixlen;
++};
++
++/* Key of a BPF_MAP_TYPE_LPM_TRIE entry, with trailing byte array. */
++struct bpf_lpm_trie_key_u8 {
++ union {
++ struct bpf_lpm_trie_key_hdr hdr;
++ __u32 prefixlen;
++ };
++ __u8 data[]; /* Arbitrary size */
++};
++
+ struct bpf_cgroup_storage_key {
+ __u64 cgroup_inode_id; /* cgroup inode id */
+ __u32 attach_type; /* program attach type (enum bpf_attach_type) */
+diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
+index b32be680da6cdc..d0febf07051edf 100644
+--- a/kernel/bpf/lpm_trie.c
++++ b/kernel/bpf/lpm_trie.c
+@@ -164,13 +164,13 @@ static inline int extract_bit(const u8 *data, size_t index)
+ */
+ static size_t longest_prefix_match(const struct lpm_trie *trie,
+ const struct lpm_trie_node *node,
+- const struct bpf_lpm_trie_key *key)
++ const struct bpf_lpm_trie_key_u8 *key)
+ {
+ u32 limit = min(node->prefixlen, key->prefixlen);
+ u32 prefixlen = 0, i = 0;
+
+ BUILD_BUG_ON(offsetof(struct lpm_trie_node, data) % sizeof(u32));
+- BUILD_BUG_ON(offsetof(struct bpf_lpm_trie_key, data) % sizeof(u32));
++ BUILD_BUG_ON(offsetof(struct bpf_lpm_trie_key_u8, data) % sizeof(u32));
+
+ #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && defined(CONFIG_64BIT)
+
+@@ -229,7 +229,7 @@ static void *trie_lookup_elem(struct bpf_map *map, void *_key)
+ {
+ struct lpm_trie *trie = container_of(map, struct lpm_trie, map);
+ struct lpm_trie_node *node, *found = NULL;
+- struct bpf_lpm_trie_key *key = _key;
++ struct bpf_lpm_trie_key_u8 *key = _key;
+
+ if (key->prefixlen > trie->max_prefixlen)
+ return NULL;
+@@ -308,8 +308,9 @@ static long trie_update_elem(struct bpf_map *map,
+ {
+ struct lpm_trie *trie = container_of(map, struct lpm_trie, map);
+ struct lpm_trie_node *node, *im_node = NULL, *new_node = NULL;
++ struct lpm_trie_node *free_node = NULL;
+ struct lpm_trie_node __rcu **slot;
+- struct bpf_lpm_trie_key *key = _key;
++ struct bpf_lpm_trie_key_u8 *key = _key;
+ unsigned long irq_flags;
+ unsigned int next_bit;
+ size_t matchlen = 0;
+@@ -382,7 +383,7 @@ static long trie_update_elem(struct bpf_map *map,
+ trie->n_entries--;
+
+ rcu_assign_pointer(*slot, new_node);
+- kfree_rcu(node, rcu);
++ free_node = node;
+
+ goto out;
+ }
+@@ -429,6 +430,7 @@ static long trie_update_elem(struct bpf_map *map,
+ }
+
+ spin_unlock_irqrestore(&trie->lock, irq_flags);
++ kfree_rcu(free_node, rcu);
+
+ return ret;
+ }
+@@ -437,7 +439,8 @@ static long trie_update_elem(struct bpf_map *map,
+ static long trie_delete_elem(struct bpf_map *map, void *_key)
+ {
+ struct lpm_trie *trie = container_of(map, struct lpm_trie, map);
+- struct bpf_lpm_trie_key *key = _key;
++ struct lpm_trie_node *free_node = NULL, *free_parent = NULL;
++ struct bpf_lpm_trie_key_u8 *key = _key;
+ struct lpm_trie_node __rcu **trim, **trim2;
+ struct lpm_trie_node *node, *parent;
+ unsigned long irq_flags;
+@@ -506,8 +509,8 @@ static long trie_delete_elem(struct bpf_map *map, void *_key)
+ else
+ rcu_assign_pointer(
+ *trim2, rcu_access_pointer(parent->child[0]));
+- kfree_rcu(parent, rcu);
+- kfree_rcu(node, rcu);
++ free_parent = parent;
++ free_node = node;
+ goto out;
+ }
+
+@@ -521,10 +524,12 @@ static long trie_delete_elem(struct bpf_map *map, void *_key)
+ rcu_assign_pointer(*trim, rcu_access_pointer(node->child[1]));
+ else
+ RCU_INIT_POINTER(*trim, NULL);
+- kfree_rcu(node, rcu);
++ free_node = node;
+
+ out:
+ spin_unlock_irqrestore(&trie->lock, irq_flags);
++ kfree_rcu(free_parent, rcu);
++ kfree_rcu(free_node, rcu);
+
+ return ret;
+ }
+@@ -536,7 +541,7 @@ static long trie_delete_elem(struct bpf_map *map, void *_key)
+ sizeof(struct lpm_trie_node))
+ #define LPM_VAL_SIZE_MIN 1
+
+-#define LPM_KEY_SIZE(X) (sizeof(struct bpf_lpm_trie_key) + (X))
++#define LPM_KEY_SIZE(X) (sizeof(struct bpf_lpm_trie_key_u8) + (X))
+ #define LPM_KEY_SIZE_MAX LPM_KEY_SIZE(LPM_DATA_SIZE_MAX)
+ #define LPM_KEY_SIZE_MIN LPM_KEY_SIZE(LPM_DATA_SIZE_MIN)
+
+@@ -565,7 +570,7 @@ static struct bpf_map *trie_alloc(union bpf_attr *attr)
+ /* copy mandatory map attributes */
+ bpf_map_init_from_attr(&trie->map, attr);
+ trie->data_size = attr->key_size -
+- offsetof(struct bpf_lpm_trie_key, data);
++ offsetof(struct bpf_lpm_trie_key_u8, data);
+ trie->max_prefixlen = trie->data_size * 8;
+
+ spin_lock_init(&trie->lock);
+@@ -616,7 +621,7 @@ static int trie_get_next_key(struct bpf_map *map, void *_key, void *_next_key)
+ {
+ struct lpm_trie_node *node, *next_node = NULL, *parent, *search_root;
+ struct lpm_trie *trie = container_of(map, struct lpm_trie, map);
+- struct bpf_lpm_trie_key *key = _key, *next_key = _next_key;
++ struct bpf_lpm_trie_key_u8 *key = _key, *next_key = _next_key;
+ struct lpm_trie_node **node_stack = NULL;
+ int err = 0, stack_ptr = -1;
+ unsigned int next_bit;
+@@ -703,7 +708,7 @@ static int trie_get_next_key(struct bpf_map *map, void *_key, void *_next_key)
+ }
+ do_copy:
+ next_key->prefixlen = next_node->prefixlen;
+- memcpy((void *)next_key + offsetof(struct bpf_lpm_trie_key, data),
++ memcpy((void *)next_key + offsetof(struct bpf_lpm_trie_key_u8, data),
+ next_node->data, trie->data_size);
+ free_stack:
+ kfree(node_stack);
+@@ -715,7 +720,7 @@ static int trie_check_btf(const struct bpf_map *map,
+ const struct btf_type *key_type,
+ const struct btf_type *value_type)
+ {
+- /* Keys must have struct bpf_lpm_trie_key embedded. */
++ /* Keys must have struct bpf_lpm_trie_key_u8 embedded. */
+ return BTF_INFO_KIND(key_type->info) != BTF_KIND_STRUCT ?
+ -EINVAL : 0;
+ }
+diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h
+index c56071f150f2ae..5e17f01ced9fd2 100644
+--- a/kernel/cgroup/cgroup-internal.h
++++ b/kernel/cgroup/cgroup-internal.h
+@@ -170,7 +170,8 @@ extern struct list_head cgroup_roots;
+
+ /* iterate across the hierarchies */
+ #define for_each_root(root) \
+- list_for_each_entry((root), &cgroup_roots, root_list)
++ list_for_each_entry_rcu((root), &cgroup_roots, root_list, \
++ lockdep_is_held(&cgroup_mutex))
+
+ /**
+ * for_each_subsys - iterate all enabled cgroup subsystems
+diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
+index 094f513319259d..d872fff901073f 100644
+--- a/kernel/cgroup/cgroup.c
++++ b/kernel/cgroup/cgroup.c
+@@ -1313,7 +1313,7 @@ static void cgroup_exit_root_id(struct cgroup_root *root)
+
+ void cgroup_free_root(struct cgroup_root *root)
+ {
+- kfree(root);
++ kfree_rcu(root, rcu);
+ }
+
+ static void cgroup_destroy_root(struct cgroup_root *root)
+@@ -1346,7 +1346,7 @@ static void cgroup_destroy_root(struct cgroup_root *root)
+ spin_unlock_irq(&css_set_lock);
+
+ if (!list_empty(&root->root_list)) {
+- list_del(&root->root_list);
++ list_del_rcu(&root->root_list);
+ cgroup_root_count--;
+ }
+
+@@ -1386,7 +1386,15 @@ static inline struct cgroup *__cset_cgroup_from_root(struct css_set *cset,
+ }
+ }
+
+- BUG_ON(!res_cgroup);
++ /*
++ * If cgroup_mutex is not held, the cgrp_cset_link will be freed
++ * before we remove the cgroup root from the root_list. Consequently,
++ * when accessing a cgroup root, the cset_link may have already been
++ * freed, resulting in a NULL res_cgroup. However, by holding the
++ * cgroup_mutex, we ensure that res_cgroup can't be NULL.
++ * If we don't hold cgroup_mutex in the caller, we must do the NULL
++ * check.
++ */
+ return res_cgroup;
+ }
+
+@@ -1445,7 +1453,6 @@ static struct cgroup *current_cgns_cgroup_dfl(void)
+ static struct cgroup *cset_cgroup_from_root(struct css_set *cset,
+ struct cgroup_root *root)
+ {
+- lockdep_assert_held(&cgroup_mutex);
+ lockdep_assert_held(&css_set_lock);
+
+ return __cset_cgroup_from_root(cset, root);
+@@ -1453,7 +1460,9 @@ static struct cgroup *cset_cgroup_from_root(struct css_set *cset,
+
+ /*
+ * Return the cgroup for "task" from the given hierarchy. Must be
+- * called with cgroup_mutex and css_set_lock held.
++ * called with css_set_lock held to prevent task's groups from being modified.
++ * Must be called with either cgroup_mutex or rcu read lock to prevent the
++ * cgroup root from being destroyed.
+ */
+ struct cgroup *task_cgroup_from_root(struct task_struct *task,
+ struct cgroup_root *root)
+@@ -2014,7 +2023,7 @@ void init_cgroup_root(struct cgroup_fs_context *ctx)
+ struct cgroup_root *root = ctx->root;
+ struct cgroup *cgrp = &root->cgrp;
+
+- INIT_LIST_HEAD(&root->root_list);
++ INIT_LIST_HEAD_RCU(&root->root_list);
+ atomic_set(&root->nr_cgrps, 1);
+ cgrp->root = root;
+ init_cgroup_housekeeping(cgrp);
+@@ -2097,7 +2106,7 @@ int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask)
+ * care of subsystems' refcounts, which are explicitly dropped in
+ * the failure exit path.
+ */
+- list_add(&root->root_list, &cgroup_roots);
++ list_add_rcu(&root->root_list, &cgroup_roots);
+ cgroup_root_count++;
+
+ /*
+diff --git a/kernel/irq/cpuhotplug.c b/kernel/irq/cpuhotplug.c
+index 5ecd072a34fe72..eb86283901565b 100644
+--- a/kernel/irq/cpuhotplug.c
++++ b/kernel/irq/cpuhotplug.c
+@@ -130,6 +130,22 @@ static bool migrate_one_irq(struct irq_desc *desc)
+ * CPU.
+ */
+ err = irq_do_set_affinity(d, affinity, false);
++
++ /*
++ * If there are online CPUs in the affinity mask, but they have no
++ * vectors left to make the migration work, try to break the
++ * affinity by migrating to any online CPU.
++ */
++ if (err == -ENOSPC && !irqd_affinity_is_managed(d) && affinity != cpu_online_mask) {
++ pr_debug("IRQ%u: set affinity failed for %*pbl, re-try with online CPUs\n",
++ d->irq, cpumask_pr_args(affinity));
++
++ affinity = cpu_online_mask;
++ brokeaff = true;
++
++ err = irq_do_set_affinity(d, affinity, false);
++ }
++
+ if (err) {
+ pr_warn_ratelimited("IRQ%u: set affinity failed(%d).\n",
+ d->irq, err);
+@@ -195,10 +211,15 @@ static void irq_restore_affinity_of_irq(struct irq_desc *desc, unsigned int cpu)
+ !irq_data_get_irq_chip(data) || !cpumask_test_cpu(cpu, affinity))
+ return;
+
+- if (irqd_is_managed_and_shutdown(data)) {
+- irq_startup(desc, IRQ_RESEND, IRQ_START_COND);
++ /*
++ * Don't restore suspended interrupts here when a system comes back
++ * from S3. They are reenabled via resume_device_irqs().
++ */
++ if (desc->istate & IRQS_SUSPENDED)
+ return;
+- }
++
++ if (irqd_is_managed_and_shutdown(data))
++ irq_startup(desc, IRQ_RESEND, IRQ_START_COND);
+
+ /*
+ * If the interrupt can only be directed to a single target
+diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
+index a054cd5ec08bce..8a936c1ffad390 100644
+--- a/kernel/irq/manage.c
++++ b/kernel/irq/manage.c
+@@ -796,10 +796,14 @@ void __enable_irq(struct irq_desc *desc)
+ irq_settings_set_noprobe(desc);
+ /*
+ * Call irq_startup() not irq_enable() here because the
+- * interrupt might be marked NOAUTOEN. So irq_startup()
+- * needs to be invoked when it gets enabled the first
+- * time. If it was already started up, then irq_startup()
+- * will invoke irq_enable() under the hood.
++ * interrupt might be marked NOAUTOEN so irq_startup()
++ * needs to be invoked when it gets enabled the first time.
++ * This is also required when __enable_irq() is invoked for
++ * a managed and shutdown interrupt from the S3 resume
++ * path.
++ *
++ * If it was already started up, then irq_startup() will
++ * invoke irq_enable() under the hood.
+ */
+ irq_startup(desc, IRQ_RESEND, IRQ_START_FORCE);
+ break;
+diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
+index 13f0d11927074a..68af76ca8bc992 100644
+--- a/mm/debug_vm_pgtable.c
++++ b/mm/debug_vm_pgtable.c
+@@ -39,22 +39,7 @@
+ * Please refer Documentation/mm/arch_pgtable_helpers.rst for the semantics
+ * expectations that are being validated here. All future changes in here
+ * or the documentation need to be in sync.
+- *
+- * On s390 platform, the lower 4 bits are used to identify given page table
+- * entry type. But these bits might affect the ability to clear entries with
+- * pxx_clear() because of how dynamic page table folding works on s390. So
+- * while loading up the entries do not change the lower 4 bits. It does not
+- * have affect any other platform. Also avoid the 62nd bit on ppc64 that is
+- * used to mark a pte entry.
+ */
+-#define S390_SKIP_MASK GENMASK(3, 0)
+-#if __BITS_PER_LONG == 64
+-#define PPC64_SKIP_MASK GENMASK(62, 62)
+-#else
+-#define PPC64_SKIP_MASK 0x0
+-#endif
+-#define ARCH_SKIP_MASK (S390_SKIP_MASK | PPC64_SKIP_MASK)
+-#define RANDOM_ORVALUE (GENMASK(BITS_PER_LONG - 1, 0) & ~ARCH_SKIP_MASK)
+ #define RANDOM_NZVALUE GENMASK(7, 0)
+
+ struct pgtable_debug_args {
+@@ -510,8 +495,7 @@ static void __init pud_clear_tests(struct pgtable_debug_args *args)
+ return;
+
+ pr_debug("Validating PUD clear\n");
+- pud = __pud(pud_val(pud) | RANDOM_ORVALUE);
+- WRITE_ONCE(*args->pudp, pud);
++ WARN_ON(pud_none(pud));
+ pud_clear(args->pudp);
+ pud = READ_ONCE(*args->pudp);
+ WARN_ON(!pud_none(pud));
+@@ -547,8 +531,7 @@ static void __init p4d_clear_tests(struct pgtable_debug_args *args)
+ return;
+
+ pr_debug("Validating P4D clear\n");
+- p4d = __p4d(p4d_val(p4d) | RANDOM_ORVALUE);
+- WRITE_ONCE(*args->p4dp, p4d);
++ WARN_ON(p4d_none(p4d));
+ p4d_clear(args->p4dp);
+ p4d = READ_ONCE(*args->p4dp);
+ WARN_ON(!p4d_none(p4d));
+@@ -581,8 +564,7 @@ static void __init pgd_clear_tests(struct pgtable_debug_args *args)
+ return;
+
+ pr_debug("Validating PGD clear\n");
+- pgd = __pgd(pgd_val(pgd) | RANDOM_ORVALUE);
+- WRITE_ONCE(*args->pgdp, pgd);
++ WARN_ON(pgd_none(pgd));
+ pgd_clear(args->pgdp);
+ pgd = READ_ONCE(*args->pgdp);
+ WARN_ON(!pgd_none(pgd));
+@@ -633,10 +615,8 @@ static void __init pte_clear_tests(struct pgtable_debug_args *args)
+ if (WARN_ON(!args->ptep))
+ return;
+
+-#ifndef CONFIG_RISCV
+- pte = __pte(pte_val(pte) | RANDOM_ORVALUE);
+-#endif
+ set_pte_at(args->mm, args->vaddr, args->ptep, pte);
++ WARN_ON(pte_none(pte));
+ flush_dcache_page(page);
+ barrier();
+ ptep_clear(args->mm, args->vaddr, args->ptep);
+@@ -649,8 +629,7 @@ static void __init pmd_clear_tests(struct pgtable_debug_args *args)
+ pmd_t pmd = READ_ONCE(*args->pmdp);
+
+ pr_debug("Validating PMD clear\n");
+- pmd = __pmd(pmd_val(pmd) | RANDOM_ORVALUE);
+- WRITE_ONCE(*args->pmdp, pmd);
++ WARN_ON(pmd_none(pmd));
+ pmd_clear(args->pmdp);
+ pmd = READ_ONCE(*args->pmdp);
+ WARN_ON(!pmd_none(pmd));
+diff --git a/mm/gup.c b/mm/gup.c
+index f50fe2219a13b6..fdd75384160d8d 100644
+--- a/mm/gup.c
++++ b/mm/gup.c
+@@ -97,95 +97,6 @@ static inline struct folio *try_get_folio(struct page *page, int refs)
+ return folio;
+ }
+
+-/**
+- * try_grab_folio() - Attempt to get or pin a folio.
+- * @page: pointer to page to be grabbed
+- * @refs: the value to (effectively) add to the folio's refcount
+- * @flags: gup flags: these are the FOLL_* flag values.
+- *
+- * "grab" names in this file mean, "look at flags to decide whether to use
+- * FOLL_PIN or FOLL_GET behavior, when incrementing the folio's refcount.
+- *
+- * Either FOLL_PIN or FOLL_GET (or neither) must be set, but not both at the
+- * same time. (That's true throughout the get_user_pages*() and
+- * pin_user_pages*() APIs.) Cases:
+- *
+- * FOLL_GET: folio's refcount will be incremented by @refs.
+- *
+- * FOLL_PIN on large folios: folio's refcount will be incremented by
+- * @refs, and its pincount will be incremented by @refs.
+- *
+- * FOLL_PIN on single-page folios: folio's refcount will be incremented by
+- * @refs * GUP_PIN_COUNTING_BIAS.
+- *
+- * Return: The folio containing @page (with refcount appropriately
+- * incremented) for success, or NULL upon failure. If neither FOLL_GET
+- * nor FOLL_PIN was set, that's considered failure, and furthermore,
+- * a likely bug in the caller, so a warning is also emitted.
+- */
+-struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags)
+-{
+- struct folio *folio;
+-
+- if (WARN_ON_ONCE((flags & (FOLL_GET | FOLL_PIN)) == 0))
+- return NULL;
+-
+- if (unlikely(!(flags & FOLL_PCI_P2PDMA) && is_pci_p2pdma_page(page)))
+- return NULL;
+-
+- if (flags & FOLL_GET)
+- return try_get_folio(page, refs);
+-
+- /* FOLL_PIN is set */
+-
+- /*
+- * Don't take a pin on the zero page - it's not going anywhere
+- * and it is used in a *lot* of places.
+- */
+- if (is_zero_page(page))
+- return page_folio(page);
+-
+- folio = try_get_folio(page, refs);
+- if (!folio)
+- return NULL;
+-
+- /*
+- * Can't do FOLL_LONGTERM + FOLL_PIN gup fast path if not in a
+- * right zone, so fail and let the caller fall back to the slow
+- * path.
+- */
+- if (unlikely((flags & FOLL_LONGTERM) &&
+- !folio_is_longterm_pinnable(folio))) {
+- if (!put_devmap_managed_page_refs(&folio->page, refs))
+- folio_put_refs(folio, refs);
+- return NULL;
+- }
+-
+- /*
+- * When pinning a large folio, use an exact count to track it.
+- *
+- * However, be sure to *also* increment the normal folio
+- * refcount field at least once, so that the folio really
+- * is pinned. That's why the refcount from the earlier
+- * try_get_folio() is left intact.
+- */
+- if (folio_test_large(folio))
+- atomic_add(refs, &folio->_pincount);
+- else
+- folio_ref_add(folio,
+- refs * (GUP_PIN_COUNTING_BIAS - 1));
+- /*
+- * Adjust the pincount before re-checking the PTE for changes.
+- * This is essentially a smp_mb() and is paired with a memory
+- * barrier in page_try_share_anon_rmap().
+- */
+- smp_mb__after_atomic();
+-
+- node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, refs);
+-
+- return folio;
+-}
+-
+ static void gup_put_folio(struct folio *folio, int refs, unsigned int flags)
+ {
+ if (flags & FOLL_PIN) {
+@@ -203,58 +114,59 @@ static void gup_put_folio(struct folio *folio, int refs, unsigned int flags)
+ }
+
+ /**
+- * try_grab_page() - elevate a page's refcount by a flag-dependent amount
+- * @page: pointer to page to be grabbed
+- * @flags: gup flags: these are the FOLL_* flag values.
++ * try_grab_folio() - add a folio's refcount by a flag-dependent amount
++ * @folio: pointer to folio to be grabbed
++ * @refs: the value to (effectively) add to the folio's refcount
++ * @flags: gup flags: these are the FOLL_* flag values
+ *
+ * This might not do anything at all, depending on the flags argument.
+ *
+ * "grab" names in this file mean, "look at flags to decide whether to use
+- * FOLL_PIN or FOLL_GET behavior, when incrementing the page's refcount.
++ * FOLL_PIN or FOLL_GET behavior, when incrementing the folio's refcount.
+ *
+ * Either FOLL_PIN or FOLL_GET (or neither) may be set, but not both at the same
+- * time. Cases: please see the try_grab_folio() documentation, with
+- * "refs=1".
++ * time.
+ *
+ * Return: 0 for success, or if no action was required (if neither FOLL_PIN
+ * nor FOLL_GET was set, nothing is done). A negative error code for failure:
+ *
+- * -ENOMEM FOLL_GET or FOLL_PIN was set, but the page could not
++ * -ENOMEM FOLL_GET or FOLL_PIN was set, but the folio could not
+ * be grabbed.
++ *
++ * It is called when we have a stable reference for the folio, typically in
++ * GUP slow path.
+ */
+-int __must_check try_grab_page(struct page *page, unsigned int flags)
++int __must_check try_grab_folio(struct folio *folio, int refs,
++ unsigned int flags)
+ {
+- struct folio *folio = page_folio(page);
+-
+ if (WARN_ON_ONCE(folio_ref_count(folio) <= 0))
+ return -ENOMEM;
+
+- if (unlikely(!(flags & FOLL_PCI_P2PDMA) && is_pci_p2pdma_page(page)))
++ if (unlikely(!(flags & FOLL_PCI_P2PDMA) && is_pci_p2pdma_page(&folio->page)))
+ return -EREMOTEIO;
+
+ if (flags & FOLL_GET)
+- folio_ref_inc(folio);
++ folio_ref_add(folio, refs);
+ else if (flags & FOLL_PIN) {
+ /*
+ * Don't take a pin on the zero page - it's not going anywhere
+ * and it is used in a *lot* of places.
+ */
+- if (is_zero_page(page))
++ if (is_zero_folio(folio))
+ return 0;
+
+ /*
+- * Similar to try_grab_folio(): be sure to *also*
+- * increment the normal page refcount field at least once,
++ * Increment the normal page refcount field at least once,
+ * so that the page really is pinned.
+ */
+ if (folio_test_large(folio)) {
+- folio_ref_add(folio, 1);
+- atomic_add(1, &folio->_pincount);
++ folio_ref_add(folio, refs);
++ atomic_add(refs, &folio->_pincount);
+ } else {
+- folio_ref_add(folio, GUP_PIN_COUNTING_BIAS);
++ folio_ref_add(folio, refs * GUP_PIN_COUNTING_BIAS);
+ }
+
+- node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, 1);
++ node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, refs);
+ }
+
+ return 0;
+@@ -647,8 +559,8 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
+ VM_BUG_ON_PAGE((flags & FOLL_PIN) && PageAnon(page) &&
+ !PageAnonExclusive(page), page);
+
+- /* try_grab_page() does nothing unless FOLL_GET or FOLL_PIN is set. */
+- ret = try_grab_page(page, flags);
++ /* try_grab_folio() does nothing unless FOLL_GET or FOLL_PIN is set. */
++ ret = try_grab_folio(page_folio(page), 1, flags);
+ if (unlikely(ret)) {
+ page = ERR_PTR(ret);
+ goto out;
+@@ -899,7 +811,7 @@ static int get_gate_page(struct mm_struct *mm, unsigned long address,
+ goto unmap;
+ *page = pte_page(entry);
+ }
+- ret = try_grab_page(*page, gup_flags);
++ ret = try_grab_folio(page_folio(*page), 1, gup_flags);
+ if (unlikely(ret))
+ goto unmap;
+ out:
+@@ -1302,20 +1214,19 @@ static long __get_user_pages(struct mm_struct *mm,
+ * pages.
+ */
+ if (page_increm > 1) {
+- struct folio *folio;
++ struct folio *folio = page_folio(page);
+
+ /*
+ * Since we already hold refcount on the
+ * large folio, this should never fail.
+ */
+- folio = try_grab_folio(page, page_increm - 1,
+- foll_flags);
+- if (WARN_ON_ONCE(!folio)) {
++ if (try_grab_folio(folio, page_increm - 1,
++ foll_flags)) {
+ /*
+ * Release the 1st page ref if the
+ * folio is problematic, fail hard.
+ */
+- gup_put_folio(page_folio(page), 1,
++ gup_put_folio(folio, 1,
+ foll_flags);
+ ret = -EFAULT;
+ goto out;
+@@ -2541,6 +2452,102 @@ static void __maybe_unused undo_dev_pagemap(int *nr, int nr_start,
+ }
+ }
+
++/**
++ * try_grab_folio_fast() - Attempt to get or pin a folio in fast path.
++ * @page: pointer to page to be grabbed
++ * @refs: the value to (effectively) add to the folio's refcount
++ * @flags: gup flags: these are the FOLL_* flag values.
++ *
++ * "grab" names in this file mean, "look at flags to decide whether to use
++ * FOLL_PIN or FOLL_GET behavior, when incrementing the folio's refcount.
++ *
++ * Either FOLL_PIN or FOLL_GET (or neither) must be set, but not both at the
++ * same time. (That's true throughout the get_user_pages*() and
++ * pin_user_pages*() APIs.) Cases:
++ *
++ * FOLL_GET: folio's refcount will be incremented by @refs.
++ *
++ * FOLL_PIN on large folios: folio's refcount will be incremented by
++ * @refs, and its pincount will be incremented by @refs.
++ *
++ * FOLL_PIN on single-page folios: folio's refcount will be incremented by
++ * @refs * GUP_PIN_COUNTING_BIAS.
++ *
++ * Return: The folio containing @page (with refcount appropriately
++ * incremented) for success, or NULL upon failure. If neither FOLL_GET
++ * nor FOLL_PIN was set, that's considered failure, and furthermore,
++ * a likely bug in the caller, so a warning is also emitted.
++ *
++ * It uses add ref unless zero to elevate the folio refcount and must be called
++ * in fast path only.
++ */
++static struct folio *try_grab_folio_fast(struct page *page, int refs,
++ unsigned int flags)
++{
++ struct folio *folio;
++
++ /* Raise warn if it is not called in fast GUP */
++ VM_WARN_ON_ONCE(!irqs_disabled());
++
++ if (WARN_ON_ONCE((flags & (FOLL_GET | FOLL_PIN)) == 0))
++ return NULL;
++
++ if (unlikely(!(flags & FOLL_PCI_P2PDMA) && is_pci_p2pdma_page(page)))
++ return NULL;
++
++ if (flags & FOLL_GET)
++ return try_get_folio(page, refs);
++
++ /* FOLL_PIN is set */
++
++ /*
++ * Don't take a pin on the zero page - it's not going anywhere
++ * and it is used in a *lot* of places.
++ */
++ if (is_zero_page(page))
++ return page_folio(page);
++
++ folio = try_get_folio(page, refs);
++ if (!folio)
++ return NULL;
++
++ /*
++ * Can't do FOLL_LONGTERM + FOLL_PIN gup fast path if not in a
++ * right zone, so fail and let the caller fall back to the slow
++ * path.
++ */
++ if (unlikely((flags & FOLL_LONGTERM) &&
++ !folio_is_longterm_pinnable(folio))) {
++ if (!put_devmap_managed_page_refs(&folio->page, refs))
++ folio_put_refs(folio, refs);
++ return NULL;
++ }
++
++ /*
++ * When pinning a large folio, use an exact count to track it.
++ *
++ * However, be sure to *also* increment the normal folio
++ * refcount field at least once, so that the folio really
++ * is pinned. That's why the refcount from the earlier
++ * try_get_folio() is left intact.
++ */
++ if (folio_test_large(folio))
++ atomic_add(refs, &folio->_pincount);
++ else
++ folio_ref_add(folio,
++ refs * (GUP_PIN_COUNTING_BIAS - 1));
++ /*
++ * Adjust the pincount before re-checking the PTE for changes.
++ * This is essentially a smp_mb() and is paired with a memory
++ * barrier in folio_try_share_anon_rmap_*().
++ */
++ smp_mb__after_atomic();
++
++ node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, refs);
++
++ return folio;
++}
++
+ #ifdef CONFIG_ARCH_HAS_PTE_SPECIAL
+ /*
+ * Fast-gup relies on pte change detection to avoid concurrent pgtable
+@@ -2605,7 +2612,7 @@ static int gup_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
+ VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
+ page = pte_page(pte);
+
+- folio = try_grab_folio(page, 1, flags);
++ folio = try_grab_folio_fast(page, 1, flags);
+ if (!folio)
+ goto pte_unmap;
+
+@@ -2699,7 +2706,7 @@ static int __gup_device_huge(unsigned long pfn, unsigned long addr,
+
+ SetPageReferenced(page);
+ pages[*nr] = page;
+- if (unlikely(try_grab_page(page, flags))) {
++ if (unlikely(try_grab_folio(page_folio(page), 1, flags))) {
+ undo_dev_pagemap(nr, nr_start, flags, pages);
+ break;
+ }
+@@ -2808,7 +2815,7 @@ static int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
+ page = nth_page(pte_page(pte), (addr & (sz - 1)) >> PAGE_SHIFT);
+ refs = record_subpages(page, addr, end, pages + *nr);
+
+- folio = try_grab_folio(page, refs, flags);
++ folio = try_grab_folio_fast(page, refs, flags);
+ if (!folio)
+ return 0;
+
+@@ -2879,7 +2886,7 @@ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
+ page = nth_page(pmd_page(orig), (addr & ~PMD_MASK) >> PAGE_SHIFT);
+ refs = record_subpages(page, addr, end, pages + *nr);
+
+- folio = try_grab_folio(page, refs, flags);
++ folio = try_grab_folio_fast(page, refs, flags);
+ if (!folio)
+ return 0;
+
+@@ -2923,7 +2930,7 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
+ page = nth_page(pud_page(orig), (addr & ~PUD_MASK) >> PAGE_SHIFT);
+ refs = record_subpages(page, addr, end, pages + *nr);
+
+- folio = try_grab_folio(page, refs, flags);
++ folio = try_grab_folio_fast(page, refs, flags);
+ if (!folio)
+ return 0;
+
+@@ -2963,7 +2970,7 @@ static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr,
+ page = nth_page(pgd_page(orig), (addr & ~PGDIR_MASK) >> PAGE_SHIFT);
+ refs = record_subpages(page, addr, end, pages + *nr);
+
+- folio = try_grab_folio(page, refs, flags);
++ folio = try_grab_folio_fast(page, refs, flags);
+ if (!folio)
+ return 0;
+
+diff --git a/mm/huge_memory.c b/mm/huge_memory.c
+index 7ac2877e76629b..f2816c9a1f3ec8 100644
+--- a/mm/huge_memory.c
++++ b/mm/huge_memory.c
+@@ -1056,7 +1056,7 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
+ if (!*pgmap)
+ return ERR_PTR(-EFAULT);
+ page = pfn_to_page(pfn);
+- ret = try_grab_page(page, flags);
++ ret = try_grab_folio(page_folio(page), 1, flags);
+ if (ret)
+ page = ERR_PTR(ret);
+
+@@ -1214,7 +1214,7 @@ struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr,
+ return ERR_PTR(-EFAULT);
+ page = pfn_to_page(pfn);
+
+- ret = try_grab_page(page, flags);
++ ret = try_grab_folio(page_folio(page), 1, flags);
+ if (ret)
+ page = ERR_PTR(ret);
+
+@@ -1475,7 +1475,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
+ VM_BUG_ON_PAGE((flags & FOLL_PIN) && PageAnon(page) &&
+ !PageAnonExclusive(page), page);
+
+- ret = try_grab_page(page, flags);
++ ret = try_grab_folio(page_folio(page), 1, flags);
+ if (ret)
+ return ERR_PTR(ret);
+
+diff --git a/mm/hugetlb.c b/mm/hugetlb.c
+index fb7a531fce7174..0acb04c3e95291 100644
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -6532,7 +6532,7 @@ struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma,
+ * try_grab_page() should always be able to get the page here,
+ * because we hold the ptl lock and have verified pte_present().
+ */
+- ret = try_grab_page(page, flags);
++ ret = try_grab_folio(page_folio(page), 1, flags);
+
+ if (WARN_ON_ONCE(ret)) {
+ page = ERR_PTR(ret);
+diff --git a/mm/internal.h b/mm/internal.h
+index abed947f784b7b..ef8d787a510c5c 100644
+--- a/mm/internal.h
++++ b/mm/internal.h
+@@ -938,8 +938,8 @@ int migrate_device_coherent_page(struct page *page);
+ /*
+ * mm/gup.c
+ */
+-struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags);
+-int __must_check try_grab_page(struct page *page, unsigned int flags);
++int __must_check try_grab_folio(struct folio *folio, int refs,
++ unsigned int flags);
+
+ /*
+ * mm/huge_memory.c
+diff --git a/mm/page_table_check.c b/mm/page_table_check.c
+index 6363f93a47c691..509c6ef8de400e 100644
+--- a/mm/page_table_check.c
++++ b/mm/page_table_check.c
+@@ -7,6 +7,8 @@
+ #include <linux/kstrtox.h>
+ #include <linux/mm.h>
+ #include <linux/page_table_check.h>
++#include <linux/swap.h>
++#include <linux/swapops.h>
+
+ #undef pr_fmt
+ #define pr_fmt(fmt) "page_table_check: " fmt
+@@ -191,6 +193,22 @@ void __page_table_check_pud_clear(struct mm_struct *mm, pud_t pud)
+ }
+ EXPORT_SYMBOL(__page_table_check_pud_clear);
+
++/* Whether the swap entry cached writable information */
++static inline bool swap_cached_writable(swp_entry_t entry)
++{
++ return is_writable_device_exclusive_entry(entry) ||
++ is_writable_device_private_entry(entry) ||
++ is_writable_migration_entry(entry);
++}
++
++static inline void page_table_check_pte_flags(pte_t pte)
++{
++ if (pte_present(pte) && pte_uffd_wp(pte))
++ WARN_ON_ONCE(pte_write(pte));
++ else if (is_swap_pte(pte) && pte_swp_uffd_wp(pte))
++ WARN_ON_ONCE(swap_cached_writable(pte_to_swp_entry(pte)));
++}
++
+ void __page_table_check_ptes_set(struct mm_struct *mm, pte_t *ptep, pte_t pte,
+ unsigned int nr)
+ {
+@@ -199,6 +217,8 @@ void __page_table_check_ptes_set(struct mm_struct *mm, pte_t *ptep, pte_t pte,
+ if (&init_mm == mm)
+ return;
+
++ page_table_check_pte_flags(pte);
++
+ for (i = 0; i < nr; i++)
+ __page_table_check_pte_clear(mm, ptep_get(ptep + i));
+ if (pte_user_accessible_page(pte))
+@@ -206,11 +226,21 @@ void __page_table_check_ptes_set(struct mm_struct *mm, pte_t *ptep, pte_t pte,
+ }
+ EXPORT_SYMBOL(__page_table_check_ptes_set);
+
++static inline void page_table_check_pmd_flags(pmd_t pmd)
++{
++ if (pmd_present(pmd) && pmd_uffd_wp(pmd))
++ WARN_ON_ONCE(pmd_write(pmd));
++ else if (is_swap_pmd(pmd) && pmd_swp_uffd_wp(pmd))
++ WARN_ON_ONCE(swap_cached_writable(pmd_to_swp_entry(pmd)));
++}
++
+ void __page_table_check_pmd_set(struct mm_struct *mm, pmd_t *pmdp, pmd_t pmd)
+ {
+ if (&init_mm == mm)
+ return;
+
++ page_table_check_pmd_flags(pmd);
++
+ __page_table_check_pmd_clear(mm, *pmdp);
+ if (pmd_user_accessible_page(pmd)) {
+ page_table_check_set(pmd_pfn(pmd), PMD_SIZE >> PAGE_SHIFT,
+diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
+index b54e8a530f55a1..29aa07e9db9d71 100644
+--- a/net/bluetooth/rfcomm/sock.c
++++ b/net/bluetooth/rfcomm/sock.c
+@@ -629,7 +629,7 @@ static int rfcomm_sock_setsockopt_old(struct socket *sock, int optname,
+
+ switch (optname) {
+ case RFCOMM_LM:
+- if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
++ if (bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen)) {
+ err = -EFAULT;
+ break;
+ }
+@@ -664,7 +664,6 @@ static int rfcomm_sock_setsockopt(struct socket *sock, int level, int optname,
+ struct sock *sk = sock->sk;
+ struct bt_security sec;
+ int err = 0;
+- size_t len;
+ u32 opt;
+
+ BT_DBG("sk %p", sk);
+@@ -686,11 +685,9 @@ static int rfcomm_sock_setsockopt(struct socket *sock, int level, int optname,
+
+ sec.level = BT_SECURITY_LOW;
+
+- len = min_t(unsigned int, sizeof(sec), optlen);
+- if (copy_from_sockptr(&sec, optval, len)) {
+- err = -EFAULT;
++ err = bt_copy_from_sockptr(&sec, sizeof(sec), optval, optlen);
++ if (err)
+ break;
+- }
+
+ if (sec.level > BT_SECURITY_HIGH) {
+ err = -EINVAL;
+@@ -706,10 +703,9 @@ static int rfcomm_sock_setsockopt(struct socket *sock, int level, int optname,
+ break;
+ }
+
+- if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
+- err = -EFAULT;
++ err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen);
++ if (err)
+ break;
+- }
+
+ if (opt)
+ set_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags);
+diff --git a/net/core/filter.c b/net/core/filter.c
+index 8cb44cd29967bb..be313928d272c6 100644
+--- a/net/core/filter.c
++++ b/net/core/filter.c
+@@ -2271,12 +2271,12 @@ static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev,
+
+ err = bpf_out_neigh_v6(net, skb, dev, nh);
+ if (unlikely(net_xmit_eval(err)))
+- dev->stats.tx_errors++;
++ DEV_STATS_INC(dev, tx_errors);
+ else
+ ret = NET_XMIT_SUCCESS;
+ goto out_xmit;
+ out_drop:
+- dev->stats.tx_errors++;
++ DEV_STATS_INC(dev, tx_errors);
+ kfree_skb(skb);
+ out_xmit:
+ return ret;
+@@ -2378,12 +2378,12 @@ static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev,
+
+ err = bpf_out_neigh_v4(net, skb, dev, nh);
+ if (unlikely(net_xmit_eval(err)))
+- dev->stats.tx_errors++;
++ DEV_STATS_INC(dev, tx_errors);
+ else
+ ret = NET_XMIT_SUCCESS;
+ goto out_xmit;
+ out_drop:
+- dev->stats.tx_errors++;
++ DEV_STATS_INC(dev, tx_errors);
+ kfree_skb(skb);
+ out_xmit:
+ return ret;
+diff --git a/net/ipv4/fou_core.c b/net/ipv4/fou_core.c
+index 0c41076e31edad..b38b82ae903de0 100644
+--- a/net/ipv4/fou_core.c
++++ b/net/ipv4/fou_core.c
+@@ -433,7 +433,7 @@ static struct sk_buff *gue_gro_receive(struct sock *sk,
+
+ offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
+ ops = rcu_dereference(offloads[proto]);
+- if (WARN_ON_ONCE(!ops || !ops->callbacks.gro_receive))
++ if (!ops || !ops->callbacks.gro_receive)
+ goto out;
+
+ pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
+diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
+index b71f94a5932ac0..e0883ba709b0bf 100644
+--- a/net/ipv4/tcp_metrics.c
++++ b/net/ipv4/tcp_metrics.c
+@@ -899,11 +899,13 @@ static void tcp_metrics_flush_all(struct net *net)
+ unsigned int row;
+
+ for (row = 0; row < max_rows; row++, hb++) {
+- struct tcp_metrics_block __rcu **pp;
++ struct tcp_metrics_block __rcu **pp = &hb->chain;
+ bool match;
+
++ if (!rcu_access_pointer(*pp))
++ continue;
++
+ spin_lock_bh(&tcp_metrics_lock);
+- pp = &hb->chain;
+ for (tm = deref_locked(*pp); tm; tm = deref_locked(*pp)) {
+ match = net ? net_eq(tm_net(tm), net) :
+ !refcount_read(&tm_net(tm)->ns.count);
+@@ -915,6 +917,7 @@ static void tcp_metrics_flush_all(struct net *net)
+ }
+ }
+ spin_unlock_bh(&tcp_metrics_lock);
++ cond_resched();
+ }
+ }
+
+diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
+index 6e3bfb46af44d3..52b048807feae5 100644
+--- a/net/mac80211/iface.c
++++ b/net/mac80211/iface.c
+@@ -251,9 +251,9 @@ static int ieee80211_can_powered_addr_change(struct ieee80211_sub_if_data *sdata
+ return ret;
+ }
+
+-static int ieee80211_change_mac(struct net_device *dev, void *addr)
++static int _ieee80211_change_mac(struct ieee80211_sub_if_data *sdata,
++ void *addr)
+ {
+- struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+ struct ieee80211_local *local = sdata->local;
+ struct sockaddr *sa = addr;
+ bool check_dup = true;
+@@ -278,7 +278,7 @@ static int ieee80211_change_mac(struct net_device *dev, void *addr)
+
+ if (live)
+ drv_remove_interface(local, sdata);
+- ret = eth_mac_addr(dev, sa);
++ ret = eth_mac_addr(sdata->dev, sa);
+
+ if (ret == 0) {
+ memcpy(sdata->vif.addr, sa->sa_data, ETH_ALEN);
+@@ -294,6 +294,27 @@ static int ieee80211_change_mac(struct net_device *dev, void *addr)
+ return ret;
+ }
+
++static int ieee80211_change_mac(struct net_device *dev, void *addr)
++{
++ struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
++ struct ieee80211_local *local = sdata->local;
++ int ret;
++
++ /*
++ * This happens during unregistration if there's a bond device
++ * active (maybe other cases?) and we must get removed from it.
++ * But we really don't care anymore if it's not registered now.
++ */
++ if (!dev->ieee80211_ptr->registered)
++ return 0;
++
++ wiphy_lock(local->hw.wiphy);
++ ret = _ieee80211_change_mac(sdata, addr);
++ wiphy_unlock(local->hw.wiphy);
++
++ return ret;
++}
++
+ static inline int identical_mac_addr_allowed(int type1, int type2)
+ {
+ return type1 == NL80211_IFTYPE_MONITOR ||
+diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
+index 819157bbb5a2c6..d5344563e525c9 100644
+--- a/net/nfc/llcp_sock.c
++++ b/net/nfc/llcp_sock.c
+@@ -252,10 +252,10 @@ static int nfc_llcp_setsockopt(struct socket *sock, int level, int optname,
+ break;
+ }
+
+- if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
+- err = -EFAULT;
++ err = copy_safe_from_sockptr(&opt, sizeof(opt),
++ optval, optlen);
++ if (err)
+ break;
+- }
+
+ if (opt > LLCP_MAX_RW) {
+ err = -EINVAL;
+@@ -274,10 +274,10 @@ static int nfc_llcp_setsockopt(struct socket *sock, int level, int optname,
+ break;
+ }
+
+- if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
+- err = -EFAULT;
++ err = copy_safe_from_sockptr(&opt, sizeof(opt),
++ optval, optlen);
++ if (err)
+ break;
+- }
+
+ if (opt > LLCP_MAX_MIUX) {
+ err = -EINVAL;
+diff --git a/net/rds/recv.c b/net/rds/recv.c
+index c71b923764fd7c..5627f80013f8b1 100644
+--- a/net/rds/recv.c
++++ b/net/rds/recv.c
+@@ -425,6 +425,7 @@ static int rds_still_queued(struct rds_sock *rs, struct rds_incoming *inc,
+ struct sock *sk = rds_rs_to_sk(rs);
+ int ret = 0;
+ unsigned long flags;
++ struct rds_incoming *to_drop = NULL;
+
+ write_lock_irqsave(&rs->rs_recv_lock, flags);
+ if (!list_empty(&inc->i_item)) {
+@@ -435,11 +436,14 @@ static int rds_still_queued(struct rds_sock *rs, struct rds_incoming *inc,
+ -be32_to_cpu(inc->i_hdr.h_len),
+ inc->i_hdr.h_dport);
+ list_del_init(&inc->i_item);
+- rds_inc_put(inc);
++ to_drop = inc;
+ }
+ }
+ write_unlock_irqrestore(&rs->rs_recv_lock, flags);
+
++ if (to_drop)
++ rds_inc_put(to_drop);
++
+ rdsdebug("inc %p rs %p still %d dropped %d\n", inc, rs, ret, drop);
+ return ret;
+ }
+@@ -758,16 +762,21 @@ void rds_clear_recv_queue(struct rds_sock *rs)
+ struct sock *sk = rds_rs_to_sk(rs);
+ struct rds_incoming *inc, *tmp;
+ unsigned long flags;
++ LIST_HEAD(to_drop);
+
+ write_lock_irqsave(&rs->rs_recv_lock, flags);
+ list_for_each_entry_safe(inc, tmp, &rs->rs_recv_queue, i_item) {
+ rds_recv_rcvbuf_delta(rs, sk, inc->i_conn->c_lcong,
+ -be32_to_cpu(inc->i_hdr.h_len),
+ inc->i_hdr.h_dport);
++ list_move(&inc->i_item, &to_drop);
++ }
++ write_unlock_irqrestore(&rs->rs_recv_lock, flags);
++
++ list_for_each_entry_safe(inc, tmp, &to_drop, i_item) {
+ list_del_init(&inc->i_item);
+ rds_inc_put(inc);
+ }
+- write_unlock_irqrestore(&rs->rs_recv_lock, flags);
+ }
+
+ /*
+diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
+index 4023c955036b12..6ab9359c1706f1 100644
+--- a/net/sched/sch_generic.c
++++ b/net/sched/sch_generic.c
+@@ -522,8 +522,9 @@ static void dev_watchdog(struct timer_list *t)
+
+ if (unlikely(timedout_ms)) {
+ trace_net_dev_xmit_timeout(dev, i);
+- WARN_ONCE(1, "NETDEV WATCHDOG: %s (%s): transmit queue %u timed out %u ms\n",
+- dev->name, netdev_drivername(dev), i, timedout_ms);
++ netdev_crit(dev, "NETDEV WATCHDOG: CPU: %d: transmit queue %u timed out %u ms\n",
++ raw_smp_processor_id(),
++ i, timedout_ms);
+ netif_freeze_queues(dev);
+ dev->netdev_ops->ndo_tx_timeout(dev, i);
+ netif_unfreeze_queues(dev);
+diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c
+index 7182c5a450fb5b..5c165218180588 100644
+--- a/net/sctp/inqueue.c
++++ b/net/sctp/inqueue.c
+@@ -38,6 +38,14 @@ void sctp_inq_init(struct sctp_inq *queue)
+ INIT_WORK(&queue->immediate, NULL);
+ }
+
++/* Properly release the chunk which is being worked on. */
++static inline void sctp_inq_chunk_free(struct sctp_chunk *chunk)
++{
++ if (chunk->head_skb)
++ chunk->skb = chunk->head_skb;
++ sctp_chunk_free(chunk);
++}
++
+ /* Release the memory associated with an SCTP inqueue. */
+ void sctp_inq_free(struct sctp_inq *queue)
+ {
+@@ -53,7 +61,7 @@ void sctp_inq_free(struct sctp_inq *queue)
+ * free it as well.
+ */
+ if (queue->in_progress) {
+- sctp_chunk_free(queue->in_progress);
++ sctp_inq_chunk_free(queue->in_progress);
+ queue->in_progress = NULL;
+ }
+ }
+@@ -130,9 +138,7 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue)
+ goto new_skb;
+ }
+
+- if (chunk->head_skb)
+- chunk->skb = chunk->head_skb;
+- sctp_chunk_free(chunk);
++ sctp_inq_chunk_free(chunk);
+ chunk = queue->in_progress = NULL;
+ } else {
+ /* Nothing to do. Next chunk in the packet, please. */
+diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
+index 65fc1297c6dfa4..383860cb1d5b0f 100644
+--- a/net/sunrpc/stats.c
++++ b/net/sunrpc/stats.c
+@@ -314,7 +314,7 @@ EXPORT_SYMBOL_GPL(rpc_proc_unregister);
+ struct proc_dir_entry *
+ svc_proc_register(struct net *net, struct svc_stat *statp, const struct proc_ops *proc_ops)
+ {
+- return do_register(net, statp->program->pg_name, statp, proc_ops);
++ return do_register(net, statp->program->pg_name, net, proc_ops);
+ }
+ EXPORT_SYMBOL_GPL(svc_proc_register);
+
+diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
+index 691499d1d2315c..029c49065016ac 100644
+--- a/net/sunrpc/svc.c
++++ b/net/sunrpc/svc.c
+@@ -453,8 +453,8 @@ __svc_init_bc(struct svc_serv *serv)
+ * Create an RPC service
+ */
+ static struct svc_serv *
+-__svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
+- int (*threadfn)(void *data))
++__svc_create(struct svc_program *prog, struct svc_stat *stats,
++ unsigned int bufsize, int npools, int (*threadfn)(void *data))
+ {
+ struct svc_serv *serv;
+ unsigned int vers;
+@@ -466,7 +466,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
+ serv->sv_name = prog->pg_name;
+ serv->sv_program = prog;
+ kref_init(&serv->sv_refcnt);
+- serv->sv_stats = prog->pg_stats;
++ serv->sv_stats = stats;
+ if (bufsize > RPCSVC_MAXPAYLOAD)
+ bufsize = RPCSVC_MAXPAYLOAD;
+ serv->sv_max_payload = bufsize? bufsize : 4096;
+@@ -532,26 +532,28 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
+ struct svc_serv *svc_create(struct svc_program *prog, unsigned int bufsize,
+ int (*threadfn)(void *data))
+ {
+- return __svc_create(prog, bufsize, 1, threadfn);
++ return __svc_create(prog, NULL, bufsize, 1, threadfn);
+ }
+ EXPORT_SYMBOL_GPL(svc_create);
+
+ /**
+ * svc_create_pooled - Create an RPC service with pooled threads
+ * @prog: the RPC program the new service will handle
++ * @stats: the stats struct if desired
+ * @bufsize: maximum message size for @prog
+ * @threadfn: a function to service RPC requests for @prog
+ *
+ * Returns an instantiated struct svc_serv object or NULL.
+ */
+ struct svc_serv *svc_create_pooled(struct svc_program *prog,
++ struct svc_stat *stats,
+ unsigned int bufsize,
+ int (*threadfn)(void *data))
+ {
+ struct svc_serv *serv;
+ unsigned int npools = svc_pool_map_get();
+
+- serv = __svc_create(prog, bufsize, npools, threadfn);
++ serv = __svc_create(prog, stats, bufsize, npools, threadfn);
+ if (!serv)
+ goto out_err;
+ return serv;
+@@ -1377,7 +1379,8 @@ svc_process_common(struct svc_rqst *rqstp)
+ goto err_bad_proc;
+
+ /* Syntactic check complete */
+- serv->sv_stats->rpccnt++;
++ if (serv->sv_stats)
++ serv->sv_stats->rpccnt++;
+ trace_svc_process(rqstp, progp->pg_name);
+
+ aoffset = xdr_stream_pos(xdr);
+@@ -1429,7 +1432,8 @@ svc_process_common(struct svc_rqst *rqstp)
+ goto close_xprt;
+
+ err_bad_rpc:
+- serv->sv_stats->rpcbadfmt++;
++ if (serv->sv_stats)
++ serv->sv_stats->rpcbadfmt++;
+ xdr_stream_encode_u32(xdr, RPC_MSG_DENIED);
+ xdr_stream_encode_u32(xdr, RPC_MISMATCH);
+ /* Only RPCv2 supported */
+@@ -1440,7 +1444,8 @@ svc_process_common(struct svc_rqst *rqstp)
+ err_bad_auth:
+ dprintk("svc: authentication failed (%d)\n",
+ be32_to_cpu(rqstp->rq_auth_stat));
+- serv->sv_stats->rpcbadauth++;
++ if (serv->sv_stats)
++ serv->sv_stats->rpcbadauth++;
+ /* Restore write pointer to location of reply status: */
+ xdr_truncate_encode(xdr, XDR_UNIT * 2);
+ xdr_stream_encode_u32(xdr, RPC_MSG_DENIED);
+@@ -1450,7 +1455,8 @@ svc_process_common(struct svc_rqst *rqstp)
+
+ err_bad_prog:
+ dprintk("svc: unknown program %d\n", rqstp->rq_prog);
+- serv->sv_stats->rpcbadfmt++;
++ if (serv->sv_stats)
++ serv->sv_stats->rpcbadfmt++;
+ *rqstp->rq_accept_statp = rpc_prog_unavail;
+ goto sendit;
+
+@@ -1458,7 +1464,8 @@ svc_process_common(struct svc_rqst *rqstp)
+ svc_printk(rqstp, "unknown version (%d for prog %d, %s)\n",
+ rqstp->rq_vers, rqstp->rq_prog, progp->pg_name);
+
+- serv->sv_stats->rpcbadfmt++;
++ if (serv->sv_stats)
++ serv->sv_stats->rpcbadfmt++;
+ *rqstp->rq_accept_statp = rpc_prog_mismatch;
+
+ /*
+@@ -1472,19 +1479,22 @@ svc_process_common(struct svc_rqst *rqstp)
+ err_bad_proc:
+ svc_printk(rqstp, "unknown procedure (%d)\n", rqstp->rq_proc);
+
+- serv->sv_stats->rpcbadfmt++;
++ if (serv->sv_stats)
++ serv->sv_stats->rpcbadfmt++;
+ *rqstp->rq_accept_statp = rpc_proc_unavail;
+ goto sendit;
+
+ err_garbage_args:
+ svc_printk(rqstp, "failed to decode RPC header\n");
+
+- serv->sv_stats->rpcbadfmt++;
++ if (serv->sv_stats)
++ serv->sv_stats->rpcbadfmt++;
+ *rqstp->rq_accept_statp = rpc_garbage_args;
+ goto sendit;
+
+ err_system_err:
+- serv->sv_stats->rpcbadfmt++;
++ if (serv->sv_stats)
++ serv->sv_stats->rpcbadfmt++;
+ *rqstp->rq_accept_statp = rpc_system_err;
+ goto sendit;
+ }
+@@ -1536,7 +1546,8 @@ void svc_process(struct svc_rqst *rqstp)
+ out_baddir:
+ svc_printk(rqstp, "bad direction 0x%08x, dropping request\n",
+ be32_to_cpu(*p));
+- rqstp->rq_server->sv_stats->rpcbadfmt++;
++ if (rqstp->rq_server->sv_stats)
++ rqstp->rq_server->sv_stats->rpcbadfmt++;
+ out_drop:
+ svc_drop(rqstp);
+ }
+diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
+index be5c42d6ffbeab..2b2dc46dc701f9 100644
+--- a/net/wireless/nl80211.c
++++ b/net/wireless/nl80211.c
+@@ -468,6 +468,10 @@ static struct netlink_range_validation nl80211_punct_bitmap_range = {
+ .max = 0xffff,
+ };
+
++static struct netlink_range_validation q_range = {
++ .max = INT_MAX,
++};
++
+ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
+ [0] = { .strict_start_type = NL80211_ATTR_HE_OBSS_PD },
+ [NL80211_ATTR_WIPHY] = { .type = NLA_U32 },
+@@ -750,7 +754,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
+
+ [NL80211_ATTR_TXQ_LIMIT] = { .type = NLA_U32 },
+ [NL80211_ATTR_TXQ_MEMORY_LIMIT] = { .type = NLA_U32 },
+- [NL80211_ATTR_TXQ_QUANTUM] = { .type = NLA_U32 },
++ [NL80211_ATTR_TXQ_QUANTUM] = NLA_POLICY_FULL_RANGE(NLA_U32, &q_range),
+ [NL80211_ATTR_HE_CAPABILITY] =
+ NLA_POLICY_VALIDATE_FN(NLA_BINARY, validate_he_capa,
+ NL80211_HE_MAX_CAPABILITY_LEN),
+diff --git a/samples/bpf/map_perf_test_user.c b/samples/bpf/map_perf_test_user.c
+index d2fbcf963cdf6d..07ff471ed6aee0 100644
+--- a/samples/bpf/map_perf_test_user.c
++++ b/samples/bpf/map_perf_test_user.c
+@@ -370,7 +370,7 @@ static void run_perf_test(int tasks)
+
+ static void fill_lpm_trie(void)
+ {
+- struct bpf_lpm_trie_key *key;
++ struct bpf_lpm_trie_key_u8 *key;
+ unsigned long value = 0;
+ unsigned int i;
+ int r;
+diff --git a/samples/bpf/xdp_router_ipv4_user.c b/samples/bpf/xdp_router_ipv4_user.c
+index 9d41db09c4800f..266fdd0b025dc6 100644
+--- a/samples/bpf/xdp_router_ipv4_user.c
++++ b/samples/bpf/xdp_router_ipv4_user.c
+@@ -91,7 +91,7 @@ static int recv_msg(struct sockaddr_nl sock_addr, int sock)
+ static void read_route(struct nlmsghdr *nh, int nll)
+ {
+ char dsts[24], gws[24], ifs[16], dsts_len[24], metrics[24];
+- struct bpf_lpm_trie_key *prefix_key;
++ struct bpf_lpm_trie_key_u8 *prefix_key;
+ struct rtattr *rt_attr;
+ struct rtmsg *rt_msg;
+ int rtm_family;
+diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c
+index 8b58a7864703ee..7e8fca0b066280 100644
+--- a/sound/soc/soc-topology.c
++++ b/sound/soc/soc-topology.c
+@@ -1021,6 +1021,7 @@ static int soc_tplg_dapm_graph_elems_load(struct soc_tplg *tplg,
+ struct snd_soc_tplg_hdr *hdr)
+ {
+ struct snd_soc_dapm_context *dapm = &tplg->comp->dapm;
++ const size_t maxlen = SNDRV_CTL_ELEM_ID_NAME_MAXLEN;
+ struct snd_soc_tplg_dapm_graph_elem *elem;
+ struct snd_soc_dapm_route *route;
+ int count, i;
+@@ -1044,39 +1045,22 @@ static int soc_tplg_dapm_graph_elems_load(struct soc_tplg *tplg,
+ tplg->pos += sizeof(struct snd_soc_tplg_dapm_graph_elem);
+
+ /* validate routes */
+- if (strnlen(elem->source, SNDRV_CTL_ELEM_ID_NAME_MAXLEN) ==
+- SNDRV_CTL_ELEM_ID_NAME_MAXLEN) {
+- ret = -EINVAL;
+- break;
+- }
+- if (strnlen(elem->sink, SNDRV_CTL_ELEM_ID_NAME_MAXLEN) ==
+- SNDRV_CTL_ELEM_ID_NAME_MAXLEN) {
+- ret = -EINVAL;
+- break;
+- }
+- if (strnlen(elem->control, SNDRV_CTL_ELEM_ID_NAME_MAXLEN) ==
+- SNDRV_CTL_ELEM_ID_NAME_MAXLEN) {
++ if ((strnlen(elem->source, maxlen) == maxlen) ||
++ (strnlen(elem->sink, maxlen) == maxlen) ||
++ (strnlen(elem->control, maxlen) == maxlen)) {
+ ret = -EINVAL;
+ break;
+ }
+
+- route->source = devm_kmemdup(tplg->dev, elem->source,
+- min(strlen(elem->source),
+- SNDRV_CTL_ELEM_ID_NAME_MAXLEN),
+- GFP_KERNEL);
+- route->sink = devm_kmemdup(tplg->dev, elem->sink,
+- min(strlen(elem->sink), SNDRV_CTL_ELEM_ID_NAME_MAXLEN),
+- GFP_KERNEL);
++ route->source = devm_kstrdup(tplg->dev, elem->source, GFP_KERNEL);
++ route->sink = devm_kstrdup(tplg->dev, elem->sink, GFP_KERNEL);
+ if (!route->source || !route->sink) {
+ ret = -ENOMEM;
+ break;
+ }
+
+- if (strnlen(elem->control, SNDRV_CTL_ELEM_ID_NAME_MAXLEN) != 0) {
+- route->control = devm_kmemdup(tplg->dev, elem->control,
+- min(strlen(elem->control),
+- SNDRV_CTL_ELEM_ID_NAME_MAXLEN),
+- GFP_KERNEL);
++ if (strnlen(elem->control, maxlen) != 0) {
++ route->control = devm_kstrdup(tplg->dev, elem->control, GFP_KERNEL);
+ if (!route->control) {
+ ret = -ENOMEM;
+ break;
+diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c
+index d1bdb0b93bda0c..8cc2d4937f3403 100644
+--- a/sound/usb/mixer.c
++++ b/sound/usb/mixer.c
+@@ -2021,6 +2021,13 @@ static int parse_audio_feature_unit(struct mixer_build *state, int unitid,
+ bmaControls = ftr->bmaControls;
+ }
+
++ if (channels > 32) {
++ usb_audio_info(state->chip,
++ "usbmixer: too many channels (%d) in unit %d\n",
++ channels, unitid);
++ return -EINVAL;
++ }
++
+ /* parse the source unit */
+ err = parse_audio_unit(state, hdr->bSourceID);
+ if (err < 0)
+diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
+index fb09fd1767f289..ba6e346c8d669a 100644
+--- a/tools/include/uapi/linux/bpf.h
++++ b/tools/include/uapi/linux/bpf.h
+@@ -77,12 +77,29 @@ struct bpf_insn {
+ __s32 imm; /* signed immediate constant */
+ };
+
+-/* Key of an a BPF_MAP_TYPE_LPM_TRIE entry */
++/* Deprecated: use struct bpf_lpm_trie_key_u8 (when the "data" member is needed for
++ * byte access) or struct bpf_lpm_trie_key_hdr (when using an alternative type for
++ * the trailing flexible array member) instead.
++ */
+ struct bpf_lpm_trie_key {
+ __u32 prefixlen; /* up to 32 for AF_INET, 128 for AF_INET6 */
+ __u8 data[0]; /* Arbitrary size */
+ };
+
++/* Header for bpf_lpm_trie_key structs */
++struct bpf_lpm_trie_key_hdr {
++ __u32 prefixlen;
++};
++
++/* Key of an a BPF_MAP_TYPE_LPM_TRIE entry, with trailing byte array. */
++struct bpf_lpm_trie_key_u8 {
++ union {
++ struct bpf_lpm_trie_key_hdr hdr;
++ __u32 prefixlen;
++ };
++ __u8 data[]; /* Arbitrary size */
++};
++
+ struct bpf_cgroup_storage_key {
+ __u64 cgroup_inode_id; /* cgroup inode id */
+ __u32 attach_type; /* program attach type (enum bpf_attach_type) */
+diff --git a/tools/testing/selftests/bpf/progs/map_ptr_kern.c b/tools/testing/selftests/bpf/progs/map_ptr_kern.c
+index 3325da17ec81af..efaf622c28ddec 100644
+--- a/tools/testing/selftests/bpf/progs/map_ptr_kern.c
++++ b/tools/testing/selftests/bpf/progs/map_ptr_kern.c
+@@ -316,7 +316,7 @@ struct lpm_trie {
+ } __attribute__((preserve_access_index));
+
+ struct lpm_key {
+- struct bpf_lpm_trie_key trie_key;
++ struct bpf_lpm_trie_key_hdr trie_key;
+ __u32 data;
+ };
+
+diff --git a/tools/testing/selftests/bpf/test_lpm_map.c b/tools/testing/selftests/bpf/test_lpm_map.c
+index c028d621c744da..d98c72dc563eaf 100644
+--- a/tools/testing/selftests/bpf/test_lpm_map.c
++++ b/tools/testing/selftests/bpf/test_lpm_map.c
+@@ -211,7 +211,7 @@ static void test_lpm_map(int keysize)
+ volatile size_t n_matches, n_matches_after_delete;
+ size_t i, j, n_nodes, n_lookups;
+ struct tlpm_node *t, *list = NULL;
+- struct bpf_lpm_trie_key *key;
++ struct bpf_lpm_trie_key_u8 *key;
+ uint8_t *data, *value;
+ int r, map;
+
+@@ -331,8 +331,8 @@ static void test_lpm_map(int keysize)
+ static void test_lpm_ipaddr(void)
+ {
+ LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_NO_PREALLOC);
+- struct bpf_lpm_trie_key *key_ipv4;
+- struct bpf_lpm_trie_key *key_ipv6;
++ struct bpf_lpm_trie_key_u8 *key_ipv4;
++ struct bpf_lpm_trie_key_u8 *key_ipv6;
+ size_t key_size_ipv4;
+ size_t key_size_ipv6;
+ int map_fd_ipv4;
+@@ -423,7 +423,7 @@ static void test_lpm_ipaddr(void)
+ static void test_lpm_delete(void)
+ {
+ LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_NO_PREALLOC);
+- struct bpf_lpm_trie_key *key;
++ struct bpf_lpm_trie_key_u8 *key;
+ size_t key_size;
+ int map_fd;
+ __u64 value;
+@@ -532,7 +532,7 @@ static void test_lpm_delete(void)
+ static void test_lpm_get_next_key(void)
+ {
+ LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_NO_PREALLOC);
+- struct bpf_lpm_trie_key *key_p, *next_key_p;
++ struct bpf_lpm_trie_key_u8 *key_p, *next_key_p;
+ size_t key_size;
+ __u32 value = 0;
+ int map_fd;
+@@ -693,9 +693,9 @@ static void *lpm_test_command(void *arg)
+ {
+ int i, j, ret, iter, key_size;
+ struct lpm_mt_test_info *info = arg;
+- struct bpf_lpm_trie_key *key_p;
++ struct bpf_lpm_trie_key_u8 *key_p;
+
+- key_size = sizeof(struct bpf_lpm_trie_key) + sizeof(__u32);
++ key_size = sizeof(*key_p) + sizeof(__u32);
+ key_p = alloca(key_size);
+ for (iter = 0; iter < info->iter; iter++)
+ for (i = 0; i < MAX_TEST_KEYS; i++) {
+@@ -717,7 +717,7 @@ static void *lpm_test_command(void *arg)
+ ret = bpf_map_lookup_elem(info->map_fd, key_p, &value);
+ assert(ret == 0 || errno == ENOENT);
+ } else {
+- struct bpf_lpm_trie_key *next_key_p = alloca(key_size);
++ struct bpf_lpm_trie_key_u8 *next_key_p = alloca(key_size);
+ ret = bpf_map_get_next_key(info->map_fd, key_p, next_key_p);
+ assert(ret == 0 || errno == ENOENT || errno == ENOMEM);
+ }
+@@ -752,7 +752,7 @@ static void test_lpm_multi_thread(void)
+
+ /* create a trie */
+ value_size = sizeof(__u32);
+- key_size = sizeof(struct bpf_lpm_trie_key) + value_size;
++ key_size = sizeof(struct bpf_lpm_trie_key_hdr) + value_size;
+ map_fd = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE, NULL, key_size, value_size, 100, &opts);
+
+ /* create 4 threads to test update, delete, lookup and get_next_key */
+diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c
+index ad993ab3ac1819..bc36c91c4480f5 100644
+--- a/tools/testing/selftests/net/tls.c
++++ b/tools/testing/selftests/net/tls.c
+@@ -707,6 +707,20 @@ TEST_F(tls, splice_from_pipe)
+ EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0);
+ }
+
++TEST_F(tls, splice_more)
++{
++ unsigned int f = SPLICE_F_NONBLOCK | SPLICE_F_MORE | SPLICE_F_GIFT;
++ int send_len = TLS_PAYLOAD_MAX_LEN;
++ char mem_send[TLS_PAYLOAD_MAX_LEN];
++ int i, send_pipe = 1;
++ int p[2];
++
++ ASSERT_GE(pipe(p), 0);
++ EXPECT_GE(write(p[1], mem_send, send_len), 0);
++ for (i = 0; i < 32; i++)
++ EXPECT_EQ(splice(p[0], NULL, self->fd, NULL, send_pipe, f), 1);
++}
++
+ TEST_F(tls, splice_from_pipe2)
+ {
+ int send_len = 16000;