From: Miaohe Lin Subject: mm/swapfile: unuse_pte can map random data if swap read fails There is a bug in unuse_pte(): when swap page happens to be unreadable, page filled with random data is mapped into user address space. In case of error, a special swap entry indicating swap read fails is set to the page table. So the swapcache page can be freed and the user won't end up with a permanently mounted swap because a sector is bad. And if the page is accessed later, the user process will be killed so that corrupted data is never consumed. On the other hand, if the page is never accessed, the user won't even notice it. Link: https://lkml.kernel.org/r/20220424091105.48374-2-linmiaohe@huawei.com Signed-off-by: Miaohe Lin Acked-by: David Hildenbrand Cc: Peter Xu Cc: Alistair Popple Cc: David Howells Cc: Matthew Wilcox Cc: Minchan Kim Cc: Naoya Horiguchi Cc: NeilBrown Cc: Stephen Rothwell Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: "Huang, Ying" Signed-off-by: Andrew Morton --- include/linux/swap.h | 7 ++++++- include/linux/swapops.h | 10 ++++++++++ mm/memory.c | 5 ++++- mm/swapfile.c | 11 +++++++++++ 4 files changed, 31 insertions(+), 2 deletions(-) --- a/include/linux/swap.h~mm-swapfile-unuse_pte-can-map-random-data-if-swap-read-fails +++ a/include/linux/swap.h @@ -55,6 +55,10 @@ static inline int current_is_kswapd(void * actions on faults. */ +#define SWP_SWAPIN_ERROR_NUM 1 +#define SWP_SWAPIN_ERROR (MAX_SWAPFILES + SWP_HWPOISON_NUM + \ + SWP_MIGRATION_NUM + SWP_DEVICE_NUM + \ + SWP_PTE_MARKER_NUM) /* * PTE markers are used to persist information onto PTEs that are mapped with * file-backed memories. As its name "PTE" hints, it should only be applied to @@ -120,7 +124,8 @@ static inline int current_is_kswapd(void #define MAX_SWAPFILES \ ((1 << MAX_SWAPFILES_SHIFT) - SWP_DEVICE_NUM - \ - SWP_MIGRATION_NUM - SWP_HWPOISON_NUM - SWP_PTE_MARKER_NUM) + SWP_MIGRATION_NUM - SWP_HWPOISON_NUM - \ + SWP_PTE_MARKER_NUM - SWP_SWAPIN_ERROR_NUM) /* * Magic header for a swap area. The first part of the union is --- a/include/linux/swapops.h~mm-swapfile-unuse_pte-can-map-random-data-if-swap-read-fails +++ a/include/linux/swapops.h @@ -108,6 +108,16 @@ static inline void *swp_to_radix_entry(s return xa_mk_value(entry.val); } +static inline swp_entry_t make_swapin_error_entry(struct page *page) +{ + return swp_entry(SWP_SWAPIN_ERROR, page_to_pfn(page)); +} + +static inline int is_swapin_error_entry(swp_entry_t entry) +{ + return swp_type(entry) == SWP_SWAPIN_ERROR; +} + #if IS_ENABLED(CONFIG_DEVICE_PRIVATE) static inline swp_entry_t make_readable_device_private_entry(pgoff_t offset) { --- a/mm/memory.c~mm-swapfile-unuse_pte-can-map-random-data-if-swap-read-fails +++ a/mm/memory.c @@ -1488,7 +1488,8 @@ again: /* Only drop the uffd-wp marker if explicitly requested */ if (!zap_drop_file_uffd_wp(details)) continue; - } else if (is_hwpoison_entry(entry)) { + } else if (is_hwpoison_entry(entry) || + is_swapin_error_entry(entry)) { if (!should_zap_cows(details)) continue; } else { @@ -3728,6 +3729,8 @@ vm_fault_t do_swap_page(struct vm_fault ret = vmf->page->pgmap->ops->migrate_to_ram(vmf); } else if (is_hwpoison_entry(entry)) { ret = VM_FAULT_HWPOISON; + } else if (is_swapin_error_entry(entry)) { + ret = VM_FAULT_SIGBUS; } else if (is_pte_marker_entry(entry)) { ret = handle_pte_marker(vmf); } else { --- a/mm/swapfile.c~mm-swapfile-unuse_pte-can-map-random-data-if-swap-read-fails +++ a/mm/swapfile.c @@ -1797,6 +1797,17 @@ static int unuse_pte(struct vm_area_stru goto out; } + if (unlikely(!PageUptodate(page))) { + pte_t pteval; + + dec_mm_counter(vma->vm_mm, MM_SWAPENTS); + pteval = swp_entry_to_pte(make_swapin_error_entry(page)); + set_pte_at(vma->vm_mm, addr, pte, pteval); + swap_free(entry); + ret = 0; + goto out; + } + /* See do_swap_page() */ BUG_ON(!PageAnon(page) && PageMappedToDisk(page)); BUG_ON(PageAnon(page) && PageAnonExclusive(page)); _