Merge branch 'akpm' (patches from Andrew)
Merge misc fixes from Andrew Morton:
 "10 fixes"

* emailed patches from Andrew Morton <[email protected]>:
  x86/mm: split vmalloc_sync_all()
  mm, slub: prevent kmalloc_node crashes and memory leaks
  mm/mmu_notifier: silence PROVE_RCU_LIST warnings
  epoll: fix possible lost wakeup on epoll_ctl() path
  mm: do not allow MADV_PAGEOUT for CoW pages
  mm, memcg: throttle allocators based on ancestral memory.high
  mm, memcg: fix corruption on 64-bit divisor in memory.high throttling
  page-flags: fix a crash at SetPageError(THP_SWAP)
  mm/hotplug: fix hot remove failure in SPARSEMEM|!VMEMMAP case
  memcg: fix NULL pointer dereference in __mem_cgroup_usage_unregister_event
torvalds committed Mar 22, 2020
2 parents b74b991 + 763802b commit b3c03db
Showing 13 changed files with 164 additions and 78 deletions.
26 changes: 24 additions & 2 deletions arch/x86/mm/fault.c
@@ -190,7 +190,7 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
return pmd_k;
}

-void vmalloc_sync_all(void)
+static void vmalloc_sync(void)
{
unsigned long address;

@@ -217,6 +217,16 @@ void vmalloc_sync_all(void)
}
}

+void vmalloc_sync_mappings(void)
+{
+vmalloc_sync();
+}
+
+void vmalloc_sync_unmappings(void)
+{
+vmalloc_sync();
+}

/*
* 32-bit:
*
@@ -319,11 +329,23 @@ static void dump_pagetable(unsigned long address)

#else /* CONFIG_X86_64: */

-void vmalloc_sync_all(void)
+void vmalloc_sync_mappings(void)
{
+/*
+* 64-bit mappings might allocate new p4d/pud pages
+* that need to be propagated to all tasks' PGDs.
+*/
sync_global_pgds(VMALLOC_START & PGDIR_MASK, VMALLOC_END);
}

+void vmalloc_sync_unmappings(void)
+{
+/*
+* Unmappings never allocate or free p4d/pud pages.
+* No work is required here.
+*/
+}

/*
* 64-bit:
*
2 changes: 1 addition & 1 deletion drivers/acpi/apei/ghes.c
@@ -171,7 +171,7 @@ int ghes_estatus_pool_init(int num_ghes)
* New allocation must be visible in all pgd before it can be found by
* an NMI allocating from the pool.
*/
-vmalloc_sync_all();
+vmalloc_sync_mappings();

rc = gen_pool_add(ghes_estatus_pool, addr, PAGE_ALIGN(len), -1);
if (rc)
8 changes: 4 additions & 4 deletions fs/eventpoll.c
@@ -1854,9 +1854,9 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
waiter = true;
init_waitqueue_entry(&wait, current);

-spin_lock_irq(&ep->wq.lock);
+write_lock_irq(&ep->lock);
__add_wait_queue_exclusive(&ep->wq, &wait);
-spin_unlock_irq(&ep->wq.lock);
+write_unlock_irq(&ep->lock);
}

for (;;) {
@@ -1904,9 +1904,9 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
goto fetch_events;

if (waiter) {
-spin_lock_irq(&ep->wq.lock);
+write_lock_irq(&ep->lock);
__remove_wait_queue(&ep->wq, &wait);
-spin_unlock_irq(&ep->wq.lock);
+write_unlock_irq(&ep->lock);
}

return res;
2 changes: 1 addition & 1 deletion include/linux/page-flags.h
@@ -311,7 +311,7 @@ static inline int TestClearPage##uname(struct page *page) { return 0; }

__PAGEFLAG(Locked, locked, PF_NO_TAIL)
PAGEFLAG(Waiters, waiters, PF_ONLY_HEAD) __CLEARPAGEFLAG(Waiters, waiters, PF_ONLY_HEAD)
-PAGEFLAG(Error, error, PF_NO_COMPOUND) TESTCLEARFLAG(Error, error, PF_NO_COMPOUND)
+PAGEFLAG(Error, error, PF_NO_TAIL) TESTCLEARFLAG(Error, error, PF_NO_TAIL)
PAGEFLAG(Referenced, referenced, PF_HEAD)
TESTCLEARFLAG(Referenced, referenced, PF_HEAD)
__SETPAGEFLAG(Referenced, referenced, PF_HEAD)
5 changes: 3 additions & 2 deletions include/linux/vmalloc.h
@@ -141,8 +141,9 @@ extern int remap_vmalloc_range_partial(struct vm_area_struct *vma,

extern int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
unsigned long pgoff);
-void vmalloc_sync_all(void);

+void vmalloc_sync_mappings(void);
+void vmalloc_sync_unmappings(void);

/*
* Lowlevel-APIs (not for driver use!)
*/
2 changes: 1 addition & 1 deletion kernel/notifier.c
@@ -519,7 +519,7 @@ NOKPROBE_SYMBOL(notify_die);

int register_die_notifier(struct notifier_block *nb)
{
-vmalloc_sync_all();
+vmalloc_sync_mappings();
return atomic_notifier_chain_register(&die_chain, nb);
}
EXPORT_SYMBOL_GPL(register_die_notifier);
12 changes: 9 additions & 3 deletions mm/madvise.c
@@ -335,12 +335,14 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
}

page = pmd_page(orig_pmd);

+/* Do not interfere with other mappings of this page */
+if (page_mapcount(page) != 1)
+goto huge_unlock;

if (next - addr != HPAGE_PMD_SIZE) {
int err;

-if (page_mapcount(page) != 1)
-goto huge_unlock;

get_page(page);
spin_unlock(ptl);
lock_page(page);
@@ -426,6 +428,10 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
continue;
}

+/* Do not interfere with other mappings of this page */
+if (page_mapcount(page) != 1)
+continue;

VM_BUG_ON_PAGE(PageTransCompound(page), page);

if (pte_young(ptent)) {
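Note on the mm/madvise.c hunks above: page_mapcount(page) != 1 means the page is still mapped by another process, which for anonymous memory typically means a CoW page shared across fork(), so MADV_PAGEOUT from one mapper may no longer evict memory that is still hot in the other. A minimal userspace sketch of that situation, for illustration only (not part of the commit); it assumes a kernel that defines MADV_PAGEOUT (v5.4+) and omits error handling:

#define _GNU_SOURCE
#include <stddef.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <unistd.h>

#ifndef MADV_PAGEOUT
#define MADV_PAGEOUT 21		/* value from <asm-generic/mman-common.h> */
#endif

int main(void)
{
	size_t len = 64UL << 20;	/* 64 MiB of private anonymous memory */
	char *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	memset(buf, 1, len);		/* fault the pages in */

	if (fork() == 0) {
		/*
		 * Every page here is a CoW page shared with the parent, so
		 * page_mapcount() != 1; with this fix the kernel skips them
		 * instead of paging out the parent's working set.
		 */
		madvise(buf, len, MADV_PAGEOUT);
		_exit(0);
	}
	wait(NULL);
	munmap(buf, len);
	return 0;
}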
103 changes: 66 additions & 37 deletions mm/memcontrol.c
@@ -2297,53 +2297,51 @@ static void high_work_func(struct work_struct *work)
#define MEMCG_DELAY_SCALING_SHIFT 14

/*
-* Scheduled by try_charge() to be executed from the userland return path
-* and reclaims memory over the high limit.
+* Get the number of jiffies that we should penalise a mischievous cgroup which
+* is exceeding its memory.high by checking both it and its ancestors.
*/
-void mem_cgroup_handle_over_high(void)
+static unsigned long calculate_high_delay(struct mem_cgroup *memcg,
+unsigned int nr_pages)
{
-unsigned long usage, high, clamped_high;
-unsigned long pflags;
-unsigned long penalty_jiffies, overage;
-unsigned int nr_pages = current->memcg_nr_pages_over_high;
-struct mem_cgroup *memcg;
+unsigned long penalty_jiffies;
+u64 max_overage = 0;

-if (likely(!nr_pages))
-return;
+do {
+unsigned long usage, high;
+u64 overage;

-memcg = get_mem_cgroup_from_mm(current->mm);
-reclaim_high(memcg, nr_pages, GFP_KERNEL);
-current->memcg_nr_pages_over_high = 0;
+usage = page_counter_read(&memcg->memory);
+high = READ_ONCE(memcg->high);

+/*
+* Prevent division by 0 in overage calculation by acting as if
+* it was a threshold of 1 page
+*/
+high = max(high, 1UL);
+
+overage = usage - high;
+overage <<= MEMCG_DELAY_PRECISION_SHIFT;
+overage = div64_u64(overage, high);
+
+if (overage > max_overage)
+max_overage = overage;
+} while ((memcg = parent_mem_cgroup(memcg)) &&
+!mem_cgroup_is_root(memcg));
+
+if (!max_overage)
+return 0;

/*
-* memory.high is breached and reclaim is unable to keep up. Throttle
-* allocators proactively to slow down excessive growth.
-*
* We use overage compared to memory.high to calculate the number of
* jiffies to sleep (penalty_jiffies). Ideally this value should be
* fairly lenient on small overages, and increasingly harsh when the
* memcg in question makes it clear that it has no intention of stopping
* its crazy behaviour, so we exponentially increase the delay based on
* overage amount.
*/

-usage = page_counter_read(&memcg->memory);
-high = READ_ONCE(memcg->high);
-
-if (usage <= high)
-goto out;
-
-/*
-* Prevent division by 0 in overage calculation by acting as if it was a
-* threshold of 1 page
-*/
-clamped_high = max(high, 1UL);
-
-overage = div_u64((u64)(usage - high) << MEMCG_DELAY_PRECISION_SHIFT,
-clamped_high);
-
-penalty_jiffies = ((u64)overage * overage * HZ)
->> (MEMCG_DELAY_PRECISION_SHIFT + MEMCG_DELAY_SCALING_SHIFT);
+penalty_jiffies = max_overage * max_overage * HZ;
+penalty_jiffies >>= MEMCG_DELAY_PRECISION_SHIFT;
+penalty_jiffies >>= MEMCG_DELAY_SCALING_SHIFT;

/*
* Factor in the task's own contribution to the overage, such that four
@@ -2360,7 +2358,32 @@ void mem_cgroup_handle_over_high(void)
* application moving forwards and also permit diagnostics, albeit
* extremely slowly.
*/
-penalty_jiffies = min(penalty_jiffies, MEMCG_MAX_HIGH_DELAY_JIFFIES);
+return min(penalty_jiffies, MEMCG_MAX_HIGH_DELAY_JIFFIES);
+}
+
+/*
+* Scheduled by try_charge() to be executed from the userland return path
+* and reclaims memory over the high limit.
+*/
+void mem_cgroup_handle_over_high(void)
+{
+unsigned long penalty_jiffies;
+unsigned long pflags;
+unsigned int nr_pages = current->memcg_nr_pages_over_high;
+struct mem_cgroup *memcg;
+
+if (likely(!nr_pages))
+return;
+
+memcg = get_mem_cgroup_from_mm(current->mm);
+reclaim_high(memcg, nr_pages, GFP_KERNEL);
+current->memcg_nr_pages_over_high = 0;
+
+/*
+* memory.high is breached and reclaim is unable to keep up. Throttle
+* allocators proactively to slow down excessive growth.
+*/
+penalty_jiffies = calculate_high_delay(memcg, nr_pages);

/*
* Don't sleep if the amount of jiffies this memcg owes us is so low
@@ -4027,7 +4050,7 @@ static void __mem_cgroup_usage_unregister_event(struct mem_cgroup *memcg,
struct mem_cgroup_thresholds *thresholds;
struct mem_cgroup_threshold_ary *new;
unsigned long usage;
-int i, j, size;
+int i, j, size, entries;

mutex_lock(&memcg->thresholds_lock);

@@ -4047,14 +4070,20 @@ static void __mem_cgroup_usage_unregister_event(struct mem_cgroup *memcg,
__mem_cgroup_threshold(memcg, type == _MEMSWAP);

/* Calculate new number of threshold */
-size = 0;
+size = entries = 0;
for (i = 0; i < thresholds->primary->size; i++) {
if (thresholds->primary->entries[i].eventfd != eventfd)
size++;
+else
+entries++;
}

new = thresholds->spare;

+/* If no items related to eventfd have been cleared, nothing to do */
+if (!entries)
+goto unlock;

/* Set thresholds array to NULL if we don't have thresholds */
if (!size) {
kfree(new);
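To get a feel for the delay curve produced by calculate_high_delay() above, here is a small standalone model of the single-cgroup case (the real function additionally walks the ancestors and keeps the worst overage). It is only a sketch: MEMCG_DELAY_PRECISION_SHIFT (20) and MEMCG_MAX_HIGH_DELAY_JIFFIES (2 * HZ) do not appear in this diff and are assumed here, as is HZ = 1000.

#include <stdint.h>
#include <stdio.h>

#define MEMCG_DELAY_PRECISION_SHIFT	20	/* assumed, not shown in the hunk */
#define MEMCG_DELAY_SCALING_SHIFT	14
#define HZ				1000	/* assumed */
#define MEMCG_MAX_HIGH_DELAY_JIFFIES	(2UL * HZ)	/* assumed */

/* usage and high are in pages, as in the kernel's page counters */
static unsigned long model_high_delay(unsigned long usage, unsigned long high)
{
	uint64_t overage, penalty;

	if (usage <= high)
		return 0;

	high = high ? high : 1;		/* avoid division by zero */
	overage = ((uint64_t)(usage - high) << MEMCG_DELAY_PRECISION_SHIFT) / high;

	/* quadratic in the relative overage, then clamped */
	penalty = overage * overage * HZ;
	penalty >>= MEMCG_DELAY_PRECISION_SHIFT;
	penalty >>= MEMCG_DELAY_SCALING_SHIFT;

	return penalty < MEMCG_MAX_HIGH_DELAY_JIFFIES ?
	       (unsigned long)penalty : MEMCG_MAX_HIGH_DELAY_JIFFIES;
}

int main(void)
{
	/* high = 25600 pages (100 MiB with 4 KiB pages) */
	printf("10%% over high: %lu jiffies\n", model_high_delay(28160, 25600));	/* ~0.6 s */
	printf("50%% over high: %lu jiffies\n", model_high_delay(38400, 25600));	/* clamped to 2 s */
	return 0;
}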
27 changes: 18 additions & 9 deletions mm/mmu_notifier.c
@@ -307,7 +307,8 @@ static void mn_hlist_release(struct mmu_notifier_subscriptions *subscriptions,
* ->release returns.
*/
id = srcu_read_lock(&srcu);
-hlist_for_each_entry_rcu(subscription, &subscriptions->list, hlist)
+hlist_for_each_entry_rcu(subscription, &subscriptions->list, hlist,
+srcu_read_lock_held(&srcu))
/*
* If ->release runs before mmu_notifier_unregister it must be
* handled, as it's the only way for the driver to flush all
@@ -370,7 +371,8 @@ int __mmu_notifier_clear_flush_young(struct mm_struct *mm,

id = srcu_read_lock(&srcu);
hlist_for_each_entry_rcu(subscription,
-&mm->notifier_subscriptions->list, hlist) {
+&mm->notifier_subscriptions->list, hlist,
+srcu_read_lock_held(&srcu)) {
if (subscription->ops->clear_flush_young)
young |= subscription->ops->clear_flush_young(
subscription, mm, start, end);
@@ -389,7 +391,8 @@ int __mmu_notifier_clear_young(struct mm_struct *mm,

id = srcu_read_lock(&srcu);
hlist_for_each_entry_rcu(subscription,
-&mm->notifier_subscriptions->list, hlist) {
+&mm->notifier_subscriptions->list, hlist,
+srcu_read_lock_held(&srcu)) {
if (subscription->ops->clear_young)
young |= subscription->ops->clear_young(subscription,
mm, start, end);
@@ -407,7 +410,8 @@ int __mmu_notifier_test_young(struct mm_struct *mm,

id = srcu_read_lock(&srcu);
hlist_for_each_entry_rcu(subscription,
-&mm->notifier_subscriptions->list, hlist) {
+&mm->notifier_subscriptions->list, hlist,
+srcu_read_lock_held(&srcu)) {
if (subscription->ops->test_young) {
young = subscription->ops->test_young(subscription, mm,
address);
@@ -428,7 +432,8 @@ void __mmu_notifier_change_pte(struct mm_struct *mm, unsigned long address,

id = srcu_read_lock(&srcu);
hlist_for_each_entry_rcu(subscription,
-&mm->notifier_subscriptions->list, hlist) {
+&mm->notifier_subscriptions->list, hlist,
+srcu_read_lock_held(&srcu)) {
if (subscription->ops->change_pte)
subscription->ops->change_pte(subscription, mm, address,
pte);
@@ -476,7 +481,8 @@ static int mn_hlist_invalidate_range_start(
int id;

id = srcu_read_lock(&srcu);
-hlist_for_each_entry_rcu(subscription, &subscriptions->list, hlist) {
+hlist_for_each_entry_rcu(subscription, &subscriptions->list, hlist,
+srcu_read_lock_held(&srcu)) {
const struct mmu_notifier_ops *ops = subscription->ops;

if (ops->invalidate_range_start) {
@@ -528,7 +534,8 @@ mn_hlist_invalidate_end(struct mmu_notifier_subscriptions *subscriptions,
int id;

id = srcu_read_lock(&srcu);
-hlist_for_each_entry_rcu(subscription, &subscriptions->list, hlist) {
+hlist_for_each_entry_rcu(subscription, &subscriptions->list, hlist,
+srcu_read_lock_held(&srcu)) {
/*
* Call invalidate_range here too to avoid the need for the
* subsystem of having to register an invalidate_range_end
@@ -582,7 +589,8 @@ void __mmu_notifier_invalidate_range(struct mm_struct *mm,

id = srcu_read_lock(&srcu);
hlist_for_each_entry_rcu(subscription,
-&mm->notifier_subscriptions->list, hlist) {
+&mm->notifier_subscriptions->list, hlist,
+srcu_read_lock_held(&srcu)) {
if (subscription->ops->invalidate_range)
subscription->ops->invalidate_range(subscription, mm,
start, end);
@@ -714,7 +722,8 @@ find_get_mmu_notifier(struct mm_struct *mm, const struct mmu_notifier_ops *ops)

spin_lock(&mm->notifier_subscriptions->lock);
hlist_for_each_entry_rcu(subscription,
-&mm->notifier_subscriptions->list, hlist) {
+&mm->notifier_subscriptions->list, hlist,
+lockdep_is_held(&mm->notifier_subscriptions->lock)) {
if (subscription->ops != ops)
continue;

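The mm/mmu_notifier.c hunks above all apply one pattern: with CONFIG_PROVE_RCU_LIST, hlist_for_each_entry_rcu() warns unless it is called under rcu_read_lock() or told which other lock protects the walk, so each traversal now passes the lockdep condition it actually relies on (srcu_read_lock_held() for the SRCU readers, lockdep_is_held() for the registration spinlock). A generic kernel-code sketch of that pattern, with made-up names (struct item, my_srcu, my_lock) and not taken from this commit:

#include <linux/rculist.h>
#include <linux/spinlock.h>
#include <linux/srcu.h>

struct item {
	struct hlist_node node;
	int val;
};

static HLIST_HEAD(items);
DEFINE_STATIC_SRCU(my_srcu);
static DEFINE_SPINLOCK(my_lock);

static int sum_items(void)
{
	struct item *it;
	int idx, sum = 0;

	/* Reader side: the list is protected by SRCU, not plain RCU. */
	idx = srcu_read_lock(&my_srcu);
	hlist_for_each_entry_rcu(it, &items, node,
				 srcu_read_lock_held(&my_srcu))
		sum += it->val;
	srcu_read_unlock(&my_srcu, idx);

	/* The same walk is also legal under the update-side lock. */
	spin_lock(&my_lock);
	hlist_for_each_entry_rcu(it, &items, node,
				 lockdep_is_held(&my_lock))
		sum += it->val;
	spin_unlock(&my_lock);

	return sum;
}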