diff options
author | Alex Shi <alex.shi@linaro.org> | 2018-01-18 15:26:28 +0800 |
---|---|---|
committer | Alex Shi <alex.shi@linaro.org> | 2018-01-18 15:26:28 +0800 |
commit | 2ba51c6d36ee1b43c1f898c6c48ddf3d33e9ab30 (patch) | |
tree | ff3302dc45a91d7bfcaabd37bfa8fa391a954180 /mm/huge_memory.c | |
parent | 28610abf4a574c33ca70e3d7b0e523fdede488d1 (diff) | |
parent | 90816cc1d4a1d23efe37b74866c6174dd5eab6b5 (diff) |
Merge remote-tracking branch 'rt-stable/v4.9-rt' into linux-linaro-lsk-v4.9-rtlsk-v4.9-18.02-rt
Conflicts:
arch/arm64/mm/init.c
Diffstat (limited to 'mm/huge_memory.c')
-rw-r--r-- | mm/huge_memory.c | 84 |
1 files changed, 58 insertions, 26 deletions
diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 3cae1dcf069c..c234c078693c 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1509,37 +1509,69 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, { struct mm_struct *mm = vma->vm_mm; spinlock_t *ptl; - int ret = 0; + pmd_t entry; + bool preserve_write; + int ret; ptl = __pmd_trans_huge_lock(pmd, vma); - if (ptl) { - pmd_t entry; - bool preserve_write = prot_numa && pmd_write(*pmd); - ret = 1; + if (!ptl) + return 0; - /* - * Avoid trapping faults against the zero page. The read-only - * data is likely to be read-cached on the local CPU and - * local/remote hits to the zero page are not interesting. - */ - if (prot_numa && is_huge_zero_pmd(*pmd)) { - spin_unlock(ptl); - return ret; - } + preserve_write = prot_numa && pmd_write(*pmd); + ret = 1; - if (!prot_numa || !pmd_protnone(*pmd)) { - entry = pmdp_huge_get_and_clear_notify(mm, addr, pmd); - entry = pmd_modify(entry, newprot); - if (preserve_write) - entry = pmd_mkwrite(entry); - ret = HPAGE_PMD_NR; - set_pmd_at(mm, addr, pmd, entry); - BUG_ON(vma_is_anonymous(vma) && !preserve_write && - pmd_write(entry)); - } - spin_unlock(ptl); - } + /* + * Avoid trapping faults against the zero page. The read-only + * data is likely to be read-cached on the local CPU and + * local/remote hits to the zero page are not interesting. + */ + if (prot_numa && is_huge_zero_pmd(*pmd)) + goto unlock; + + if (prot_numa && pmd_protnone(*pmd)) + goto unlock; + + /* + * In case prot_numa, we are under down_read(mmap_sem). It's critical + * to not clear pmd intermittently to avoid race with MADV_DONTNEED + * which is also under down_read(mmap_sem): + * + * CPU0: CPU1: + * change_huge_pmd(prot_numa=1) + * pmdp_huge_get_and_clear_notify() + * madvise_dontneed() + * zap_pmd_range() + * pmd_trans_huge(*pmd) == 0 (without ptl) + * // skip the pmd + * set_pmd_at(); + * // pmd is re-established + * + * The race makes MADV_DONTNEED miss the huge pmd and don't clear it + * which may break userspace. + * + * pmdp_invalidate() is required to make sure we don't miss + * dirty/young flags set by hardware. + */ + entry = *pmd; + pmdp_invalidate(vma, addr, pmd); + /* + * Recover dirty/young flags. It relies on pmdp_invalidate to not + * corrupt them. + */ + if (pmd_dirty(*pmd)) + entry = pmd_mkdirty(entry); + if (pmd_young(*pmd)) + entry = pmd_mkyoung(entry); + + entry = pmd_modify(entry, newprot); + if (preserve_write) + entry = pmd_mkwrite(entry); + ret = HPAGE_PMD_NR; + set_pmd_at(mm, addr, pmd, entry); + BUG_ON(vma_is_anonymous(vma) && !preserve_write && pmd_write(entry)); +unlock: + spin_unlock(ptl); return ret; } |