Newer
Older
set_pte_at(mm, address, page_table, entry);
/* No need to invalidate - it was non-present before */
update_mmu_cache(vma, address, entry);
pte_unmap_unlock(page_table, ptl);
release:
page_cache_release(page);
goto unlock;
return VM_FAULT_OOM;
}
/*
* do_no_page() tries to create a new page mapping. It aggressively
* tries to share with existing pages, but makes a separate copy if
* the "write_access" parameter is true in order to avoid the next
* page fault.
*
* As this is called only for pages that do not currently exist, we
* do not need to flush old virtual caches or the TLB.
*
* We enter with non-exclusive mmap_sem (to exclude vma changes,
* but allow concurrent faults), and pte mapped but not yet locked.
* We return with mmap_sem still held, but pte unmapped and unlocked.
static int do_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, pte_t *page_table, pmd_t *pmd,
int write_access)
struct address_space *mapping = NULL;
pte_t entry;
unsigned int sequence = 0;
int ret = VM_FAULT_MINOR;
int anon = 0;
pte_unmap(page_table);
BUG_ON(vma->vm_flags & VM_PFNMAP);
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
if (vma->vm_file) {
mapping = vma->vm_file->f_mapping;
sequence = mapping->truncate_count;
smp_rmb(); /* serializes i_size against truncate_count */
}
retry:
new_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, &ret);
/*
* No smp_rmb is needed here as long as there's a full
* spin_lock/unlock sequence inside the ->nopage callback
* (for the pagecache lookup) that acts as an implicit
* smp_mb() and prevents the i_size read to happen
* after the next truncate_count read.
*/
/* no page was available -- either SIGBUS or OOM */
if (new_page == NOPAGE_SIGBUS)
return VM_FAULT_SIGBUS;
if (new_page == NOPAGE_OOM)
return VM_FAULT_OOM;
/*
* Should we do an early C-O-W break?
*/
if (write_access && !(vma->vm_flags & VM_SHARED)) {
struct page *page;
if (unlikely(anon_vma_prepare(vma)))
goto oom;
page = alloc_page_vma(GFP_HIGHUSER, vma, address);
if (!page)
goto oom;
copy_user_highpage(page, new_page, address);
page_cache_release(new_page);
new_page = page;
anon = 1;
}
page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
/*
* For a file-backed vma, someone could have truncated or otherwise
* invalidated this page. If unmap_mapping_range got called,
* retry getting the page.
*/
if (mapping && unlikely(sequence != mapping->truncate_count)) {
pte_unmap_unlock(page_table, ptl);
cond_resched();
sequence = mapping->truncate_count;
smp_rmb();
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
goto retry;
}
/*
* This silly early PAGE_DIRTY setting removes a race
* due to the bad i386 page protection. But it's valid
* for other architectures too.
*
* Note that if write_access is true, we either now have
* an exclusive copy of the page, or this is a shared mapping,
* so we can make it writable and dirty to avoid having to
* handle that later.
*/
/* Only go through if we didn't race with anybody else... */
if (pte_none(*page_table)) {
flush_icache_page(vma, new_page);
entry = mk_pte(new_page, vma->vm_page_prot);
if (write_access)
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
set_pte_at(mm, address, page_table, entry);
if (anon) {
lru_cache_add_active(new_page);
page_add_anon_rmap(new_page, vma, address);
} else {
/* One of our sibling threads was faster, back out. */
page_cache_release(new_page);
}
/* no need to invalidate: a not-present page shouldn't be cached */
update_mmu_cache(vma, address, entry);
lazy_mmu_prot_update(entry);
pte_unmap_unlock(page_table, ptl);
return ret;
oom:
page_cache_release(new_page);
}
/*
* Fault of a previously existing named mapping. Repopulate the pte
* from the encoded file_pte if possible. This enables swappable
* nonlinear vmas.
*
* We enter with non-exclusive mmap_sem (to exclude vma changes,
* but allow concurrent faults), and pte mapped but not yet locked.
* We return with mmap_sem still held, but pte unmapped and unlocked.
static int do_file_page(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, pte_t *page_table, pmd_t *pmd,
int write_access, pte_t orig_pte)
if (!pte_unmap_same(mm, pmd, page_table, orig_pte))
if (unlikely(!(vma->vm_flags & VM_NONLINEAR))) {
/*
* Page table corrupted: show pte and kill process.
*/
return VM_FAULT_OOM;
}
/* We can then assume vm->vm_ops && vma->vm_ops->populate */
pgoff = pte_to_pgoff(orig_pte);
err = vma->vm_ops->populate(vma, address & PAGE_MASK, PAGE_SIZE,
vma->vm_page_prot, pgoff, 0);
if (err == -ENOMEM)
return VM_FAULT_OOM;
if (err)
return VM_FAULT_SIGBUS;
return VM_FAULT_MAJOR;
}
/*
* These routines also need to handle stuff like marking pages dirty
* and/or accessed for architectures that don't do it in hardware (most
* RISC architectures). The early dirtying is also good on the i386.
*
* There is also a hook called "update_mmu_cache()" that architectures
* with external mmu caches can use to update those (ie the Sparc or
* PowerPC hashed page tables that act as extended TLBs).
*
* We enter with non-exclusive mmap_sem (to exclude vma changes,
* but allow concurrent faults), and pte mapped but not yet locked.
* We return with mmap_sem still held, but pte unmapped and unlocked.
*/
static inline int handle_pte_fault(struct mm_struct *mm,
struct vm_area_struct *vma, unsigned long address,
pte_t *pte, pmd_t *pmd, int write_access)
pte_t old_entry;
old_entry = entry = *pte;
if (pte_none(entry)) {
if (!vma->vm_ops || !vma->vm_ops->nopage)
return do_anonymous_page(mm, vma, address,
pte, pmd, write_access);
return do_no_page(mm, vma, address,
pte, pmd, write_access);
}
return do_file_page(mm, vma, address,
pte, pmd, write_access, entry);
return do_swap_page(mm, vma, address,
pte, pmd, write_access, entry);
spin_lock(ptl);
if (unlikely(!pte_same(*pte, entry)))
goto unlock;
return do_wp_page(mm, vma, address,
pte, pmd, ptl, entry);
entry = pte_mkdirty(entry);
}
entry = pte_mkyoung(entry);
if (!pte_same(old_entry, entry)) {
ptep_set_access_flags(vma, address, pte, entry, write_access);
update_mmu_cache(vma, address, entry);
lazy_mmu_prot_update(entry);
} else {
/*
* This is needed only for protection faults but the arch code
* is not yet telling us if this is a protection fault or not.
* This still avoids useless tlb flushes for .text page faults
* with threads.
*/
if (write_access)
flush_tlb_page(vma, address);
}
unlock:
pte_unmap_unlock(pte, ptl);
return VM_FAULT_MINOR;
}
/*
* By the time we get here, we already hold the mm semaphore
*/
int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, int write_access)
{
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
__set_current_state(TASK_RUNNING);
inc_page_state(pgfault);
if (unlikely(is_vm_hugetlb_page(vma)))
return hugetlb_fault(mm, vma, address, write_access);
pgd = pgd_offset(mm, address);
pud = pud_alloc(mm, pgd, address);
if (!pud)
pte = pte_alloc_map(mm, pmd, address);
if (!pte)
return handle_pte_fault(mm, vma, address, pte, pmd, write_access);
}
#ifndef __PAGETABLE_PUD_FOLDED
/*
* Allocate page upper directory.
* We've already handled the fast-path in-line.
int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
pud_t *new = pud_alloc_one(mm, address);
if (!new)
if (pgd_present(*pgd)) /* Another has populated it */
else
pgd_populate(mm, pgd, new);
#else
/* Workaround for gcc 2.96 */
int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
{
return 0;
}
#endif /* __PAGETABLE_PUD_FOLDED */
#ifndef __PAGETABLE_PMD_FOLDED
/*
* Allocate page middle directory.
* We've already handled the fast-path in-line.
int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
pmd_t *new = pmd_alloc_one(mm, address);
if (!new)
if (pud_present(*pud)) /* Another has populated it */
else
pud_populate(mm, pud, new);
if (pgd_present(*pud)) /* Another has populated it */
else
pgd_populate(mm, pud, new);
}
#else
/* Workaround for gcc 2.96 */
int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
{
return 0;
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398
2399
2400
2401
2402
}
#endif /* __PAGETABLE_PMD_FOLDED */
int make_pages_present(unsigned long addr, unsigned long end)
{
int ret, len, write;
struct vm_area_struct * vma;
vma = find_vma(current->mm, addr);
if (!vma)
return -1;
write = (vma->vm_flags & VM_WRITE) != 0;
if (addr >= end)
BUG();
if (end > vma->vm_end)
BUG();
len = (end+PAGE_SIZE-1)/PAGE_SIZE-addr/PAGE_SIZE;
ret = get_user_pages(current, current->mm, addr,
len, write, 0, NULL, NULL);
if (ret < 0)
return ret;
return ret == len ? 0 : -1;
}
/*
* Map a vmalloc()-space virtual address to the physical page.
*/
struct page * vmalloc_to_page(void * vmalloc_addr)
{
unsigned long addr = (unsigned long) vmalloc_addr;
struct page *page = NULL;
pgd_t *pgd = pgd_offset_k(addr);
pud_t *pud;
pmd_t *pmd;
pte_t *ptep, pte;
if (!pgd_none(*pgd)) {
pud = pud_offset(pgd, addr);
if (!pud_none(*pud)) {
pmd = pmd_offset(pud, addr);
if (!pmd_none(*pmd)) {
ptep = pte_offset_map(pmd, addr);
pte = *ptep;
if (pte_present(pte))
page = pte_page(pte);
pte_unmap(ptep);
}
}
}
return page;
}
EXPORT_SYMBOL(vmalloc_to_page);
/*
* Map a vmalloc()-space virtual address to the physical page frame number.
*/
unsigned long vmalloc_to_pfn(void * vmalloc_addr)
{
return page_to_pfn(vmalloc_to_page(vmalloc_addr));
}
EXPORT_SYMBOL(vmalloc_to_pfn);
#if !defined(__HAVE_ARCH_GATE_AREA)
#if defined(AT_SYSINFO_EHDR)
static struct vm_area_struct gate_vma;
static int __init gate_vma_init(void)
{
gate_vma.vm_mm = NULL;
gate_vma.vm_start = FIXADDR_USER_START;
gate_vma.vm_end = FIXADDR_USER_END;
gate_vma.vm_page_prot = PAGE_READONLY;
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433
2434
2435
return 0;
}
__initcall(gate_vma_init);
#endif
struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
{
#ifdef AT_SYSINFO_EHDR
return &gate_vma;
#else
return NULL;
#endif
}
int in_gate_area_no_task(unsigned long addr)
{
#ifdef AT_SYSINFO_EHDR
if ((addr >= FIXADDR_USER_START) && (addr < FIXADDR_USER_END))
return 1;
#endif
return 0;
}
#endif /* __HAVE_ARCH_GATE_AREA */