	set_pte_at(mm, address, page_table, entry);

	/* No need to invalidate - it was non-present before */
	update_mmu_cache(vma, address, entry);
	lazy_mmu_prot_update(entry);
unlock:
	pte_unmap_unlock(page_table, ptl);
	return VM_FAULT_MINOR;
release:
	page_cache_release(page);
	goto unlock;
oom:
	return VM_FAULT_OOM;
}

/*
 * do_no_page() tries to create a new page mapping. It aggressively
 * tries to share with existing pages, but makes a separate copy if
 * the "write_access" parameter is true in order to avoid the next
 * page fault.
 *
 * As this is called only for pages that do not currently exist, we
 * do not need to flush old virtual caches or the TLB.
 *
 * We enter with non-exclusive mmap_sem (to exclude vma changes,
 * but allow concurrent faults), and pte mapped but not yet locked.
 * We return with mmap_sem still held, but pte unmapped and unlocked.
 */
static int do_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
		unsigned long address, pte_t *page_table, pmd_t *pmd,
		int write_access)
{
	spinlock_t *ptl;
	struct page *new_page;
	struct address_space *mapping = NULL;
	pte_t entry;
	unsigned int sequence = 0;
	int ret = VM_FAULT_MINOR;
	int anon = 0;

	pte_unmap(page_table);
	BUG_ON(vma->vm_flags & VM_PFNMAP);
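	/*
	 * The pte was only mapped, not locked, so drop the (possibly atomic)
	 * kmap now: the ->nopage handler below is allowed to sleep.  Pure
	 * PFN mappings carry no struct page behind them and can never be
	 * serviced by this path, hence the BUG_ON above.
	 */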

	if (vma->vm_file) {
		mapping = vma->vm_file->f_mapping;
		sequence = mapping->truncate_count;
		smp_rmb(); /* serializes i_size against truncate_count */
	}
retry:
	new_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, &ret);
	/*
	 * No smp_rmb is needed here as long as there's a full
	 * spin_lock/unlock sequence inside the ->nopage callback
	 * (for the pagecache lookup) that acts as an implicit
	 * smp_mb() and prevents the i_size read to happen
	 * after the next truncate_count read.
	 */

	/* no page was available -- either SIGBUS or OOM */
	if (new_page == NOPAGE_SIGBUS)
		return VM_FAULT_SIGBUS;
	if (new_page == NOPAGE_OOM)
		return VM_FAULT_OOM;

	/*
	 * Should we do an early C-O-W break?  For a write fault on a private
	 * mapping we never map the file's page directly: allocate an
	 * anonymous copy up front rather than mapping the page read-only
	 * and taking a second fault in do_wp_page().
	 */
	if (write_access && !(vma->vm_flags & VM_SHARED)) {
		struct page *page;

		if (unlikely(anon_vma_prepare(vma)))
			goto oom;
		page = alloc_page_vma(GFP_HIGHUSER, vma, address);
		if (!page)
			goto oom;
		copy_user_highpage(page, new_page, address);
		page_cache_release(new_page);
		new_page = page;
		anon = 1;
	}

	page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
	/*
	 * For a file-backed vma, someone could have truncated or otherwise
	 * invalidated this page.  If unmap_mapping_range got called,
	 * retry getting the page.
	 */
	if (mapping && unlikely(sequence != mapping->truncate_count)) {
		pte_unmap_unlock(page_table, ptl);
		page_cache_release(new_page);
		cond_resched();
		sequence = mapping->truncate_count;
		smp_rmb();
		goto retry;
	}

	/*
	 * This silly early PAGE_DIRTY setting removes a race
	 * due to the bad i386 page protection. But it's valid
	 * for other architectures too.
	 *
	 * Note that if write_access is true, we either now have
	 * an exclusive copy of the page, or this is a shared mapping,
	 * so we can make it writable and dirty to avoid having to
	 * handle that later.
	 */
	/* Only go through if we didn't race with anybody else... */
	if (pte_none(*page_table)) {
		flush_icache_page(vma, new_page);
		entry = mk_pte(new_page, vma->vm_page_prot);
		if (write_access)
			entry = maybe_mkwrite(pte_mkdirty(entry), vma);
		set_pte_at(mm, address, page_table, entry);
		if (anon) {
			inc_mm_counter(mm, anon_rss);
			lru_cache_add_active(new_page);
			page_add_anon_rmap(new_page, vma, address);
		} else {
			inc_mm_counter(mm, file_rss);
			page_add_file_rmap(new_page);
		}
	} else {
		/* One of our sibling threads was faster, back out. */
		page_cache_release(new_page);
		goto unlock;
	}

	/* no need to invalidate: a not-present page shouldn't be cached */
	update_mmu_cache(vma, address, entry);
	lazy_mmu_prot_update(entry);
unlock:
	pte_unmap_unlock(page_table, ptl);
	return ret;
oom:
	page_cache_release(new_page);
	return VM_FAULT_OOM;
}
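
/*
 * For reference, the ->nopage side of the contract above looks roughly like
 * the sketch below (illustrative only, not from this file; example_lookup()
 * is a made-up helper).  The handler returns a page with an extra reference,
 * which do_no_page() consumes, and reports failure through the
 * NOPAGE_SIGBUS/NOPAGE_OOM cookies checked above:
 *
 *	static struct page *example_nopage(struct vm_area_struct *vma,
 *					   unsigned long address, int *type)
 *	{
 *		struct page *page = example_lookup(vma, address & PAGE_MASK);
 *
 *		if (!page)
 *			return NOPAGE_SIGBUS;
 *		get_page(page);
 *		if (type)
 *			*type = VM_FAULT_MINOR;
 *		return page;
 *	}
 */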

/*
 * Fault of a previously existing named mapping. Repopulate the pte
 * from the encoded file_pte if possible. This enables swappable
 * nonlinear vmas.
 *
 * We enter with non-exclusive mmap_sem (to exclude vma changes,
 * but allow concurrent faults), and pte mapped but not yet locked.
 * We return with mmap_sem still held, but pte unmapped and unlocked.
 */
static int do_file_page(struct mm_struct *mm, struct vm_area_struct *vma,
		unsigned long address, pte_t *page_table, pmd_t *pmd,
		int write_access, pte_t orig_pte)
{
	pgoff_t pgoff;
	int err;

	if (!pte_unmap_same(mm, pmd, page_table, orig_pte))
		return VM_FAULT_MINOR;

	if (unlikely(!(vma->vm_flags & VM_NONLINEAR))) {
		/*
		 * Page table corrupted: show pte and kill process.
		 */
		print_bad_pte(vma, orig_pte, address);
		return VM_FAULT_OOM;
	}
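	/*
	 * Nonlinear ptes are created by sys_remap_file_pages(): when such a
	 * page is later unmapped, its file offset is preserved in the
	 * not-present pte, and pte_to_pgoff() below recovers it so the
	 * mapping can be repopulated at the same offset.
	 */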
	/* We can then assume vma->vm_ops && vma->vm_ops->populate */

	pgoff = pte_to_pgoff(orig_pte);
	err = vma->vm_ops->populate(vma, address & PAGE_MASK, PAGE_SIZE,
					vma->vm_page_prot, pgoff, 0);
	if (err == -ENOMEM)
		return VM_FAULT_OOM;
	if (err)
		return VM_FAULT_SIGBUS;
	return VM_FAULT_MAJOR;
}

/*
 * These routines also need to handle stuff like marking pages dirty
 * and/or accessed for architectures that don't do it in hardware (most
 * RISC architectures).  The early dirtying is also good on the i386.
 *
 * There is also a hook called "update_mmu_cache()" that architectures
 * with external mmu caches can use to update those (ie the Sparc or
 * PowerPC hashed page tables that act as extended TLBs).
 *
 * We enter with non-exclusive mmap_sem (to exclude vma changes,
 * but allow concurrent faults), and pte mapped but not yet locked.
 * We return with mmap_sem still held, but pte unmapped and unlocked.
 */
static inline int handle_pte_fault(struct mm_struct *mm,
		struct vm_area_struct *vma, unsigned long address,
		pte_t *pte, pmd_t *pmd, int write_access)
{
	pte_t entry;
	pte_t old_entry;
	spinlock_t *ptl;

	old_entry = entry = *pte;
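	/*
	 * Dispatch on the state of the pte: an empty pte is a fault on a
	 * never-mapped page (anonymous or ->nopage); a non-present but
	 * non-empty pte encodes either a nonlinear file offset (pte_file)
	 * or a swap entry.  A present pte means a write-protect, dirty or
	 * accessed-bit fault, handled below under the pte lock.
	 */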
	if (!pte_present(entry)) {
		if (pte_none(entry)) {
			if (!vma->vm_ops || !vma->vm_ops->nopage)
				return do_anonymous_page(mm, vma, address,
					pte, pmd, write_access);
			return do_no_page(mm, vma, address,
					pte, pmd, write_access);
		}
		if (pte_file(entry))
			return do_file_page(mm, vma, address,
					pte, pmd, write_access, entry);
		return do_swap_page(mm, vma, address,
					pte, pmd, write_access, entry);
	}

	ptl = pte_lockptr(mm, pmd);
	spin_lock(ptl);
	if (unlikely(!pte_same(*pte, entry)))
		goto unlock;
	if (write_access) {
		if (!pte_write(entry))
			return do_wp_page(mm, vma, address,
					pte, pmd, ptl, entry);
		entry = pte_mkdirty(entry);
	}
	entry = pte_mkyoung(entry);
	if (!pte_same(old_entry, entry)) {
		ptep_set_access_flags(vma, address, pte, entry, write_access);
		update_mmu_cache(vma, address, entry);
		lazy_mmu_prot_update(entry);
	} else {
		/*
		 * This is needed only for protection faults but the arch code
		 * is not yet telling us if this is a protection fault or not.
		 * This still avoids useless tlb flushes for .text page faults
		 * with threads.
		 */
		if (write_access)
			flush_tlb_page(vma, address);
	}
unlock:
	pte_unmap_unlock(pte, ptl);
	return VM_FAULT_MINOR;
}

/*
 * By the time we get here, we already hold the mm semaphore
 */
int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
		unsigned long address, int write_access)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

	__set_current_state(TASK_RUNNING);

	inc_page_state(pgfault);

	if (unlikely(is_vm_hugetlb_page(vma)))
		return hugetlb_fault(mm, vma, address, write_access);
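
	/*
	 * Walk the page table hierarchy top-down, allocating the pud, pmd
	 * and pte levels on demand.  On architectures where a level is
	 * folded the corresponding *_alloc() collapses to almost nothing.
	 * pte_alloc_map() leaves the pte mapped for handle_pte_fault().
	 */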

	pgd = pgd_offset(mm, address);
	pud = pud_alloc(mm, pgd, address);
	if (!pud)
		return VM_FAULT_OOM;
	pmd = pmd_alloc(mm, pud, address);
	if (!pmd)
		return VM_FAULT_OOM;
	pte = pte_alloc_map(mm, pmd, address);
	if (!pte)
		return VM_FAULT_OOM;

	return handle_pte_fault(mm, vma, address, pte, pmd, write_access);
}

#ifndef __PAGETABLE_PUD_FOLDED
/*
 * Allocate page upper directory.
 * We've already handled the fast-path in-line.
 */
int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
{
	pud_t *new = pud_alloc_one(mm, address);
	if (!new)
		return -ENOMEM;

	spin_lock(&mm->page_table_lock);
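	/*
	 * The new pud was allocated without holding any lock, so another
	 * thread may have installed one for this pgd entry in the meantime;
	 * recheck under page_table_lock and free ours if we lost the race.
	 */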
	if (pgd_present(*pgd))		/* Another has populated it */
		pud_free(new);
	else
		pgd_populate(mm, pgd, new);
	spin_unlock(&mm->page_table_lock);
	return 0;
}
#else
/* Workaround for gcc 2.96 */
int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
{
	return 0;
}
#endif /* __PAGETABLE_PUD_FOLDED */

#ifndef __PAGETABLE_PMD_FOLDED
/*
 * Allocate page middle directory.
 * We've already handled the fast-path in-line.
 */
int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
{
	pmd_t *new = pmd_alloc_one(mm, address);
	if (!new)
		return -ENOMEM;

	spin_lock(&mm->page_table_lock);
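	/*
	 * Same race-recheck pattern as __pud_alloc() above.  With the
	 * __ARCH_HAS_4LEVEL_HACK fix-up the "pud" passed in is effectively
	 * a pgd entry, which is why the pgd_* helpers are used there.
	 */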
#ifndef __ARCH_HAS_4LEVEL_HACK
	if (pud_present(*pud))		/* Another has populated it */
		pmd_free(new);
	else
		pud_populate(mm, pud, new);
#else
	if (pgd_present(*pud))		/* Another has populated it */
		pmd_free(new);
	else
		pgd_populate(mm, pud, new);
#endif /* __ARCH_HAS_4LEVEL_HACK */
	spin_unlock(&mm->page_table_lock);
	return 0;
}
#else
/* Workaround for gcc 2.96 */
int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
{
	return 0;
}
#endif /* __PAGETABLE_PMD_FOLDED */

int make_pages_present(unsigned long addr, unsigned long end)
{
	int ret, len, write;
	struct vm_area_struct * vma;

	vma = find_vma(current->mm, addr);
	if (!vma)
		return -1;
	write = (vma->vm_flags & VM_WRITE) != 0;
	if (addr >= end)
		BUG();
	if (end > vma->vm_end)
		BUG();
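	/* number of pages needed to cover [addr, end) */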
	len = (end+PAGE_SIZE-1)/PAGE_SIZE-addr/PAGE_SIZE;
	ret = get_user_pages(current, current->mm, addr,
			len, write, 0, NULL, NULL);
	if (ret < 0)
		return ret;
	return ret == len ? 0 : -1;
}

/* 
 * Map a vmalloc()-space virtual address to the physical page.
 */
struct page * vmalloc_to_page(void * vmalloc_addr)
{
	unsigned long addr = (unsigned long) vmalloc_addr;
	struct page *page = NULL;
	pgd_t *pgd = pgd_offset_k(addr);
	pud_t *pud;
	pmd_t *pmd;
	pte_t *ptep, pte;
  
	if (!pgd_none(*pgd)) {
		pud = pud_offset(pgd, addr);
		if (!pud_none(*pud)) {
			pmd = pmd_offset(pud, addr);
			if (!pmd_none(*pmd)) {
				ptep = pte_offset_map(pmd, addr);
				pte = *ptep;
				if (pte_present(pte))
					page = pte_page(pte);
				pte_unmap(ptep);
			}
		}
	}
	return page;
}

EXPORT_SYMBOL(vmalloc_to_page);
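
/*
 * Illustrative usage (not from this file): a caller that needs the pages
 * backing a vmalloc() buffer, e.g. to hand them to code that works on
 * struct page, can walk the buffer a page at a time.  Each iteration
 * yields the struct page backing that PAGE_SIZE chunk:
 *
 *	size_t len = 4 * PAGE_SIZE;
 *	void *buf = vmalloc(len);
 *	unsigned long off;
 *	struct page *pg;
 *
 *	for (off = 0; buf && off < len; off += PAGE_SIZE)
 *		pg = vmalloc_to_page(buf + off);
 *	vfree(buf);
 */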

/*
 * Map a vmalloc()-space virtual address to the physical page frame number.
 */
unsigned long vmalloc_to_pfn(void * vmalloc_addr)
{
	return page_to_pfn(vmalloc_to_page(vmalloc_addr));
}

EXPORT_SYMBOL(vmalloc_to_pfn);

#if !defined(__HAVE_ARCH_GATE_AREA)
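
/*
 * Generic "gate area" support: on architectures that expose a kernel-provided
 * syscall/vsyscall page to userspace (advertised through the AT_SYSINFO_EHDR
 * auxv entry) this supplies a synthetic, read-only vma describing it, so that
 * the rest of the VM (e.g. get_user_pages()) can treat it like a normal vma.
 * An architecture defining __HAVE_ARCH_GATE_AREA provides its own versions.
 */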

#if defined(AT_SYSINFO_EHDR)
static struct vm_area_struct gate_vma;

static int __init gate_vma_init(void)
{
	gate_vma.vm_mm = NULL;
	gate_vma.vm_start = FIXADDR_USER_START;
	gate_vma.vm_end = FIXADDR_USER_END;
	gate_vma.vm_page_prot = PAGE_READONLY;
	gate_vma.vm_flags = 0;
	return 0;
}
__initcall(gate_vma_init);
#endif

struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
{
#ifdef AT_SYSINFO_EHDR
	return &gate_vma;
#else
	return NULL;
#endif
}

int in_gate_area_no_task(unsigned long addr)
{
#ifdef AT_SYSINFO_EHDR
	if ((addr >= FIXADDR_USER_START) && (addr < FIXADDR_USER_END))
		return 1;
#endif
	return 0;
}

#endif	/* __HAVE_ARCH_GATE_AREA */