	unsigned long inflated_offset;

	if (len > TASK_SIZE)
		return -ENOMEM;

	get_area = current->mm->get_unmapped_area;
	addr = get_area(file, uaddr, len, pgoff, flags);

	if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE))
		return addr;
	if (IS_ERR_VALUE(addr))
		return addr;
	if (addr & ~PAGE_MASK)
		return addr;
	if (addr > TASK_SIZE - len)
		return addr;

	if (shmem_huge == SHMEM_HUGE_DENY)
		return addr;
	if (len < HPAGE_PMD_SIZE)
		return addr;
	if (flags & MAP_FIXED)
		return addr;
	/*
	 * Our priority is to support MAP_SHARED mapped hugely;
	 * and support MAP_PRIVATE mapped hugely too, until it is COWed.
	 * But if caller specified an address hint, respect that as before.
	 */
	if (uaddr)
		return addr;

	if (shmem_huge != SHMEM_HUGE_FORCE) {
		struct super_block *sb;

		if (file) {
			VM_BUG_ON(file->f_op != &shmem_file_operations);
			sb = file_inode(file)->i_sb;
		} else {
			/*
			 * Called directly from mm/mmap.c, or drivers/char/mem.c
			 * for "/dev/zero", to create a shared anonymous object.
			 */
			if (IS_ERR(shm_mnt))
				return addr;
			sb = shm_mnt->mnt_sb;
		}
		if (SHMEM_SB(sb)->huge == SHMEM_HUGE_NEVER)
			return addr;
	}

	offset = (pgoff << PAGE_SHIFT) & (HPAGE_PMD_SIZE-1);
	if (offset && offset + len < 2 * HPAGE_PMD_SIZE)
		return addr;
	if ((addr & (HPAGE_PMD_SIZE-1)) == offset)
		return addr;

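	/*
	 * Ask for a region one huge page (minus one page) larger than
	 * requested, so the returned area can be shifted up to an address
	 * with the required offset within a huge page, letting the mapping
	 * be backed by huge pages.
	 */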
	inflated_len = len + HPAGE_PMD_SIZE - PAGE_SIZE;
	if (inflated_len > TASK_SIZE)
		return addr;
	if (inflated_len < len)
		return addr;

	inflated_addr = get_area(NULL, 0, inflated_len, 0, flags);
	if (IS_ERR_VALUE(inflated_addr))
		return addr;
	if (inflated_addr & ~PAGE_MASK)
		return addr;

	inflated_offset = inflated_addr & (HPAGE_PMD_SIZE-1);
	inflated_addr += offset - inflated_offset;
	if (inflated_offset > offset)
		inflated_addr += HPAGE_PMD_SIZE;

	if (inflated_addr > TASK_SIZE - len)
		return addr;
	return inflated_addr;
}

#ifdef CONFIG_NUMA
static int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *mpol)
{
	struct inode *inode = file_inode(vma->vm_file);
	return mpol_set_shared_policy(&SHMEM_I(inode)->policy, vma, mpol);
}

static struct mempolicy *shmem_get_policy(struct vm_area_struct *vma,
					  unsigned long addr)
{
	struct inode *inode = file_inode(vma->vm_file);
	pgoff_t index;

	index = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
	return mpol_shared_policy_lookup(&SHMEM_I(inode)->policy, index);
}
#endif

int shmem_lock(struct file *file, int lock, struct user_struct *user)
{
	struct inode *inode = file_inode(file);
	struct shmem_inode_info *info = SHMEM_I(inode);
	int retval = -ENOMEM;

	spin_lock_irq(&info->lock);
	if (lock && !(info->flags & VM_LOCKED)) {
		if (!user_shm_lock(inode->i_size, user))
			goto out_nomem;
		info->flags |= VM_LOCKED;
		mapping_set_unevictable(file->f_mapping);
	}
	if (!lock && (info->flags & VM_LOCKED) && user) {
		user_shm_unlock(inode->i_size, user);
		info->flags &= ~VM_LOCKED;
		mapping_clear_unevictable(file->f_mapping);
	}
	retval = 0;
out_nomem:
	spin_unlock_irq(&info->lock);
	return retval;
}

static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
{
	file_accessed(file);
	vma->vm_ops = &shmem_vm_ops;
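	/*
	 * Register with khugepaged only if the VMA covers at least one
	 * naturally aligned PMD-sized range: vm_start rounded up to a huge
	 * page boundary must still lie below vm_end rounded down.
	 */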
	if (IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE) &&
			((vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK) <
			(vma->vm_end & HPAGE_PMD_MASK)) {
		khugepaged_enter(vma, vma->vm_flags);
	}
	return 0;
}

static struct inode *shmem_get_inode(struct super_block *sb, const struct inode *dir,
				     umode_t mode, dev_t dev, unsigned long flags)
{
	struct inode *inode;
	struct shmem_inode_info *info;
	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);

	if (shmem_reserve_inode(sb))
		return NULL;

	inode = new_inode(sb);
	if (inode) {
		inode->i_ino = get_next_ino();
		inode_init_owner(inode, dir, mode);
		inode->i_blocks = 0;
		inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
		inode->i_generation = prandom_u32();
		info = SHMEM_I(inode);
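		/*
		 * SHMEM_I() returns the shmem_inode_info that embeds this
		 * inode; zero the shmem-specific fields that precede the
		 * embedded struct inode.
		 */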
		memset(info, 0, (char *)inode - (char *)info);
		spin_lock_init(&info->lock);
		info->seals = F_SEAL_SEAL;
		info->flags = flags & VM_NORESERVE;
		INIT_LIST_HEAD(&info->shrinklist);
		INIT_LIST_HEAD(&info->swaplist);
		simple_xattrs_init(&info->xattrs);
		cache_no_acl(inode);

		switch (mode & S_IFMT) {
		default:
			inode->i_op = &shmem_special_inode_operations;
			init_special_inode(inode, mode, dev);
			break;
		case S_IFREG:
			inode->i_mapping->a_ops = &shmem_aops;
			inode->i_op = &shmem_inode_operations;
			inode->i_fop = &shmem_file_operations;
			mpol_shared_policy_init(&info->policy,
						 shmem_get_sbmpol(sbinfo));
			break;
		case S_IFDIR:
			inc_nlink(inode);
			/* Some things misbehave if size == 0 on a directory */
			inode->i_size = 2 * BOGO_DIRENT_SIZE;
			inode->i_op = &shmem_dir_inode_operations;
			inode->i_fop = &simple_dir_operations;
			break;
		case S_IFLNK:
			/*
			 * Must not load anything in the rbtree,
			 * mpol_free_shared_policy will not be called.
			 */
			mpol_shared_policy_init(&info->policy, NULL);
			break;
		}

		lockdep_annotate_inode_mutex_key(inode);
	} else
		shmem_free_inode(sb);
	return inode;
}

bool shmem_mapping(struct address_space *mapping)
{
	return mapping->a_ops == &shmem_aops;
}

static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
				  pmd_t *dst_pmd,
				  struct vm_area_struct *dst_vma,
				  unsigned long dst_addr,
				  unsigned long src_addr,
				  bool zeropage,
				  struct page **pagep)
{
	struct inode *inode = file_inode(dst_vma->vm_file);
	struct shmem_inode_info *info = SHMEM_I(inode);
	struct address_space *mapping = inode->i_mapping;
	gfp_t gfp = mapping_gfp_mask(mapping);
	pgoff_t pgoff = linear_page_index(dst_vma, dst_addr);
	struct mem_cgroup *memcg;
	spinlock_t *ptl;
	void *page_kaddr;
	struct page *page;
	pte_t _dst_pte, *dst_pte;
	int ret;

	ret = -ENOMEM;
	if (!shmem_inode_acct_block(inode, 1))
		goto out;

	if (!*pagep) {
		page = shmem_alloc_page(gfp, info, pgoff);
		if (!page)
			goto out_unacct_blocks;
		if (!zeropage) {	/* mcopy_atomic */
			page_kaddr = kmap_atomic(page);
			ret = copy_from_user(page_kaddr,
					     (const void __user *)src_addr,
					     PAGE_SIZE);
			kunmap_atomic(page_kaddr);

			/* fallback to copy_from_user outside mmap_sem */
			if (unlikely(ret)) {
				*pagep = page;
				shmem_inode_unacct_blocks(inode, 1);
				/* don't free the page */
			}
		} else {		/* mfill_zeropage_atomic */
			clear_highpage(page);
		}
	} else {
		page = *pagep;
		*pagep = NULL;
	}

	VM_BUG_ON(PageLocked(page) || PageSwapBacked(page));
	__SetPageLocked(page);
	__SetPageSwapBacked(page);
	ret = mem_cgroup_try_charge_delay(page, dst_mm, gfp, &memcg, false);
	if (ret)
		goto out_release;

	ret = shmem_add_to_page_cache(page, mapping, pgoff, NULL,
						gfp & GFP_RECLAIM_MASK);
	if (ret)
		goto out_release_uncharge;

	mem_cgroup_commit_charge(page, memcg, false, false);

	_dst_pte = mk_pte(page, dst_vma->vm_page_prot);
	if (dst_vma->vm_flags & VM_WRITE)
		_dst_pte = pte_mkwrite(pte_mkdirty(_dst_pte));

	ret = -EEXIST;
	dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);
	if (!pte_none(*dst_pte))
		goto out_release_uncharge_unlock;

	lru_cache_add_anon(page);

	spin_lock(&info->lock);
	info->alloced++;
	inode->i_blocks += BLOCKS_PER_PAGE;
	shmem_recalc_inode(inode);
	spin_unlock(&info->lock);

	inc_mm_counter(dst_mm, mm_counter_file(page));
	page_add_file_rmap(page, false);
	set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);

	/* No need to invalidate - it was non-present before */
	update_mmu_cache(dst_vma, dst_addr, dst_pte);
	unlock_page(page);
	pte_unmap_unlock(dst_pte, ptl);
	ret = 0;
out:
	return ret;
out_release_uncharge_unlock:
	pte_unmap_unlock(dst_pte, ptl);
out_release_uncharge:
	mem_cgroup_cancel_charge(page, memcg, false);
out_release:
	unlock_page(page);
	put_page(page);
out_unacct_blocks:
	shmem_inode_unacct_blocks(inode, 1);
	goto out;
}

int shmem_mcopy_atomic_pte(struct mm_struct *dst_mm,
			   pmd_t *dst_pmd,
			   struct vm_area_struct *dst_vma,
			   unsigned long dst_addr,
			   unsigned long src_addr,
			   struct page **pagep)
{
	return shmem_mfill_atomic_pte(dst_mm, dst_pmd, dst_vma,
				      dst_addr, src_addr, false, pagep);
}

int shmem_mfill_zeropage_pte(struct mm_struct *dst_mm,
			     pmd_t *dst_pmd,
			     struct vm_area_struct *dst_vma,
			     unsigned long dst_addr)
{
	struct page *page = NULL;

	return shmem_mfill_atomic_pte(dst_mm, dst_pmd, dst_vma,
				      dst_addr, 0, true, &page);
}

#ifdef CONFIG_TMPFS
static const struct inode_operations shmem_symlink_inode_operations;
static const struct inode_operations shmem_short_symlink_operations;

#ifdef CONFIG_TMPFS_XATTR
static int shmem_initxattrs(struct inode *, const struct xattr *, void *);
#else
#define shmem_initxattrs NULL
#endif

static int
shmem_write_begin(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned flags,
			struct page **pagep, void **fsdata)
{
	struct inode *inode = mapping->host;
	struct shmem_inode_info *info = SHMEM_I(inode);
	pgoff_t index = pos >> PAGE_SHIFT;

	/* i_mutex is held by caller */
	if (unlikely(info->seals & (F_SEAL_WRITE | F_SEAL_GROW))) {
		if (info->seals & F_SEAL_WRITE)
			return -EPERM;
		if ((info->seals & F_SEAL_GROW) && pos + len > inode->i_size)
			return -EPERM;
	}

	return shmem_getpage(inode, index, pagep, SGP_WRITE);
}

static int
shmem_write_end(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct page *page, void *fsdata)
{
	struct inode *inode = mapping->host;

	if (pos + copied > inode->i_size)
		i_size_write(inode, pos + copied);

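	/*
	 * For a compound (huge) page, the subpages not touched by this write
	 * are cleared before the head is marked uptodate; a short copy also
	 * needs the unwritten parts of the written subpage zeroed.
	 */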
	if (!PageUptodate(page)) {
		struct page *head = compound_head(page);
		if (PageTransCompound(page)) {
			int i;

			for (i = 0; i < HPAGE_PMD_NR; i++) {
				if (head + i == page)
					continue;
				clear_highpage(head + i);
				flush_dcache_page(head + i);
			}
		}
		if (copied < PAGE_SIZE) {
			unsigned from = pos & (PAGE_SIZE - 1);
			zero_user_segments(page, 0, from,
					from + copied, PAGE_SIZE);
		}
		SetPageUptodate(head);
	}
	set_page_dirty(page);
	unlock_page(page);
	put_page(page);

	return copied;
}

static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file_inode(file);
	struct address_space *mapping = inode->i_mapping;
	pgoff_t index;
	unsigned long offset;
	enum sgp_type sgp = SGP_READ;
	int error = 0;
	ssize_t retval = 0;
	loff_t *ppos = &iocb->ki_pos;

	/*
	 * Might this read be for a stacking filesystem?  Then when reading
	 * holes of a sparse file, we actually need to allocate those pages,
	 * and even mark them dirty, so it cannot exceed the max_blocks limit.
	 */
	if (!iter_is_iovec(to))
		sgp = SGP_CACHE;

	index = *ppos >> PAGE_SHIFT;
	offset = *ppos & ~PAGE_MASK;

	for (;;) {
		struct page *page = NULL;
		pgoff_t end_index;
		unsigned long nr, ret;
		loff_t i_size = i_size_read(inode);

		end_index = i_size >> PAGE_SHIFT;
		if (index > end_index)
			break;
		if (index == end_index) {
			nr = i_size & ~PAGE_MASK;
			if (nr <= offset)
				break;
		}

		error = shmem_getpage(inode, index, &page, sgp);
		if (error) {
			if (error == -EINVAL)
				error = 0;
			break;
		}
		if (page) {
			if (sgp == SGP_CACHE)
				set_page_dirty(page);
			unlock_page(page);
		}

		/*
		 * We must evaluate after, since reads (unlike writes)
		 * are called without i_mutex protection against truncate
		 */
		nr = PAGE_SIZE;
		i_size = i_size_read(inode);
		end_index = i_size >> PAGE_SHIFT;
		if (index == end_index) {
			nr = i_size & ~PAGE_MASK;
			if (nr <= offset) {
				if (page)
					put_page(page);
				break;
			}
		}
		nr -= offset;

		if (page) {
			/*
			 * If users can be writing to this page using arbitrary
			 * virtual addresses, take care about potential aliasing
			 * before reading the page on the kernel side.
			 */
			if (mapping_writably_mapped(mapping))
				flush_dcache_page(page);
			/*
			 * Mark the page accessed if we read the beginning.
			 */
			if (!offset)
				mark_page_accessed(page);
		} else {
			page = ZERO_PAGE(0);
			get_page(page);
		}

		/*
		 * Ok, we have the page, and it's up-to-date, so
		 * now we can copy it to user space...
		 */
		ret = copy_page_to_iter(page, offset, nr, to);
		retval += ret;
		offset += ret;
		index += offset >> PAGE_SHIFT;
		offset &= ~PAGE_MASK;

		put_page(page);
		if (!iov_iter_count(to))
			break;
		if (ret < nr) {
			error = -EFAULT;
			break;
		}
		cond_resched();
	}

	*ppos = ((loff_t) index << PAGE_SHIFT) + offset;
	file_accessed(file);
	return retval ? retval : error;
}

/*
 * llseek SEEK_DATA or SEEK_HOLE through the page cache.
 */
static pgoff_t shmem_seek_hole_data(struct address_space *mapping,
				    pgoff_t index, pgoff_t end, int whence)
{
	struct page *page;
	struct pagevec pvec;
	pgoff_t indices[PAGEVEC_SIZE];
	bool done = false;
	int i;

	pagevec_init(&pvec);
	pvec.nr = 1;		/* start small: we may be there already */
	while (!done) {
		pvec.nr = find_get_entries(mapping, index,
					pvec.nr, pvec.pages, indices);
		if (!pvec.nr) {
			if (whence == SEEK_DATA)
				index = end;
			break;
		}
		for (i = 0; i < pvec.nr; i++, index++) {
			if (index < indices[i]) {
				if (whence == SEEK_HOLE) {
					done = true;
					break;
				}
				index = indices[i];
			}
			page = pvec.pages[i];
			if (page && !xa_is_value(page)) {
				if (!PageUptodate(page))
					page = NULL;
			}
			if (index >= end ||
			    (page && whence == SEEK_DATA) ||
			    (!page && whence == SEEK_HOLE)) {
				done = true;
				break;
			}
		}
		pagevec_remove_exceptionals(&pvec);
		pagevec_release(&pvec);
		pvec.nr = PAGEVEC_SIZE;
		cond_resched();
	}
	return index;
}

static loff_t shmem_file_llseek(struct file *file, loff_t offset, int whence)
{
	struct address_space *mapping = file->f_mapping;
	struct inode *inode = mapping->host;
	pgoff_t start, end;
	loff_t new_offset;

	if (whence != SEEK_DATA && whence != SEEK_HOLE)
		return generic_file_llseek_size(file, offset, whence,
					MAX_LFS_FILESIZE, i_size_read(inode));
	inode_lock(inode);
	/* We're holding i_mutex so we can access i_size directly */

	if (offset < 0 || offset >= inode->i_size)
		offset = -ENXIO;
	else {
		start = offset >> PAGE_SHIFT;
		end = (inode->i_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
		new_offset = shmem_seek_hole_data(mapping, start, end, whence);
		if (new_offset > offset) {
			if (new_offset < inode->i_size)
				offset = new_offset;
			else if (whence == SEEK_DATA)
				offset = -ENXIO;
			else
				offset = inode->i_size;
		}
	}

	if (offset >= 0)
		offset = vfs_setpos(file, offset, MAX_LFS_FILESIZE);
	inode_unlock(inode);
	return offset;
}

static long shmem_fallocate(struct file *file, int mode, loff_t offset,
							 loff_t len)
{
	struct inode *inode = file_inode(file);
	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
	struct shmem_inode_info *info = SHMEM_I(inode);
	struct shmem_falloc shmem_falloc;
	pgoff_t start, index, end;
	int error;

	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
		return -EOPNOTSUPP;

	inode_lock(inode);

	if (mode & FALLOC_FL_PUNCH_HOLE) {
		struct address_space *mapping = file->f_mapping;
		loff_t unmap_start = round_up(offset, PAGE_SIZE);
		loff_t unmap_end = round_down(offset + len, PAGE_SIZE) - 1;
		DECLARE_WAIT_QUEUE_HEAD_ONSTACK(shmem_falloc_waitq);
		/* protected by i_mutex */
		if (info->seals & F_SEAL_WRITE) {
			error = -EPERM;
			goto out;
		}

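		/*
		 * Publish the punched range via inode->i_private so that
		 * concurrent page faults can notice the hole punch and wait
		 * on shmem_falloc_waitq until it completes.
		 */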
		shmem_falloc.waitq = &shmem_falloc_waitq;
		shmem_falloc.start = unmap_start >> PAGE_SHIFT;
		shmem_falloc.next = (unmap_end + 1) >> PAGE_SHIFT;
		spin_lock(&inode->i_lock);
		inode->i_private = &shmem_falloc;
		spin_unlock(&inode->i_lock);

		if ((u64)unmap_end > (u64)unmap_start)
			unmap_mapping_range(mapping, unmap_start,
					    1 + unmap_end - unmap_start, 0);
		shmem_truncate_range(inode, offset, offset + len - 1);
		/* No need to unmap again: hole-punching leaves COWed pages */

		spin_lock(&inode->i_lock);
		inode->i_private = NULL;
		wake_up_all(&shmem_falloc_waitq);
		WARN_ON_ONCE(!list_empty(&shmem_falloc_waitq.head));
		spin_unlock(&inode->i_lock);
		error = 0;
		goto out;
	}

	/* We need to check rlimit even when FALLOC_FL_KEEP_SIZE */
	error = inode_newsize_ok(inode, offset + len);
	if (error)
		goto out;

	if ((info->seals & F_SEAL_GROW) && offset + len > inode->i_size) {
		error = -EPERM;
		goto out;
	}

	start = offset >> PAGE_SHIFT;
	end = (offset + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
	/* Try to avoid a swapstorm if len is impossible to satisfy */
	if (sbinfo->max_blocks && end - start > sbinfo->max_blocks) {
		error = -ENOSPC;
		goto out;
	}

	shmem_falloc.waitq = NULL;
	shmem_falloc.start = start;
	shmem_falloc.next  = start;
	shmem_falloc.nr_falloced = 0;
	shmem_falloc.nr_unswapped = 0;
	spin_lock(&inode->i_lock);
	inode->i_private = &shmem_falloc;
	spin_unlock(&inode->i_lock);

	for (index = start; index < end; index++) {
		struct page *page;

		/*
		 * Good, the fallocate(2) manpage permits EINTR: we may have
		 * been interrupted because we are using up too much memory.
		 */
		if (signal_pending(current))
			error = -EINTR;
		else if (shmem_falloc.nr_unswapped > shmem_falloc.nr_falloced)
			error = -ENOMEM;
		else
			error = shmem_getpage(inode, index, &page, SGP_FALLOC);
		if (error) {
			/* Remove the !PageUptodate pages we added */
			if (index > start) {
				shmem_undo_range(inode,
				    (loff_t)start << PAGE_SHIFT,
				    ((loff_t)index << PAGE_SHIFT) - 1, true);
			}
			goto undone;
		}

		/*
		 * Inform shmem_writepage() how far we have reached.
		 * No need for lock or barrier: we have the page lock.
		 */
		shmem_falloc.next++;
		if (!PageUptodate(page))
			shmem_falloc.nr_falloced++;

		/*
		 * If !PageUptodate, leave it that way so that freeable pages
		 * can be recognized if we need to rollback on error later.
		 * But set_page_dirty so that memory pressure will swap rather
		 * than free the pages we are allocating (and SGP_CACHE pages
		 * might still be clean: we now need to mark those dirty too).
		 */
		set_page_dirty(page);
		unlock_page(page);
		cond_resched();
	}

	if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size)
		i_size_write(inode, offset + len);
	inode->i_ctime = current_time(inode);
undone:
	spin_lock(&inode->i_lock);
	inode->i_private = NULL;
	spin_unlock(&inode->i_lock);
out:
	inode_unlock(inode);
	return error;
}

static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf)
{
	struct shmem_sb_info *sbinfo = SHMEM_SB(dentry->d_sb);

	buf->f_type = TMPFS_MAGIC;
	buf->f_bsize = PAGE_SIZE;
	buf->f_namelen = NAME_MAX;
	if (sbinfo->max_blocks) {
		buf->f_blocks = sbinfo->max_blocks;
		buf->f_bavail =
		buf->f_bfree  = sbinfo->max_blocks -
				percpu_counter_sum(&sbinfo->used_blocks);
	}
	if (sbinfo->max_inodes) {
		buf->f_files = sbinfo->max_inodes;
		buf->f_ffree = sbinfo->free_inodes;
	}
	/* else leave those fields 0 like simple_statfs */
	return 0;
}

/*
 * File creation. Allocate an inode, and we're done..
 */
static int
shmem_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
{
	struct inode *inode;
	int error = -ENOSPC;

	inode = shmem_get_inode(dir->i_sb, dir, mode, dev, VM_NORESERVE);
	if (inode) {
		error = simple_acl_create(dir, inode);
		if (error)
			goto out_iput;
		error = security_inode_init_security(inode, dir,
						     &dentry->d_name,
						     shmem_initxattrs, NULL);
		if (error && error != -EOPNOTSUPP)
			goto out_iput;
		error = 0;
		dir->i_size += BOGO_DIRENT_SIZE;
		dir->i_ctime = dir->i_mtime = current_time(dir);
		d_instantiate(dentry, inode);
		dget(dentry); /* Extra count - pin the dentry in core */
	}
	return error;
out_iput:
	iput(inode);
	return error;
}

static int
shmem_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
{
	struct inode *inode;
	int error = -ENOSPC;

	inode = shmem_get_inode(dir->i_sb, dir, mode, 0, VM_NORESERVE);
	if (inode) {
		error = security_inode_init_security(inode, dir,
						     NULL,
						     shmem_initxattrs, NULL);
		if (error && error != -EOPNOTSUPP)
			goto out_iput;
		error = simple_acl_create(dir, inode);
		if (error)
			goto out_iput;
		d_tmpfile(dentry, inode);
	}
	return error;
out_iput:
	iput(inode);
	return error;
}

static int shmem_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
{
	int error;

	if ((error = shmem_mknod(dir, dentry, mode | S_IFDIR, 0)))
		return error;
	inc_nlink(dir);
	return 0;
}

static int shmem_create(struct inode *dir, struct dentry *dentry, umode_t mode,
		bool excl)
{
	return shmem_mknod(dir, dentry, mode | S_IFREG, 0);
}

/*
 * Link a file..
 */
static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
{
	struct inode *inode = d_inode(old_dentry);
	int ret;

	/*
	 * No ordinary (disk based) filesystem counts links as inodes;
	 * but each new link needs a new dentry, pinning lowmem, and
	 * tmpfs dentries cannot be pruned until they are unlinked.
	 */
	ret = shmem_reserve_inode(inode->i_sb);
	if (ret)
		goto out;

	dir->i_size += BOGO_DIRENT_SIZE;
	inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode);
	inc_nlink(inode);
	ihold(inode);	/* New dentry reference */
	dget(dentry);		/* Extra pinning count for the created dentry */
	d_instantiate(dentry, inode);
out:
	return ret;
}

static int shmem_unlink(struct inode *dir, struct dentry *dentry)
{
	struct inode *inode = d_inode(dentry);

	if (inode->i_nlink > 1 && !S_ISDIR(inode->i_mode))
		shmem_free_inode(inode->i_sb);

	dir->i_size -= BOGO_DIRENT_SIZE;
	inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode);
	drop_nlink(inode);
	dput(dentry);	/* Undo the count from "create" - this does all the work */
	return 0;
}

static int shmem_rmdir(struct inode *dir, struct dentry *dentry)
{
	if (!simple_empty(dentry))
		return -ENOTEMPTY;

	drop_nlink(d_inode(dentry));
	drop_nlink(dir);
	return shmem_unlink(dir, dentry);
}

static int shmem_exchange(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry)
{
	bool old_is_dir = d_is_dir(old_dentry);
	bool new_is_dir = d_is_dir(new_dentry);

	if (old_dir != new_dir && old_is_dir != new_is_dir) {
		if (old_is_dir) {
			drop_nlink(old_dir);
			inc_nlink(new_dir);
		} else {
			drop_nlink(new_dir);
			inc_nlink(old_dir);
		}
	}
	old_dir->i_ctime = old_dir->i_mtime =
	new_dir->i_ctime = new_dir->i_mtime =
	d_inode(old_dentry)->i_ctime =
	d_inode(new_dentry)->i_ctime = current_time(old_dir);

	return 0;
}

static int shmem_whiteout(struct inode *old_dir, struct dentry *old_dentry)
{
	struct dentry *whiteout;
	int error;

	whiteout = d_alloc(old_dentry->d_parent, &old_dentry->d_name);
	if (!whiteout)
		return -ENOMEM;

	error = shmem_mknod(old_dir, whiteout,
			    S_IFCHR | WHITEOUT_MODE, WHITEOUT_DEV);
	dput(whiteout);
	if (error)
		return error;

	/*
	 * Cheat and hash the whiteout while the old dentry is still in
	 * place, instead of playing games with FS_RENAME_DOES_D_MOVE.
	 *
	 * d_lookup() will consistently find one of them at this point,
	 * not sure which one, but that isn't even important.
	 */
	d_rehash(whiteout);
	return 0;
}

/*
 * The VFS layer already does all the dentry stuff for rename,
 * we just have to decrement the usage count for the target if
 * it exists so that the VFS layer correctly frees it when it
 * gets overwritten.
 */
static int shmem_rename2(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags)
{
	struct inode *inode = d_inode(old_dentry);
	int they_are_dirs = S_ISDIR(inode->i_mode);

	if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
		return -EINVAL;

	if (flags & RENAME_EXCHANGE)
		return shmem_exchange(old_dir, old_dentry, new_dir, new_dentry);

	if (!simple_empty(new_dentry))
		return -ENOTEMPTY;

	if (flags & RENAME_WHITEOUT) {
		int error;

		error = shmem_whiteout(old_dir, old_dentry);
		if (error)
			return error;
	}

	if (d_really_is_positive(new_dentry)) {
		(void) shmem_unlink(new_dir, new_dentry);
		if (they_are_dirs) {
			drop_nlink(d_inode(new_dentry));
			drop_nlink(old_dir);
		}
	} else if (they_are_dirs) {
		drop_nlink(old_dir);
		inc_nlink(new_dir);
	}

	old_dir->i_size -= BOGO_DIRENT_SIZE;
	new_dir->i_size += BOGO_DIRENT_SIZE;
	old_dir->i_ctime = old_dir->i_mtime =
	new_dir->i_ctime = new_dir->i_mtime =
	inode->i_ctime = current_time(old_dir);
	return 0;
}

static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
{
	int error;
	int len;
	struct inode *inode;
	struct page *page;

	len = strlen(symname) + 1;
	if (len > PAGE_SIZE)
		return -ENAMETOOLONG;

	inode = shmem_get_inode(dir->i_sb, dir, S_IFLNK | 0777, 0,
				VM_NORESERVE);
	if (!inode)
		return -ENOSPC;

	error = security_inode_init_security(inode, dir, &dentry->d_name,
					     shmem_initxattrs, NULL);
	if (error) {
		if (error != -EOPNOTSUPP) {
			iput(inode);
			return error;
		}
		error = 0;
	}

	inode->i_size = len-1;
	if (len <= SHORT_SYMLINK_LEN) {
		inode->i_link = kmemdup(symname, len, GFP_KERNEL);
		if (!inode->i_link) {
			iput(inode);
			return -ENOMEM;
		}
		inode->i_op = &shmem_short_symlink_operations;
	} else {
		error = shmem_getpage(inode, 0, &page, SGP_WRITE);
		if (error) {
			iput(inode);