Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright IBM Corporation, 2021
*
* Author: Mike Rapoport <rppt@linux.ibm.com>
*/
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/swap.h>
#include <linux/mount.h>
#include <linux/memfd.h>
#include <linux/bitops.h>
#include <linux/printk.h>
#include <linux/pagemap.h>
#include <linux/syscalls.h>
#include <linux/pseudo_fs.h>
#include <linux/secretmem.h>
#include <linux/set_memory.h>
#include <linux/sched/signal.h>
#include <uapi/linux/magic.h>
#include <asm/tlbflush.h>
#include "internal.h"
#undef pr_fmt
#define pr_fmt(fmt) "secretmem: " fmt
/*
* Define mode and flag masks to allow validation of the system call
* parameters.
*/
#define SECRETMEM_MODE_MASK (0x0)
#define SECRETMEM_FLAGS_MASK SECRETMEM_MODE_MASK
static bool secretmem_enable __ro_after_init;
module_param_named(enable, secretmem_enable, bool, 0400);
MODULE_PARM_DESC(secretmem_enable,
"Enable secretmem and memfd_secret(2) system call");
static atomic_t secretmem_users;
bool secretmem_active(void)
{
return !!atomic_read(&secretmem_users);
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
static vm_fault_t secretmem_fault(struct vm_fault *vmf)
{
struct address_space *mapping = vmf->vma->vm_file->f_mapping;
struct inode *inode = file_inode(vmf->vma->vm_file);
pgoff_t offset = vmf->pgoff;
gfp_t gfp = vmf->gfp_mask;
unsigned long addr;
struct page *page;
int err;
if (((loff_t)vmf->pgoff << PAGE_SHIFT) >= i_size_read(inode))
return vmf_error(-EINVAL);
retry:
page = find_lock_page(mapping, offset);
if (!page) {
page = alloc_page(gfp | __GFP_ZERO);
if (!page)
return VM_FAULT_OOM;
err = set_direct_map_invalid_noflush(page);
if (err) {
put_page(page);
return vmf_error(err);
}
__SetPageUptodate(page);
err = add_to_page_cache_lru(page, mapping, offset, gfp);
if (unlikely(err)) {
put_page(page);
/*
* If a split of large page was required, it
* already happened when we marked the page invalid
* which guarantees that this call won't fail
*/
set_direct_map_default_noflush(page);
if (err == -EEXIST)
goto retry;
return vmf_error(err);
}
addr = (unsigned long)page_address(page);
flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
}
vmf->page = page;
return VM_FAULT_LOCKED;
}
static const struct vm_operations_struct secretmem_vm_ops = {
.fault = secretmem_fault,
};
static int secretmem_release(struct inode *inode, struct file *file)
{
atomic_dec(&secretmem_users);
return 0;
}
static int secretmem_mmap(struct file *file, struct vm_area_struct *vma)
{
unsigned long len = vma->vm_end - vma->vm_start;
if ((vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) == 0)
return -EINVAL;
if (mlock_future_check(vma->vm_mm, vma->vm_flags | VM_LOCKED, len))
return -EAGAIN;
vma->vm_flags |= VM_LOCKED | VM_DONTDUMP;
vma->vm_ops = &secretmem_vm_ops;
return 0;
}
bool vma_is_secretmem(struct vm_area_struct *vma)
{
return vma->vm_ops == &secretmem_vm_ops;
}
static const struct file_operations secretmem_fops = {
.release = secretmem_release,
.mmap = secretmem_mmap,
};
static bool secretmem_isolate_page(struct page *page, isolate_mode_t mode)
{
return false;
}
static int secretmem_migratepage(struct address_space *mapping,
struct page *newpage, struct page *page,
enum migrate_mode mode)
{
return -EBUSY;
}
static void secretmem_freepage(struct page *page)
{
set_direct_map_default_noflush(page);
clear_highpage(page);
}
const struct address_space_operations secretmem_aops = {
.set_page_dirty = __set_page_dirty_no_writeback,
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
.freepage = secretmem_freepage,
.migratepage = secretmem_migratepage,
.isolate_page = secretmem_isolate_page,
};
static struct vfsmount *secretmem_mnt;
static struct file *secretmem_file_create(unsigned long flags)
{
struct file *file = ERR_PTR(-ENOMEM);
struct inode *inode;
inode = alloc_anon_inode(secretmem_mnt->mnt_sb);
if (IS_ERR(inode))
return ERR_CAST(inode);
file = alloc_file_pseudo(inode, secretmem_mnt, "secretmem",
O_RDWR, &secretmem_fops);
if (IS_ERR(file))
goto err_free_inode;
mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER);
mapping_set_unevictable(inode->i_mapping);
inode->i_mapping->a_ops = &secretmem_aops;
/* pretend we are a normal file with zero size */
inode->i_mode |= S_IFREG;
inode->i_size = 0;
return file;
err_free_inode:
iput(inode);
return file;
}
SYSCALL_DEFINE1(memfd_secret, unsigned int, flags)
{
struct file *file;
int fd, err;
/* make sure local flags do not confict with global fcntl.h */
BUILD_BUG_ON(SECRETMEM_FLAGS_MASK & O_CLOEXEC);
if (!secretmem_enable)
return -ENOSYS;
if (flags & ~(SECRETMEM_FLAGS_MASK | O_CLOEXEC))
return -EINVAL;
if (atomic_read(&secretmem_users) < 0)
return -ENFILE;
fd = get_unused_fd_flags(flags & O_CLOEXEC);
if (fd < 0)
return fd;
file = secretmem_file_create(flags);
if (IS_ERR(file)) {
err = PTR_ERR(file);
goto err_put_fd;
}
file->f_flags |= O_LARGEFILE;
atomic_inc(&secretmem_users);
fd_install(fd, file);
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
return fd;
err_put_fd:
put_unused_fd(fd);
return err;
}
static int secretmem_init_fs_context(struct fs_context *fc)
{
return init_pseudo(fc, SECRETMEM_MAGIC) ? 0 : -ENOMEM;
}
static struct file_system_type secretmem_fs = {
.name = "secretmem",
.init_fs_context = secretmem_init_fs_context,
.kill_sb = kill_anon_super,
};
static int secretmem_init(void)
{
int ret = 0;
if (!secretmem_enable)
return ret;
secretmem_mnt = kern_mount(&secretmem_fs);
if (IS_ERR(secretmem_mnt))
ret = PTR_ERR(secretmem_mnt);
/* prevent secretmem mappings from ever getting PROT_EXEC */
secretmem_mnt->mnt_flags |= MNT_NOEXEC;
return ret;
}
fs_initcall(secretmem_init);