Commit 87d59863 authored by Heiko Carstens's avatar Heiko Carstens
Browse files

s390/mm: remove set_fs / rework address space handling



Remove set_fs support from s390. In doing so, rework and simplify
address space handling. As a result, address spaces are now set up
like this:

CPU running in              | %cr1 ASCE | %cr7 ASCE | %cr13 ASCE
----------------------------|-----------|-----------|-----------
user space                  |  user     |  user     |  kernel
kernel, normal execution    |  kernel   |  user     |  kernel
kernel, kvm guest execution |  gmap     |  user     |  kernel

To achieve this the getcpu vdso syscall is removed in order to avoid
secondary address mode and a separate vdso address space for user
space. The getcpu vdso syscall will be implemented differently in a
subsequent patch.

The kernel accesses user space always via secondary address space.
This happens in different ways:
- with mvcos in home space mode and directly read/write to secondary
  address space
- with mvcs/mvcp in primary space mode and copy from primary space to
  secondary space or vice versa
- with e.g. cs in secondary space mode and access secondary space

Switching translation modes happens with sacf before and after
instructions which access user space, like before.

Lazy handling of control register reloading is removed in the hope to
make everything simpler, but at the cost of making kernel entry and
exit a bit slower. That is: on kernel entry the primary asce is always
changed to contain the kernel asce, and on kernel exit the primary
asce is changed again so it contains the user asce.

In kernel mode there is only one exception to the primary asce: when
kvm guests are executed the primary asce contains the gmap asce (which
describes the guest address space). The primary asce is reset to
kernel asce whenever kvm guest execution is interrupted, so that this
doesn't have to be taken into account for any user space accesses.

Reviewed-by: Sven Schnelle <svens@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
parent 77663819
Loading
Loading
Loading
Loading
+0 −1
Original line number Diff line number Diff line
@@ -190,7 +190,6 @@ config S390
	select PCI_DOMAINS		if PCI
	select PCI_MSI			if PCI
	select PCI_MSI_ARCH_FALLBACKS	if PCI_MSI
	select SET_FS
	select SPARSE_IRQ
	select SYSCTL_EXCEPTION_TRACE
	select THREAD_INFO_IN_TASK
+0 −6
Original line number Diff line number Diff line
@@ -26,9 +26,7 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
		u32 __user *uaddr)
{
	int oldval = 0, newval, ret;
	mm_segment_t old_fs;

	old_fs = enable_sacf_uaccess();
	switch (op) {
	case FUTEX_OP_SET:
		__futex_atomic_op("lr %2,%5\n",
@@ -53,7 +51,6 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
	default:
		ret = -ENOSYS;
	}
	disable_sacf_uaccess(old_fs);

	if (!ret)
		*oval = oldval;
@@ -64,10 +61,8 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
						u32 oldval, u32 newval)
{
	mm_segment_t old_fs;
	int ret;

	old_fs = enable_sacf_uaccess();
	asm volatile(
		"   sacf 256\n"
		"0: cs   %1,%4,0(%5)\n"
@@ -77,7 +72,6 @@ static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
		: "=d" (ret), "+d" (oldval), "=m" (*uaddr)
		: "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr)
		: "cc", "memory");
	disable_sacf_uaccess(old_fs);
	*uval = oldval;
	return ret;
}
+2 −2
Original line number Diff line number Diff line
@@ -116,7 +116,7 @@ struct lowcore {
	/* Address space pointer. */
	__u64	kernel_asce;			/* 0x0380 */
	__u64	user_asce;			/* 0x0388 */
	__u64	vdso_asce;			/* 0x0390 */
	__u8	pad_0x0390[0x0398-0x0390];	/* 0x0390 */

	/*
	 * The lpp and current_pid fields form a
@@ -134,7 +134,7 @@ struct lowcore {
	__u32	spinlock_index;			/* 0x03b0 */
	__u32	fpu_flags;			/* 0x03b4 */
	__u64	percpu_offset;			/* 0x03b8 */
	__u64	vdso_per_cpu_data;		/* 0x03c0 */
	__u8	pad_0x03c0[0x03c8-0x03c0];	/* 0x03c0 */
	__u64	machine_flags;			/* 0x03c8 */
	__u64	gmap;				/* 0x03d0 */
	__u8	pad_0x03d8[0x0400-0x03d8];	/* 0x03d8 */
+4 −21
Original line number Diff line number Diff line
@@ -71,16 +71,6 @@ static inline int init_new_context(struct task_struct *tsk,

#define destroy_context(mm)             do { } while (0)

static inline void set_user_asce(struct mm_struct *mm)
{
	S390_lowcore.user_asce = mm->context.asce;
	__ctl_load(S390_lowcore.user_asce, 1, 1);
	clear_cpu_flag(CIF_ASCE_PRIMARY);
}

mm_segment_t enable_sacf_uaccess(void);
void disable_sacf_uaccess(mm_segment_t old_fs);

static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
			     struct task_struct *tsk)
{
@@ -88,15 +78,8 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,

	S390_lowcore.user_asce = next->context.asce;
	cpumask_set_cpu(cpu, &next->context.cpu_attach_mask);
	/* Clear previous user-ASCE from CR1 and CR7 */
	if (!test_cpu_flag(CIF_ASCE_PRIMARY)) {
		__ctl_load(S390_lowcore.kernel_asce, 1, 1);
		set_cpu_flag(CIF_ASCE_PRIMARY);
	}
	if (test_cpu_flag(CIF_ASCE_SECONDARY)) {
		__ctl_load(S390_lowcore.vdso_asce, 7, 7);
		clear_cpu_flag(CIF_ASCE_SECONDARY);
	}
	/* Clear previous user-ASCE from CR7 */
	__ctl_load(S390_lowcore.kernel_asce, 7, 7);
	if (prev != next)
		cpumask_clear_cpu(cpu, &prev->context.cpu_attach_mask);
}
@@ -115,7 +98,7 @@ static inline void finish_arch_post_lock_switch(void)
		__tlb_flush_mm_lazy(mm);
		preempt_enable();
	}
	set_fs(current->thread.mm_segment);
	__ctl_load(S390_lowcore.user_asce, 7, 7);
}

#define enter_lazy_tlb(mm,tsk)	do { } while (0)
@@ -126,7 +109,7 @@ static inline void activate_mm(struct mm_struct *prev,
{
	switch_mm(prev, next, current);
	cpumask_set_cpu(smp_processor_id(), mm_cpumask(next));
	set_user_asce(next);
	__ctl_load(S390_lowcore.user_asce, 7, 7);
}

#endif /* __S390_MMU_CONTEXT_H */
+0 −7
Original line number Diff line number Diff line
@@ -14,8 +14,6 @@

#include <linux/bits.h>

#define CIF_ASCE_PRIMARY	0	/* primary asce needs fixup / uaccess */
#define CIF_ASCE_SECONDARY	1	/* secondary asce needs fixup / uaccess */
#define CIF_NOHZ_DELAY		2	/* delay HZ disable for a tick */
#define CIF_FPU			3	/* restore FPU registers */
#define CIF_IGNORE_IRQ		4	/* ignore interrupt (for udelay) */
@@ -23,8 +21,6 @@
#define CIF_MCCK_GUEST		6	/* machine check happening in guest */
#define CIF_DEDICATED_CPU	7	/* this CPU is dedicated */

#define _CIF_ASCE_PRIMARY	BIT(CIF_ASCE_PRIMARY)
#define _CIF_ASCE_SECONDARY	BIT(CIF_ASCE_SECONDARY)
#define _CIF_NOHZ_DELAY		BIT(CIF_NOHZ_DELAY)
#define _CIF_FPU		BIT(CIF_FPU)
#define _CIF_IGNORE_IRQ		BIT(CIF_IGNORE_IRQ)
@@ -102,8 +98,6 @@ extern void __bpon(void);

#define HAVE_ARCH_PICK_MMAP_LAYOUT

typedef unsigned int mm_segment_t;

/*
 * Thread structure
 */
@@ -116,7 +110,6 @@ struct thread_struct {
	unsigned long hardirq_timer;	/* task cputime in hardirq context */
	unsigned long softirq_timer;	/* task cputime in softirq context */
	unsigned long sys_call_table;	/* system call table address */
	mm_segment_t mm_segment;
	unsigned long gmap_addr;	/* address of last gmap fault. */
	unsigned int gmap_write_flag;	/* gmap fault write indication */
	unsigned int gmap_int_code;	/* int code of last gmap fault */
Loading