Commit 0fd2e9c5 authored by Ingo Molnar's avatar Ingo Molnar
Browse files

Merge commit 'upstream-x86-entry' into WIP.x86/mm


Pull in a minimal set of v4.15 entry code changes, for a base for the MM isolation patches.
Signed-off-by: default avatarIngo Molnar <mingo@kernel.org>
parents 1784f914 1e4c4f61
Showing with 186 additions and 187 deletions
+186 -187
......@@ -4,7 +4,7 @@ ORC unwinder
Overview
--------
The kernel CONFIG_ORC_UNWINDER option enables the ORC unwinder, which is
The kernel CONFIG_UNWINDER_ORC option enables the ORC unwinder, which is
similar in concept to a DWARF unwinder. The difference is that the
format of the ORC data is much simpler than DWARF, which in turn allows
the ORC unwinder to be much simpler and faster.
......
......@@ -934,8 +934,8 @@ ifdef CONFIG_STACK_VALIDATION
ifeq ($(has_libelf),1)
objtool_target := tools/objtool FORCE
else
ifdef CONFIG_ORC_UNWINDER
$(error "Cannot generate ORC metadata for CONFIG_ORC_UNWINDER=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel")
ifdef CONFIG_UNWINDER_ORC
$(error "Cannot generate ORC metadata for CONFIG_UNWINDER_ORC=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel")
else
$(warning "Cannot use CONFIG_STACK_VALIDATION=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel")
endif
......
......@@ -171,7 +171,7 @@ config X86
select HAVE_PERF_USER_STACK_DUMP
select HAVE_RCU_TABLE_FREE
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_RELIABLE_STACKTRACE if X86_64 && FRAME_POINTER_UNWINDER && STACK_VALIDATION
select HAVE_RELIABLE_STACKTRACE if X86_64 && UNWINDER_FRAME_POINTER && STACK_VALIDATION
select HAVE_STACK_VALIDATION if X86_64
select HAVE_SYSCALL_TRACEPOINTS
select HAVE_UNSTABLE_SCHED_CLOCK
......
......@@ -359,28 +359,14 @@ config PUNIT_ATOM_DEBUG
choice
prompt "Choose kernel unwinder"
default FRAME_POINTER_UNWINDER
default UNWINDER_ORC if X86_64
default UNWINDER_FRAME_POINTER if X86_32
---help---
This determines which method will be used for unwinding kernel stack
traces for panics, oopses, bugs, warnings, perf, /proc/<pid>/stack,
livepatch, lockdep, and more.
config FRAME_POINTER_UNWINDER
bool "Frame pointer unwinder"
select FRAME_POINTER
---help---
This option enables the frame pointer unwinder for unwinding kernel
stack traces.
The unwinder itself is fast and it uses less RAM than the ORC
unwinder, but the kernel text size will grow by ~3% and the kernel's
overall performance will degrade by roughly 5-10%.
This option is recommended if you want to use the livepatch
consistency model, as this is currently the only way to get a
reliable stack trace (CONFIG_HAVE_RELIABLE_STACKTRACE).
config ORC_UNWINDER
config UNWINDER_ORC
bool "ORC unwinder"
depends on X86_64
select STACK_VALIDATION
......@@ -396,7 +382,22 @@ config ORC_UNWINDER
Enabling this option will increase the kernel's runtime memory usage
by roughly 2-4MB, depending on your kernel config.
config GUESS_UNWINDER
config UNWINDER_FRAME_POINTER
bool "Frame pointer unwinder"
select FRAME_POINTER
---help---
This option enables the frame pointer unwinder for unwinding kernel
stack traces.
The unwinder itself is fast and it uses less RAM than the ORC
unwinder, but the kernel text size will grow by ~3% and the kernel's
overall performance will degrade by roughly 5-10%.
This option is recommended if you want to use the livepatch
consistency model, as this is currently the only way to get a
reliable stack trace (CONFIG_HAVE_RELIABLE_STACKTRACE).
config UNWINDER_GUESS
bool "Guess unwinder"
depends on EXPERT
---help---
......@@ -411,7 +412,7 @@ config GUESS_UNWINDER
endchoice
config FRAME_POINTER
depends on !ORC_UNWINDER && !GUESS_UNWINDER
depends on !UNWINDER_ORC && !UNWINDER_GUESS
bool
endmenu
CONFIG_NOHIGHMEM=y
# CONFIG_HIGHMEM4G is not set
# CONFIG_HIGHMEM64G is not set
CONFIG_GUESS_UNWINDER=y
# CONFIG_FRAME_POINTER_UNWINDER is not set
CONFIG_UNWINDER_GUESS=y
# CONFIG_UNWINDER_FRAME_POINTER is not set
......@@ -299,6 +299,7 @@ CONFIG_DEBUG_STACKOVERFLOW=y
# CONFIG_DEBUG_RODATA_TEST is not set
CONFIG_DEBUG_BOOT_PARAMS=y
CONFIG_OPTIMIZE_INLINING=y
CONFIG_UNWINDER_ORC=y
CONFIG_SECURITY=y
CONFIG_SECURITY_NETWORK=y
CONFIG_SECURITY_SELINUX=y
......
......@@ -142,56 +142,25 @@ For 32-bit we have the following conventions - kernel is built with
UNWIND_HINT_REGS offset=\offset
.endm
.macro RESTORE_EXTRA_REGS offset=0
movq 0*8+\offset(%rsp), %r15
movq 1*8+\offset(%rsp), %r14
movq 2*8+\offset(%rsp), %r13
movq 3*8+\offset(%rsp), %r12
movq 4*8+\offset(%rsp), %rbp
movq 5*8+\offset(%rsp), %rbx
UNWIND_HINT_REGS offset=\offset extra=0
.endm
.macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1
.if \rstor_r11
movq 6*8(%rsp), %r11
.endif
.if \rstor_r8910
movq 7*8(%rsp), %r10
movq 8*8(%rsp), %r9
movq 9*8(%rsp), %r8
.endif
.if \rstor_rax
movq 10*8(%rsp), %rax
.endif
.if \rstor_rcx
movq 11*8(%rsp), %rcx
.endif
.if \rstor_rdx
movq 12*8(%rsp), %rdx
.endif
movq 13*8(%rsp), %rsi
movq 14*8(%rsp), %rdi
UNWIND_HINT_IRET_REGS offset=16*8
.endm
.macro RESTORE_C_REGS
RESTORE_C_REGS_HELPER 1,1,1,1,1
.endm
.macro RESTORE_C_REGS_EXCEPT_RAX
RESTORE_C_REGS_HELPER 0,1,1,1,1
.endm
.macro RESTORE_C_REGS_EXCEPT_RCX
RESTORE_C_REGS_HELPER 1,0,1,1,1
.endm
.macro RESTORE_C_REGS_EXCEPT_R11
RESTORE_C_REGS_HELPER 1,1,0,1,1
.endm
.macro RESTORE_C_REGS_EXCEPT_RCX_R11
RESTORE_C_REGS_HELPER 1,0,0,1,1
.endm
.macro REMOVE_PT_GPREGS_FROM_STACK addskip=0
subq $-(15*8+\addskip), %rsp
.macro POP_EXTRA_REGS
popq %r15
popq %r14
popq %r13
popq %r12
popq %rbp
popq %rbx
.endm
.macro POP_C_REGS
popq %r11
popq %r10
popq %r9
popq %r8
popq %rax
popq %rcx
popq %rdx
popq %rsi
popq %rdi
.endm
.macro icebp
......
......@@ -221,10 +221,9 @@ entry_SYSCALL_64_fastpath:
TRACE_IRQS_ON /* user mode is traced as IRQs on */
movq RIP(%rsp), %rcx
movq EFLAGS(%rsp), %r11
RESTORE_C_REGS_EXCEPT_RCX_R11
movq RSP(%rsp), %rsp
addq $6*8, %rsp /* skip extra regs -- they were preserved */
UNWIND_HINT_EMPTY
USERGS_SYSRET64
jmp .Lpop_c_regs_except_rcx_r11_and_sysret
1:
/*
......@@ -246,17 +245,18 @@ entry_SYSCALL64_slow_path:
call do_syscall_64 /* returns with IRQs disabled */
return_from_SYSCALL_64:
RESTORE_EXTRA_REGS
TRACE_IRQS_IRETQ /* we're about to change IF */
/*
* Try to use SYSRET instead of IRET if we're returning to
* a completely clean 64-bit userspace context.
* a completely clean 64-bit userspace context. If we're not,
* go to the slow exit path.
*/
movq RCX(%rsp), %rcx
movq RIP(%rsp), %r11
cmpq %rcx, %r11 /* RCX == RIP */
jne opportunistic_sysret_failed
cmpq %rcx, %r11 /* SYSRET requires RCX == RIP */
jne swapgs_restore_regs_and_return_to_usermode
/*
* On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP
......@@ -274,14 +274,14 @@ return_from_SYSCALL_64:
/* If this changed %rcx, it was not canonical */
cmpq %rcx, %r11
jne opportunistic_sysret_failed
jne swapgs_restore_regs_and_return_to_usermode
cmpq $__USER_CS, CS(%rsp) /* CS must match SYSRET */
jne opportunistic_sysret_failed
jne swapgs_restore_regs_and_return_to_usermode
movq R11(%rsp), %r11
cmpq %r11, EFLAGS(%rsp) /* R11 == RFLAGS */
jne opportunistic_sysret_failed
jne swapgs_restore_regs_and_return_to_usermode
/*
* SYSCALL clears RF when it saves RFLAGS in R11 and SYSRET cannot
......@@ -302,12 +302,12 @@ return_from_SYSCALL_64:
* would never get past 'stuck_here'.
*/
testq $(X86_EFLAGS_RF|X86_EFLAGS_TF), %r11
jnz opportunistic_sysret_failed
jnz swapgs_restore_regs_and_return_to_usermode
/* nothing to check for RSP */
cmpq $__USER_DS, SS(%rsp) /* SS must match SYSRET */
jne opportunistic_sysret_failed
jne swapgs_restore_regs_and_return_to_usermode
/*
* We win! This label is here just for ease of understanding
......@@ -315,14 +315,20 @@ return_from_SYSCALL_64:
*/
syscall_return_via_sysret:
/* rcx and r11 are already restored (see code above) */
RESTORE_C_REGS_EXCEPT_RCX_R11
movq RSP(%rsp), %rsp
UNWIND_HINT_EMPTY
POP_EXTRA_REGS
.Lpop_c_regs_except_rcx_r11_and_sysret:
popq %rsi /* skip r11 */
popq %r10
popq %r9
popq %r8
popq %rax
popq %rsi /* skip rcx */
popq %rdx
popq %rsi
popq %rdi
movq RSP-ORIG_RAX(%rsp), %rsp
USERGS_SYSRET64
opportunistic_sysret_failed:
SWAPGS
jmp restore_c_regs_and_iret
END(entry_SYSCALL_64)
ENTRY(stub_ptregs_64)
......@@ -423,8 +429,7 @@ ENTRY(ret_from_fork)
movq %rsp, %rdi
call syscall_return_slowpath /* returns with IRQs disabled */
TRACE_IRQS_ON /* user mode is traced as IRQS on */
SWAPGS
jmp restore_regs_and_iret
jmp swapgs_restore_regs_and_return_to_usermode
1:
/* kernel thread */
......@@ -612,8 +617,21 @@ GLOBAL(retint_user)
mov %rsp,%rdi
call prepare_exit_to_usermode
TRACE_IRQS_IRETQ
GLOBAL(swapgs_restore_regs_and_return_to_usermode)
#ifdef CONFIG_DEBUG_ENTRY
/* Assert that pt_regs indicates user mode. */
testb $3, CS(%rsp)
jnz 1f
ud2
1:
#endif
SWAPGS
jmp restore_regs_and_iret
POP_EXTRA_REGS
POP_C_REGS
addq $8, %rsp /* skip regs->orig_ax */
INTERRUPT_RETURN
/* Returning to kernel space */
retint_kernel:
......@@ -633,15 +651,17 @@ retint_kernel:
*/
TRACE_IRQS_IRETQ
/*
* At this label, code paths which return to kernel and to user,
* which come from interrupts/exception and from syscalls, merge.
*/
GLOBAL(restore_regs_and_iret)
RESTORE_EXTRA_REGS
restore_c_regs_and_iret:
RESTORE_C_REGS
REMOVE_PT_GPREGS_FROM_STACK 8
GLOBAL(restore_regs_and_return_to_kernel)
#ifdef CONFIG_DEBUG_ENTRY
/* Assert that pt_regs indicates kernel mode. */
testb $3, CS(%rsp)
jz 1f
ud2
1:
#endif
POP_EXTRA_REGS
POP_C_REGS
addq $8, %rsp /* skip regs->orig_ax */
INTERRUPT_RETURN
ENTRY(native_iret)
......@@ -818,7 +838,7 @@ ENTRY(\sym)
ASM_CLAC
.ifeq \has_error_code
.if \has_error_code == 0
pushq $-1 /* ORIG_RAX: no syscall to restart */
.endif
......@@ -1059,6 +1079,7 @@ idtentry int3 do_int3 has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
idtentry stack_segment do_stack_segment has_error_code=1
#ifdef CONFIG_XEN
idtentry xennmi do_nmi has_error_code=0
idtentry xendebug do_debug has_error_code=0
idtentry xenint3 do_int3 has_error_code=0
#endif
......@@ -1112,17 +1133,14 @@ ENTRY(paranoid_exit)
DISABLE_INTERRUPTS(CLBR_ANY)
TRACE_IRQS_OFF_DEBUG
testl %ebx, %ebx /* swapgs needed? */
jnz paranoid_exit_no_swapgs
jnz .Lparanoid_exit_no_swapgs
TRACE_IRQS_IRETQ
SWAPGS_UNSAFE_STACK
jmp paranoid_exit_restore
paranoid_exit_no_swapgs:
jmp .Lparanoid_exit_restore
.Lparanoid_exit_no_swapgs:
TRACE_IRQS_IRETQ_DEBUG
paranoid_exit_restore:
RESTORE_EXTRA_REGS
RESTORE_C_REGS
REMOVE_PT_GPREGS_FROM_STACK 8
INTERRUPT_RETURN
.Lparanoid_exit_restore:
jmp restore_regs_and_return_to_kernel
END(paranoid_exit)
/*
......@@ -1223,10 +1241,13 @@ ENTRY(error_exit)
jmp retint_user
END(error_exit)
/* Runs on exception stack */
/* XXX: broken on Xen PV */
/*
* Runs on exception stack. Xen PV does not go through this path at all,
* so we can use real assembly here.
*/
ENTRY(nmi)
UNWIND_HINT_IRET_REGS
/*
* We allow breakpoints in NMIs. If a breakpoint occurs, then
* the iretq it performs will take us out of NMI context.
......@@ -1284,7 +1305,7 @@ ENTRY(nmi)
* stacks lest we corrupt the "NMI executing" variable.
*/
SWAPGS_UNSAFE_STACK
swapgs
cld
movq %rsp, %rdx
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
......@@ -1328,8 +1349,7 @@ ENTRY(nmi)
* Return back to user mode. We must *not* do the normal exit
* work, because we don't want to enable interrupts.
*/
SWAPGS
jmp restore_regs_and_iret
jmp swapgs_restore_regs_and_return_to_usermode
.Lnmi_from_kernel:
/*
......@@ -1450,7 +1470,7 @@ nested_nmi_out:
popq %rdx
/* We are returning to kernel mode, so this cannot result in a fault. */
INTERRUPT_RETURN
iretq
first_nmi:
/* Restore rdx. */
......@@ -1481,7 +1501,7 @@ first_nmi:
pushfq /* RFLAGS */
pushq $__KERNEL_CS /* CS */
pushq $1f /* RIP */
INTERRUPT_RETURN /* continues at repeat_nmi below */
iretq /* continues at repeat_nmi below */
UNWIND_HINT_IRET_REGS
1:
#endif
......@@ -1544,29 +1564,34 @@ end_repeat_nmi:
nmi_swapgs:
SWAPGS_UNSAFE_STACK
nmi_restore:
RESTORE_EXTRA_REGS
RESTORE_C_REGS
POP_EXTRA_REGS
POP_C_REGS
/* Point RSP at the "iret" frame. */
REMOVE_PT_GPREGS_FROM_STACK 6*8
/*
* Skip orig_ax and the "outermost" frame to point RSP at the "iret"
* at the "iret" frame.
*/
addq $6*8, %rsp
/*
* Clear "NMI executing". Set DF first so that we can easily
* distinguish the remaining code between here and IRET from
* the SYSCALL entry and exit paths. On a native kernel, we
* could just inspect RIP, but, on paravirt kernels,
* INTERRUPT_RETURN can translate into a jump into a
* hypercall page.
* the SYSCALL entry and exit paths.
*
* We arguably should just inspect RIP instead, but I (Andy) wrote
* this code when I had the misapprehension that Xen PV supported
* NMIs, and Xen PV would break that approach.
*/
std
movq $0, 5*8(%rsp) /* clear "NMI executing" */
/*
* INTERRUPT_RETURN reads the "iret" frame and exits the NMI
* stack in a single instruction. We are returning to kernel
* mode, so this cannot result in a fault.
* iretq reads the "iret" frame and exits the NMI stack in a
* single instruction. We are returning to kernel mode, so this
* cannot result in a fault. Similarly, we don't need to worry
* about espfix64 on the way back to kernel mode.
*/
INTERRUPT_RETURN
iretq
END(nmi)
ENTRY(ignore_sysret)
......
......@@ -337,8 +337,7 @@ ENTRY(entry_INT80_compat)
/* Go back to user mode. */
TRACE_IRQS_ON
SWAPGS
jmp restore_regs_and_iret
jmp swapgs_restore_regs_and_return_to_usermode
END(entry_INT80_compat)
ENTRY(stub32_clone)
......
......@@ -45,7 +45,7 @@ static inline bool rdrand_long(unsigned long *v)
bool ok;
unsigned int retry = RDRAND_RETRY_LOOPS;
do {
asm volatile(RDRAND_LONG "\n\t"
asm volatile(RDRAND_LONG
CC_SET(c)
: CC_OUT(c) (ok), "=a" (*v));
if (ok)
......@@ -59,7 +59,7 @@ static inline bool rdrand_int(unsigned int *v)
bool ok;
unsigned int retry = RDRAND_RETRY_LOOPS;
do {
asm volatile(RDRAND_INT "\n\t"
asm volatile(RDRAND_INT
CC_SET(c)
: CC_OUT(c) (ok), "=a" (*v));
if (ok)
......@@ -71,7 +71,7 @@ static inline bool rdrand_int(unsigned int *v)
static inline bool rdseed_long(unsigned long *v)
{
bool ok;
asm volatile(RDSEED_LONG "\n\t"
asm volatile(RDSEED_LONG
CC_SET(c)
: CC_OUT(c) (ok), "=a" (*v));
return ok;
......@@ -80,7 +80,7 @@ static inline bool rdseed_long(unsigned long *v)
static inline bool rdseed_int(unsigned int *v)
{
bool ok;
asm volatile(RDSEED_INT "\n\t"
asm volatile(RDSEED_INT
CC_SET(c)
: CC_OUT(c) (ok), "=a" (*v));
return ok;
......
......@@ -143,7 +143,7 @@ static __always_inline void __clear_bit(long nr, volatile unsigned long *addr)
static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr)
{
bool negative;
asm volatile(LOCK_PREFIX "andb %2,%1\n\t"
asm volatile(LOCK_PREFIX "andb %2,%1"
CC_SET(s)
: CC_OUT(s) (negative), ADDR
: "ir" ((char) ~(1 << nr)) : "memory");
......@@ -246,7 +246,7 @@ static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long *
{
bool oldbit;
asm("bts %2,%1\n\t"
asm("bts %2,%1"
CC_SET(c)
: CC_OUT(c) (oldbit), ADDR
: "Ir" (nr));
......@@ -286,7 +286,7 @@ static __always_inline bool __test_and_clear_bit(long nr, volatile unsigned long
{
bool oldbit;
asm volatile("btr %2,%1\n\t"
asm volatile("btr %2,%1"
CC_SET(c)
: CC_OUT(c) (oldbit), ADDR
: "Ir" (nr));
......@@ -298,7 +298,7 @@ static __always_inline bool __test_and_change_bit(long nr, volatile unsigned lon
{
bool oldbit;
asm volatile("btc %2,%1\n\t"
asm volatile("btc %2,%1"
CC_SET(c)
: CC_OUT(c) (oldbit), ADDR
: "Ir" (nr) : "memory");
......@@ -329,7 +329,7 @@ static __always_inline bool variable_test_bit(long nr, volatile const unsigned l
{
bool oldbit;
asm volatile("bt %2,%1\n\t"
asm volatile("bt %2,%1"
CC_SET(c)
: CC_OUT(c) (oldbit)
: "m" (*(unsigned long *)addr), "Ir" (nr));
......
......@@ -7,6 +7,7 @@
*/
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/sched/task_stack.h>
#include <asm/processor.h>
#include <asm/user32.h>
#include <asm/unistd.h>
......
......@@ -126,11 +126,10 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
#define boot_cpu_has(bit) cpu_has(&boot_cpu_data, bit)
#define set_cpu_cap(c, bit) set_bit(bit, (unsigned long *)((c)->x86_capability))
#define clear_cpu_cap(c, bit) clear_bit(bit, (unsigned long *)((c)->x86_capability))
#define setup_clear_cpu_cap(bit) do { \
clear_cpu_cap(&boot_cpu_data, bit); \
set_bit(bit, (unsigned long *)cpu_caps_cleared); \
} while (0)
extern void setup_clear_cpu_cap(unsigned int bit);
extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
#define setup_force_cpu_cap(bit) do { \
set_cpu_cap(&boot_cpu_data, bit); \
set_bit(bit, (unsigned long *)cpu_caps_set); \
......
......@@ -22,6 +22,11 @@
* this feature bit is not displayed in /proc/cpuinfo at all.
*/
/*
* When adding new features here that depend on other features,
* please update the table in kernel/cpu/cpuid-deps.c
*/
/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */
#define X86_FEATURE_FPU ( 0*32+ 0) /* Onboard FPU */
#define X86_FEATURE_VME ( 0*32+ 1) /* Virtual Mode Extensions */
......@@ -295,6 +300,12 @@
#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/
#define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */
#define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */
#define X86_FEATURE_AVX512_VBMI2 (16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */
#define X86_FEATURE_GFNI (16*32+ 8) /* Galois Field New Instructions */
#define X86_FEATURE_VAES (16*32+ 9) /* Vector AES */
#define X86_FEATURE_VPCLMULQDQ (16*32+ 10) /* Carry-Less Multiplication Double Quadword */
#define X86_FEATURE_AVX512_VNNI (16*32+ 11) /* Vector Neural Network Instructions */
#define X86_FEATURE_AVX512_BITALG (16*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB */
#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */
#define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */
#define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */
......
......@@ -6,7 +6,7 @@
#include <asm/orc_types.h>
struct mod_arch_specific {
#ifdef CONFIG_ORC_UNWINDER
#ifdef CONFIG_UNWINDER_ORC
unsigned int num_orcs;
int *orc_unwind_ip;
struct orc_entry *orc_unwind;
......
......@@ -16,10 +16,9 @@
#include <linux/cpumask.h>
#include <asm/frame.h>
static inline void load_sp0(struct tss_struct *tss,
struct thread_struct *thread)
static inline void load_sp0(unsigned long sp0)
{
PVOP_VCALL2(pv_cpu_ops.load_sp0, tss, thread);
PVOP_VCALL1(pv_cpu_ops.load_sp0, sp0);
}
/* The paravirtualized CPUID instruction. */
......
......@@ -134,7 +134,7 @@ struct pv_cpu_ops {
void (*alloc_ldt)(struct desc_struct *ldt, unsigned entries);
void (*free_ldt)(struct desc_struct *ldt, unsigned entries);
void (*load_sp0)(struct tss_struct *tss, struct thread_struct *t);
void (*load_sp0)(unsigned long sp0);
void (*set_iopl_mask)(unsigned mask);
......
......@@ -526,7 +526,7 @@ static inline bool x86_this_cpu_variable_test_bit(int nr,
{
bool oldbit;
asm volatile("bt "__percpu_arg(2)",%1\n\t"
asm volatile("bt "__percpu_arg(2)",%1"
CC_SET(c)
: CC_OUT(c) (oldbit)
: "m" (*(unsigned long __percpu *)addr), "Ir" (nr));
......
......@@ -431,7 +431,9 @@ typedef struct {
struct thread_struct {
/* Cached TLS descriptors: */
struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES];
#ifdef CONFIG_X86_32
unsigned long sp0;
#endif
unsigned long sp;
#ifdef CONFIG_X86_32
unsigned long sysenter_cs;
......@@ -518,16 +520,9 @@ static inline void native_set_iopl_mask(unsigned mask)
}
static inline void
native_load_sp0(struct tss_struct *tss, struct thread_struct *thread)
native_load_sp0(unsigned long sp0)
{
tss->x86_tss.sp0 = thread->sp0;
#ifdef CONFIG_X86_32
/* Only happens when SEP is enabled, no need to test "SEP"arately: */
if (unlikely(tss->x86_tss.ss1 != thread->sysenter_cs)) {
tss->x86_tss.ss1 = thread->sysenter_cs;
wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
}
#endif
this_cpu_write(cpu_tss.x86_tss.sp0, sp0);
}
static inline void native_swapgs(void)
......@@ -547,15 +542,20 @@ static inline unsigned long current_top_of_stack(void)
#endif
}
static inline bool on_thread_stack(void)
{
return (unsigned long)(current_top_of_stack() -
current_stack_pointer) < THREAD_SIZE;
}
#ifdef CONFIG_PARAVIRT
#include <asm/paravirt.h>
#else
#define __cpuid native_cpuid
static inline void load_sp0(struct tss_struct *tss,
struct thread_struct *thread)
static inline void load_sp0(unsigned long sp0)
{
native_load_sp0(tss, thread);
native_load_sp0(sp0);
}
#define set_iopl_mask native_set_iopl_mask
......@@ -804,6 +804,15 @@ static inline void spin_lock_prefetch(const void *x)
#define TOP_OF_INIT_STACK ((unsigned long)&init_stack + sizeof(init_stack) - \
TOP_OF_KERNEL_STACK_PADDING)
#define task_top_of_stack(task) ((unsigned long)(task_pt_regs(task) + 1))
#define task_pt_regs(task) \
({ \
unsigned long __ptr = (unsigned long)task_stack_page(task); \
__ptr += THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING; \
((struct pt_regs *)__ptr) - 1; \
})
#ifdef CONFIG_X86_32
/*
* User space process size: 3GB (default).
......@@ -823,23 +832,6 @@ static inline void spin_lock_prefetch(const void *x)
.addr_limit = KERNEL_DS, \
}
/*
* TOP_OF_KERNEL_STACK_PADDING reserves 8 bytes on top of the ring0 stack.
* This is necessary to guarantee that the entire "struct pt_regs"
* is accessible even if the CPU haven't stored the SS/ESP registers
* on the stack (interrupt gate does not save these registers
* when switching to the same priv ring).
* Therefore beware: accessing the ss/esp fields of the
* "struct pt_regs" is possible, but they may contain the
* completely wrong values.
*/
#define task_pt_regs(task) \
({ \
unsigned long __ptr = (unsigned long)task_stack_page(task); \
__ptr += THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING; \
((struct pt_regs *)__ptr) - 1; \
})
#define KSTK_ESP(task) (task_pt_regs(task)->sp)
#else
......@@ -873,11 +865,9 @@ static inline void spin_lock_prefetch(const void *x)
#define STACK_TOP_MAX TASK_SIZE_MAX
#define INIT_THREAD { \
.sp0 = TOP_OF_INIT_STACK, \
.addr_limit = KERNEL_DS, \
}
#define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1)
extern unsigned long KSTK_ESP(struct task_struct *task);
#endif /* CONFIG_X86_64 */
......
......@@ -136,9 +136,9 @@ static inline int v8086_mode(struct pt_regs *regs)
#endif
}
#ifdef CONFIG_X86_64
static inline bool user_64bit_mode(struct pt_regs *regs)
{
#ifdef CONFIG_X86_64
#ifndef CONFIG_PARAVIRT
/*
* On non-paravirt systems, this is the only long mode CPL 3
......@@ -149,8 +149,12 @@ static inline bool user_64bit_mode(struct pt_regs *regs)
/* Headers are too twisted for this to go in paravirt.h. */
return regs->cs == __USER_CS || regs->cs == pv_info.extra_user_64bit_cs;
#endif
#else /* !CONFIG_X86_64 */
return false;
#endif
}
#ifdef CONFIG_X86_64
#define current_user_stack_pointer() current_pt_regs()->sp
#define compat_user_stack_pointer() current_pt_regs()->sp
#endif
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment